From 1e47d70530775f0394af8b7e18b2f8987c4292cb Mon Sep 17 00:00:00 2001
From: Sergio Afonso
Date: Thu, 7 Mar 2024 11:36:29 +0000
Subject: [PATCH 1/2] [Flang][OpenMP][MLIR][Lower] Update lowering to use
 wrapper ops

This patch introduces the following changes:
- Complete OpenMP directive sets to include masked- and taskloop-related
  ones.
- Refactor clause processing to store related operands in structures. These
  are combined into construct-specific operand structures using a mixin
  pattern, which simplifies keeping track of the operand lists used for
  creating OpenMP operations and reduces argument lists.
- Update the lowering to MLIR of loop constructs to follow the wrapper +
  canonical loop approach. This includes some changes to privatization that
  need testing.
- Add lowering for the omp.canonical_loop operation, based on omp.wsloop.
- Significantly refactor OpenMP construct lowering, separating composite
  constructs into their own "gen" functions and handling combined loop
  constructs in a more scalable way. Update genOMP functions to follow the
  same set of patterns to make them easier to understand. Split clause
  processing calls from code generation for operations in preparation for
  reusing the same code when dealing with composite constructs.
- Add basic support for taskloop, for the sake of completeness.
- Document missing clauses.
- Move some code from OpenMP.cpp to ClauseProcessor.cpp and call it there to
  simplify calls to certain process methods that were always followed by the
  same post-processing.
- Add and populate the "composite" attribute on wrapper operations.
- Add empty constructors for wrapper operations.
- Update `LoopWrapperInterface` to avoid compilation problems.
- Update `Task*` operations' names to follow the same capitalization
  convention.
- Change the SCF to OpenMP conversion to produce a wrapper omp.wsloop with a
  nested omp.canonical_loop rather than an invalid omp.wsloop.
- Document missing verifier checks on wrapper operations.
- Make temporary changes to the OpenMP to LLVM IR translation to make this
  compilable.

All these changes are still WIP, as they haven't been tested.
---
 .../flang/Semantics/openmp-directive-sets.h   |  172 +-
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp    |  267 +-
 flang/lib/Lower/OpenMP/ClauseProcessor.h      |  121 +-
 .../lib/Lower/OpenMP/DataSharingProcessor.cpp |   32 +-
 flang/lib/Lower/OpenMP/DataSharingProcessor.h |    1 +
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 3261 ++++++++++-------
 flang/lib/Lower/OpenMP/OperationClauses.h     |  306 ++
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |    2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.h   |    2 +-
 flang/lib/Lower/OpenMP/Utils.cpp              |   19 +
 flang/lib/Lower/OpenMP/Utils.h                |    3 +
 .../Transforms/DoConcurrentConversion.cpp     |   17 +-
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |    8 +-
 .../Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp  |    4 +-
 .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp    |   10 +-
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  |    3 +
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |   50 +-
 17 files changed, 2503 insertions(+), 1775 deletions(-)
 create mode 100644 flang/lib/Lower/OpenMP/OperationClauses.h

diff --git a/flang/include/flang/Semantics/openmp-directive-sets.h b/flang/include/flang/Semantics/openmp-directive-sets.h
index a4f27b00152e2..a40073ff47914 100644
--- a/flang/include/flang/Semantics/openmp-directive-sets.h
+++ b/flang/include/flang/Semantics/openmp-directive-sets.h
@@ -24,68 +24,80 @@ namespace llvm::omp {
 // - allSet: All standalone or combined uses of the directive.
static const OmpDirectiveSet topParallelSet{ - Directive::OMPD_parallel, - Directive::OMPD_parallel_do, Directive::OMPD_parallel_do_simd, + Directive::OMPD_parallel_do, + Directive::OMPD_parallel_masked_taskloop_simd, + Directive::OMPD_parallel_masked_taskloop, + Directive::OMPD_parallel_master_taskloop_simd, + Directive::OMPD_parallel_master_taskloop, Directive::OMPD_parallel_sections, Directive::OMPD_parallel_workshare, + Directive::OMPD_parallel, }; static const OmpDirectiveSet allParallelSet{ - Directive::OMPD_distribute_parallel_do, - Directive::OMPD_distribute_parallel_do_simd, - Directive::OMPD_parallel, - Directive::OMPD_parallel_do, - Directive::OMPD_parallel_do_simd, - Directive::OMPD_parallel_sections, - Directive::OMPD_parallel_workshare, - Directive::OMPD_target_parallel, - Directive::OMPD_target_parallel_do, - Directive::OMPD_target_parallel_do_simd, - Directive::OMPD_target_teams_distribute_parallel_do, - Directive::OMPD_target_teams_distribute_parallel_do_simd, - Directive::OMPD_teams_distribute_parallel_do, - Directive::OMPD_teams_distribute_parallel_do_simd, + OmpDirectiveSet{ + Directive::OMPD_distribute_parallel_do_simd, + Directive::OMPD_distribute_parallel_do, + Directive::OMPD_target_parallel_do_simd, + Directive::OMPD_target_parallel_do, + Directive::OMPD_target_parallel, + Directive::OMPD_target_teams_distribute_parallel_do_simd, + Directive::OMPD_target_teams_distribute_parallel_do, + Directive::OMPD_teams_distribute_parallel_do_simd, + Directive::OMPD_teams_distribute_parallel_do, + } | topParallelSet, }; static const OmpDirectiveSet topDoSet{ - Directive::OMPD_do, Directive::OMPD_do_simd, + Directive::OMPD_do, }; static const OmpDirectiveSet allDoSet{ - Directive::OMPD_distribute_parallel_do, - Directive::OMPD_distribute_parallel_do_simd, - Directive::OMPD_parallel_do, - Directive::OMPD_parallel_do_simd, - Directive::OMPD_do, - Directive::OMPD_do_simd, - Directive::OMPD_target_parallel_do, - Directive::OMPD_target_parallel_do_simd, - Directive::OMPD_target_teams_distribute_parallel_do, - Directive::OMPD_target_teams_distribute_parallel_do_simd, - Directive::OMPD_teams_distribute_parallel_do, - Directive::OMPD_teams_distribute_parallel_do_simd, + OmpDirectiveSet{ + Directive::OMPD_distribute_parallel_do_simd, + Directive::OMPD_distribute_parallel_do, + Directive::OMPD_parallel_do_simd, + Directive::OMPD_parallel_do, + Directive::OMPD_target_parallel_do_simd, + Directive::OMPD_target_parallel_do, + Directive::OMPD_target_teams_distribute_parallel_do_simd, + Directive::OMPD_target_teams_distribute_parallel_do, + Directive::OMPD_teams_distribute_parallel_do_simd, + Directive::OMPD_teams_distribute_parallel_do, + } | topDoSet, }; static const OmpDirectiveSet topTaskloopSet{ - Directive::OMPD_taskloop, Directive::OMPD_taskloop_simd, + Directive::OMPD_taskloop, }; -static const OmpDirectiveSet allTaskloopSet{topTaskloopSet}; +static const OmpDirectiveSet allTaskloopSet{ + OmpDirectiveSet{ + Directive::OMPD_masked_taskloop_simd, + Directive::OMPD_masked_taskloop, + Directive::OMPD_master_taskloop_simd, + Directive::OMPD_master_taskloop, + Directive::OMPD_parallel_masked_taskloop_simd, + Directive::OMPD_parallel_masked_taskloop, + Directive::OMPD_parallel_master_taskloop_simd, + Directive::OMPD_parallel_master_taskloop, + } | topTaskloopSet, +}; static const OmpDirectiveSet topTargetSet{ - Directive::OMPD_target, - Directive::OMPD_target_parallel, - Directive::OMPD_target_parallel_do, Directive::OMPD_target_parallel_do_simd, + Directive::OMPD_target_parallel_do, + 
Directive::OMPD_target_parallel, Directive::OMPD_target_simd, - Directive::OMPD_target_teams, - Directive::OMPD_target_teams_distribute, - Directive::OMPD_target_teams_distribute_parallel_do, Directive::OMPD_target_teams_distribute_parallel_do_simd, + Directive::OMPD_target_teams_distribute_parallel_do, Directive::OMPD_target_teams_distribute_simd, + Directive::OMPD_target_teams_distribute, + Directive::OMPD_target_teams, + Directive::OMPD_target, }; static const OmpDirectiveSet allTargetSet{topTargetSet}; @@ -95,61 +107,61 @@ static const OmpDirectiveSet topSimdSet{ }; static const OmpDirectiveSet allSimdSet{ - Directive::OMPD_distribute_parallel_do_simd, - Directive::OMPD_distribute_simd, - Directive::OMPD_do_simd, - Directive::OMPD_parallel_do_simd, - Directive::OMPD_simd, - Directive::OMPD_target_parallel_do_simd, - Directive::OMPD_target_simd, - Directive::OMPD_target_teams_distribute_parallel_do_simd, - Directive::OMPD_target_teams_distribute_simd, - Directive::OMPD_taskloop_simd, - Directive::OMPD_teams_distribute_parallel_do_simd, - Directive::OMPD_teams_distribute_simd, + OmpDirectiveSet{ + Directive::OMPD_distribute_parallel_do_simd, + Directive::OMPD_distribute_simd, + Directive::OMPD_do_simd, + Directive::OMPD_masked_taskloop_simd, + Directive::OMPD_master_taskloop_simd, + Directive::OMPD_parallel_do_simd, + Directive::OMPD_parallel_masked_taskloop_simd, + Directive::OMPD_parallel_master_taskloop_simd, + Directive::OMPD_target_parallel_do_simd, + Directive::OMPD_target_simd, + Directive::OMPD_target_teams_distribute_parallel_do_simd, + Directive::OMPD_target_teams_distribute_simd, + Directive::OMPD_taskloop_simd, + Directive::OMPD_teams_distribute_parallel_do_simd, + Directive::OMPD_teams_distribute_simd, + } | topSimdSet, }; static const OmpDirectiveSet topTeamsSet{ - Directive::OMPD_teams, - Directive::OMPD_teams_distribute, - Directive::OMPD_teams_distribute_parallel_do, Directive::OMPD_teams_distribute_parallel_do_simd, + Directive::OMPD_teams_distribute_parallel_do, Directive::OMPD_teams_distribute_simd, + Directive::OMPD_teams_distribute, + Directive::OMPD_teams, }; static const OmpDirectiveSet allTeamsSet{ - llvm::omp::OMPD_target_teams, - llvm::omp::OMPD_target_teams_distribute, - llvm::omp::OMPD_target_teams_distribute_parallel_do, - llvm::omp::OMPD_target_teams_distribute_parallel_do_simd, - llvm::omp::OMPD_target_teams_distribute_simd, - llvm::omp::OMPD_teams, - llvm::omp::OMPD_teams_distribute, - llvm::omp::OMPD_teams_distribute_parallel_do, - llvm::omp::OMPD_teams_distribute_parallel_do_simd, - llvm::omp::OMPD_teams_distribute_simd, + OmpDirectiveSet{ + llvm::omp::OMPD_target_teams_distribute_parallel_do_simd, + llvm::omp::OMPD_target_teams_distribute_parallel_do, + llvm::omp::OMPD_target_teams_distribute_simd, + llvm::omp::OMPD_target_teams_distribute, + llvm::omp::OMPD_target_teams, + } | topTeamsSet, }; static const OmpDirectiveSet topDistributeSet{ - Directive::OMPD_distribute, - Directive::OMPD_distribute_parallel_do, Directive::OMPD_distribute_parallel_do_simd, + Directive::OMPD_distribute_parallel_do, Directive::OMPD_distribute_simd, + Directive::OMPD_distribute, }; static const OmpDirectiveSet allDistributeSet{ - llvm::omp::OMPD_distribute, - llvm::omp::OMPD_distribute_parallel_do, - llvm::omp::OMPD_distribute_parallel_do_simd, - llvm::omp::OMPD_distribute_simd, - llvm::omp::OMPD_target_teams_distribute, - llvm::omp::OMPD_target_teams_distribute_parallel_do, - llvm::omp::OMPD_target_teams_distribute_parallel_do_simd, - 
llvm::omp::OMPD_target_teams_distribute_simd, - llvm::omp::OMPD_teams_distribute, - llvm::omp::OMPD_teams_distribute_parallel_do, - llvm::omp::OMPD_teams_distribute_parallel_do_simd, - llvm::omp::OMPD_teams_distribute_simd, + OmpDirectiveSet{ + llvm::omp::OMPD_target_teams_distribute_parallel_do_simd, + llvm::omp::OMPD_target_teams_distribute_parallel_do, + llvm::omp::OMPD_target_teams_distribute_simd, + llvm::omp::OMPD_target_teams_distribute, + llvm::omp::OMPD_teams_distribute_parallel_do_simd, + llvm::omp::OMPD_teams_distribute_parallel_do, + llvm::omp::OMPD_teams_distribute_simd, + llvm::omp::OMPD_teams_distribute, + } | topDistributeSet, }; //===----------------------------------------------------------------------===// @@ -188,8 +200,16 @@ static const OmpDirectiveSet loopConstructSet{ Directive::OMPD_distribute, Directive::OMPD_do_simd, Directive::OMPD_do, + Directive::OMPD_masked_taskloop, + Directive::OMPD_masked_taskloop_simd, + Directive::OMPD_master_taskloop, + Directive::OMPD_master_taskloop_simd, Directive::OMPD_parallel_do_simd, Directive::OMPD_parallel_do, + Directive::OMPD_parallel_masked_taskloop, + Directive::OMPD_parallel_masked_taskloop_simd, + Directive::OMPD_parallel_master_taskloop, + Directive::OMPD_parallel_master_taskloop_simd, Directive::OMPD_simd, Directive::OMPD_target_parallel_do_simd, Directive::OMPD_target_parallel_do, diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 833989cdc7023..4cb2aa74e1791 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -230,17 +230,30 @@ addUseDeviceClause(Fortran::lower::AbstractConverter &converter, } } +static void convertLoopBounds(Fortran::lower::AbstractConverter &converter, + mlir::Location loc, CollapseClauseOps &ops, + std::size_t loopVarTypeSize) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + // The types of lower bound, upper bound, and step are converted into the + // type of the loop variable if necessary. 
+ mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); + for (unsigned it = 0; it < (unsigned)ops.loopLBVar.size(); it++) { + ops.loopLBVar[it] = + firOpBuilder.createConvert(loc, loopVarType, ops.loopLBVar[it]); + ops.loopUBVar[it] = + firOpBuilder.createConvert(loc, loopVarType, ops.loopUBVar[it]); + ops.loopStepVar[it] = + firOpBuilder.createConvert(loc, loopVarType, ops.loopStepVar[it]); + } +} + //===----------------------------------------------------------------------===// // ClauseProcessor unique clauses //===----------------------------------------------------------------------===// -bool ClauseProcessor::processCollapse( - mlir::Location currentLocation, Fortran::lower::pft::Evaluation &eval, - llvm::SmallVectorImpl &lowerBound, - llvm::SmallVectorImpl &upperBound, - llvm::SmallVectorImpl &step, - llvm::SmallVectorImpl &iv, - std::size_t &loopVarTypeSize) const { +bool ClauseProcessor::processCollapse(mlir::Location currentLocation, + Fortran::lower::pft::Evaluation &eval, + CollapseClauseOps &ops) const { bool found = false; fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); @@ -259,7 +272,7 @@ bool ClauseProcessor::processCollapse( found = true; } - loopVarTypeSize = 0; + std::size_t loopVarTypeSize = 0; do { Fortran::lower::pft::Evaluation *doLoop = &doConstructEval->getFirstNestedEvaluation(); @@ -271,18 +284,18 @@ bool ClauseProcessor::processCollapse( std::get_if(&loopControl->u); assert(bounds && "Expected bounds for worksharing do loop"); Fortran::lower::StatementContext stmtCtx; - lowerBound.push_back(fir::getBase(converter.genExprValue( + ops.loopLBVar.push_back(fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(bounds->lower), stmtCtx))); - upperBound.push_back(fir::getBase(converter.genExprValue( + ops.loopUBVar.push_back(fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(bounds->upper), stmtCtx))); if (bounds->step) { - step.push_back(fir::getBase(converter.genExprValue( + ops.loopStepVar.push_back(fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(bounds->step), stmtCtx))); } else { // If `step` is not present, assume it as `1`. 
- step.push_back(firOpBuilder.createIntegerConstant( + ops.loopStepVar.push_back(firOpBuilder.createIntegerConstant( currentLocation, firOpBuilder.getIntegerType(32), 1)); } - iv.push_back(bounds->name.thing.symbol); + ops.loopIV.push_back(bounds->name.thing.symbol); loopVarTypeSize = std::max(loopVarTypeSize, bounds->name.thing.symbol->GetUltimate().size()); collapseValue--; @@ -290,6 +303,9 @@ bool ClauseProcessor::processCollapse( &*std::next(doConstructEval->getNestedEvaluations().begin()); } while (collapseValue > 0); + if (found) + convertLoopBounds(converter, currentLocation, ops, loopVarTypeSize); + return found; } @@ -316,7 +332,7 @@ bool ClauseProcessor::processDefault() const { } bool ClauseProcessor::processDevice(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const { + DeviceClauseOps &result) const { const Fortran::parser::CharBlock *source = nullptr; if (auto *deviceClause = findUniqueClause(&source)) { mlir::Location clauseLocation = converter.genLocation(*source); @@ -330,26 +346,26 @@ bool ClauseProcessor::processDevice(Fortran::lower::StatementContext &stmtCtx, } if (const auto *deviceExpr = Fortran::semantics::GetExpr( std::get(deviceClause->v.t))) { - result = fir::getBase(converter.genExprValue(*deviceExpr, stmtCtx)); + result.deviceVar = + fir::getBase(converter.genExprValue(*deviceExpr, stmtCtx)); } return true; } return false; } -bool ClauseProcessor::processDeviceType( - mlir::omp::DeclareTargetDeviceType &result) const { +bool ClauseProcessor::processDeviceType(DeviceTypeClauseOps &result) const { if (auto *deviceTypeClause = findUniqueClause()) { // Case: declare target ... device_type(any | host | nohost) switch (deviceTypeClause->v.v) { case Fortran::parser::OmpDeviceTypeClause::Type::Nohost: - result = mlir::omp::DeclareTargetDeviceType::nohost; + result.deviceType = mlir::omp::DeclareTargetDeviceType::nohost; break; case Fortran::parser::OmpDeviceTypeClause::Type::Host: - result = mlir::omp::DeclareTargetDeviceType::host; + result.deviceType = mlir::omp::DeclareTargetDeviceType::host; break; case Fortran::parser::OmpDeviceTypeClause::Type::Any: - result = mlir::omp::DeclareTargetDeviceType::any; + result.deviceType = mlir::omp::DeclareTargetDeviceType::any; break; } return true; @@ -358,7 +374,7 @@ bool ClauseProcessor::processDeviceType( } bool ClauseProcessor::processFinal(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const { + FinalClauseOps &result) const { const Fortran::parser::CharBlock *source = nullptr; if (auto *finalClause = findUniqueClause(&source)) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); @@ -366,38 +382,38 @@ bool ClauseProcessor::processFinal(Fortran::lower::StatementContext &stmtCtx, mlir::Value finalVal = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(finalClause->v), stmtCtx)); - result = firOpBuilder.createConvert(clauseLocation, - firOpBuilder.getI1Type(), finalVal); + result.finalVar = firOpBuilder.createConvert( + clauseLocation, firOpBuilder.getI1Type(), finalVal); return true; } return false; } -bool ClauseProcessor::processHint(mlir::IntegerAttr &result) const { +bool ClauseProcessor::processHint(HintClauseOps &result) const { if (auto *hintClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); const auto *expr = Fortran::semantics::GetExpr(hintClause->v); int64_t hintValue = *Fortran::evaluate::ToInt64(*expr); - result = firOpBuilder.getI64IntegerAttr(hintValue); + result.hintAttr = 
firOpBuilder.getI64IntegerAttr(hintValue); return true; } return false; } -bool ClauseProcessor::processMergeable(mlir::UnitAttr &result) const { - return markClauseOccurrence(result); +bool ClauseProcessor::processMergeable(MergeableClauseOps &result) const { + return markClauseOccurrence(result.mergeableAttr); } -bool ClauseProcessor::processNowait(mlir::UnitAttr &result) const { - return markClauseOccurrence(result); +bool ClauseProcessor::processNowait(NowaitClauseOps &result) const { + return markClauseOccurrence(result.nowaitAttr); } bool ClauseProcessor::processNumTeams(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const { + NumTeamsClauseOps &result) const { // TODO Get lower and upper bounds for num_teams when parser is updated to // accept both. if (auto *numTeamsClause = findUniqueClause()) { - result = fir::getBase(converter.genExprValue( + result.numTeamsUpperVar = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(numTeamsClause->v), stmtCtx)); return true; } @@ -405,17 +421,18 @@ bool ClauseProcessor::processNumTeams(Fortran::lower::StatementContext &stmtCtx, } bool ClauseProcessor::processNumThreads( - Fortran::lower::StatementContext &stmtCtx, mlir::Value &result) const { + Fortran::lower::StatementContext &stmtCtx, + NumThreadsClauseOps &result) const { if (auto *numThreadsClause = findUniqueClause()) { // OMPIRBuilder expects `NUM_THREADS` clause as a `Value`. - result = fir::getBase(converter.genExprValue( + result.numThreadsVar = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(numThreadsClause->v), stmtCtx)); return true; } return false; } -bool ClauseProcessor::processOrdered(mlir::IntegerAttr &result) const { +bool ClauseProcessor::processOrdered(OrderedClauseOps &result) const { if (auto *orderedClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); int64_t orderedClauseValue = 0l; @@ -423,48 +440,45 @@ bool ClauseProcessor::processOrdered(mlir::IntegerAttr &result) const { const auto *expr = Fortran::semantics::GetExpr(orderedClause->v); orderedClauseValue = *Fortran::evaluate::ToInt64(*expr); } - result = firOpBuilder.getI64IntegerAttr(orderedClauseValue); + result.orderedAttr = firOpBuilder.getI64IntegerAttr(orderedClauseValue); return true; } return false; } bool ClauseProcessor::processPriority(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const { + PriorityClauseOps &result) const { if (auto *priorityClause = findUniqueClause()) { - result = fir::getBase(converter.genExprValue( + result.priorityVar = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(priorityClause->v), stmtCtx)); return true; } return false; } -bool ClauseProcessor::processProcBind( - mlir::omp::ClauseProcBindKindAttr &result) const { +bool ClauseProcessor::processProcBind(ProcBindClauseOps &result) const { if (auto *procBindClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - result = genProcBindKindAttr(firOpBuilder, procBindClause); + result.procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause); return true; } return false; } -bool ClauseProcessor::processSafelen(mlir::IntegerAttr &result) const { +bool ClauseProcessor::processSafelen(SafelenClauseOps &result) const { if (auto *safelenClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); const auto *expr = Fortran::semantics::GetExpr(safelenClause->v); const std::optional safelenVal = Fortran::evaluate::ToInt64(*expr); 
- result = firOpBuilder.getI64IntegerAttr(*safelenVal); + result.safelenAttr = firOpBuilder.getI64IntegerAttr(*safelenVal); return true; } return false; } -bool ClauseProcessor::processSchedule( - mlir::omp::ClauseScheduleKindAttr &valAttr, - mlir::omp::ScheduleModifierAttr &modifierAttr, - mlir::UnitAttr &simdModifierAttr) const { +bool ClauseProcessor::processSchedule(Fortran::lower::StatementContext &stmtCtx, + ScheduleClauseOps &result) const { if (auto *scheduleClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::MLIRContext *context = firOpBuilder.getContext(); @@ -496,26 +510,21 @@ bool ClauseProcessor::processSchedule( getScheduleModifier(scheduleClause->v); if (scheduleModifier != mlir::omp::ScheduleModifier::none) - modifierAttr = + result.scheduleModAttr = mlir::omp::ScheduleModifierAttr::get(context, scheduleModifier); if (getSimdModifier(scheduleClause->v) != mlir::omp::ScheduleModifier::none) - simdModifierAttr = firOpBuilder.getUnitAttr(); + result.scheduleSimdAttr = firOpBuilder.getUnitAttr(); - valAttr = mlir::omp::ClauseScheduleKindAttr::get(context, scheduleKind); - return true; - } - return false; -} + result.scheduleValAttr = + mlir::omp::ClauseScheduleKindAttr::get(context, scheduleKind); -bool ClauseProcessor::processScheduleChunk( - Fortran::lower::StatementContext &stmtCtx, mlir::Value &result) const { - if (auto *scheduleClause = findUniqueClause()) { if (const auto &chunkExpr = std::get>( scheduleClause->v.t)) { if (const auto *expr = Fortran::semantics::GetExpr(*chunkExpr)) { - result = fir::getBase(converter.genExprValue(*expr, stmtCtx)); + result.scheduleChunkVar = + fir::getBase(converter.genExprValue(*expr, stmtCtx)); } } return true; @@ -523,48 +532,47 @@ bool ClauseProcessor::processScheduleChunk( return false; } -bool ClauseProcessor::processSimdlen(mlir::IntegerAttr &result) const { +bool ClauseProcessor::processSimdlen(SimdlenClauseOps &result) const { if (auto *simdlenClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); const auto *expr = Fortran::semantics::GetExpr(simdlenClause->v); const std::optional simdlenVal = Fortran::evaluate::ToInt64(*expr); - result = firOpBuilder.getI64IntegerAttr(*simdlenVal); + result.simdlenAttr = firOpBuilder.getI64IntegerAttr(*simdlenVal); return true; } return false; } bool ClauseProcessor::processThreadLimit( - Fortran::lower::StatementContext &stmtCtx, mlir::Value &result) const { + Fortran::lower::StatementContext &stmtCtx, + ThreadLimitClauseOps &result) const { if (auto *threadLmtClause = findUniqueClause()) { - result = fir::getBase(converter.genExprValue( + result.threadLimitVar = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(threadLmtClause->v), stmtCtx)); return true; } return false; } -bool ClauseProcessor::processUntied(mlir::UnitAttr &result) const { - return markClauseOccurrence(result); +bool ClauseProcessor::processUntied(UntiedClauseOps &result) const { + return markClauseOccurrence(result.untiedAttr); } //===----------------------------------------------------------------------===// // ClauseProcessor repeatable clauses //===----------------------------------------------------------------------===// -bool ClauseProcessor::processAllocate( - llvm::SmallVectorImpl &allocatorOperands, - llvm::SmallVectorImpl &allocateOperands) const { +bool ClauseProcessor::processAllocate(AllocateClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Allocate *allocateClause, const 
Fortran::parser::CharBlock &) { - genAllocateClause(converter, allocateClause->v, allocatorOperands, - allocateOperands); + genAllocateClause(converter, allocateClause->v, result.allocatorVars, + result.allocateVars); }); } -bool ClauseProcessor::processCopyin() const { +bool ClauseProcessor::processCopyin(CopyinClauseOps &) const { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); @@ -709,10 +717,8 @@ createCopyFunc(mlir::Location loc, Fortran::lower::AbstractConverter &converter, return funcOp; } -bool ClauseProcessor::processCopyPrivate( - mlir::Location currentLocation, - llvm::SmallVectorImpl ©PrivateVars, - llvm::SmallVectorImpl ©PrivateFuncs) const { +bool ClauseProcessor::processCopyprivate(mlir::Location currentLocation, + CopyprivateClauseOps &result) const { auto addCopyPrivateVar = [&](Fortran::semantics::Symbol *sym) { mlir::Value symVal = converter.getSymbolAddress(*sym); auto declOp = symVal.getDefiningOp(); @@ -739,10 +745,10 @@ bool ClauseProcessor::processCopyPrivate( cpVar = alloca; } - copyPrivateVars.push_back(cpVar); + result.copyprivateVars.push_back(cpVar); mlir::func::FuncOp funcOp = createCopyFunc(currentLocation, converter, cpVar.getType(), attrs); - copyPrivateFuncs.push_back(mlir::SymbolRefAttr::get(funcOp)); + result.copyprivateFuncs.push_back(mlir::SymbolRefAttr::get(funcOp)); }; bool hasCopyPrivate = findRepeatableClause( @@ -765,9 +771,7 @@ bool ClauseProcessor::processCopyPrivate( return hasCopyPrivate; } -bool ClauseProcessor::processDepend( - llvm::SmallVectorImpl &dependTypeOperands, - llvm::SmallVectorImpl &dependOperands) const { +bool ClauseProcessor::processDepend(DependClauseOps &result) const { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); return findRepeatableClause( @@ -780,8 +784,8 @@ bool ClauseProcessor::processDepend( .t); mlir::omp::ClauseTaskDependAttr dependTypeOperand = genDependKindAttr(firOpBuilder, dependClause); - dependTypeOperands.insert(dependTypeOperands.end(), depVal.size(), - dependTypeOperand); + result.dependTypeAttrs.insert(result.dependTypeAttrs.end(), + depVal.size(), dependTypeOperand); for (const Fortran::parser::Designator &ompObject : depVal) { Fortran::semantics::Symbol *sym = nullptr; std::visit( @@ -803,14 +807,14 @@ bool ClauseProcessor::processDepend( }}, (ompObject).u); const mlir::Value variable = converter.getSymbolAddress(*sym); - dependOperands.push_back(variable); + result.dependVars.push_back(variable); } }); } bool ClauseProcessor::processIf( Fortran::parser::OmpIfClause::DirectiveNameModifier directiveName, - mlir::Value &result) const { + IfClauseOps &result) const { bool found = false; findRepeatableClause( [&](const ClauseTy::If *ifClause, @@ -821,21 +825,21 @@ bool ClauseProcessor::processIf( // Assume that, at most, a single 'if' clause will be applicable to the // given directive. if (operand) { - result = operand; + result.ifVar = operand; found = true; } }); return found; } -bool ClauseProcessor::processLink( - llvm::SmallVectorImpl &result) const { +bool ClauseProcessor::processLink(EnterLinkToClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Link *linkClause, const Fortran::parser::CharBlock &) { // Case: declare target link(var1, var2)... 
- gatherFuncAndVarSyms( - linkClause->v, mlir::omp::DeclareTargetCaptureClause::link, result); + gatherFuncAndVarSyms(linkClause->v, + mlir::omp::DeclareTargetCaptureClause::link, + result.symbolAndClause); }); } @@ -863,14 +867,9 @@ createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc, return op; } -bool ClauseProcessor::processMap( - mlir::Location currentLocation, const llvm::omp::Directive &directive, - Fortran::lower::StatementContext &stmtCtx, - llvm::SmallVectorImpl &mapOperands, - llvm::SmallVectorImpl *mapSymTypes, - llvm::SmallVectorImpl *mapSymLocs, - llvm::SmallVectorImpl *mapSymbols) - const { +bool ClauseProcessor::processMap(mlir::Location currentLocation, + Fortran::lower::StatementContext &stmtCtx, + MapClauseOps &result) const { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); return findRepeatableClause( [&](const ClauseTy::Map *mapClause, @@ -946,100 +945,92 @@ bool ClauseProcessor::processMap( mapTypeBits), mlir::omp::VariableCaptureKind::ByRef, symAddr.getType()); - mapOperands.push_back(mapOp); - if (mapSymTypes) - mapSymTypes->push_back(symAddr.getType()); - if (mapSymLocs) - mapSymLocs->push_back(symAddr.getLoc()); - - if (mapSymbols) - mapSymbols->push_back(getOmpObjectSymbol(ompObject)); + result.mapVars.push_back(mapOp); + if (result.mapSymTypes) + result.mapSymTypes->push_back(symAddr.getType()); + if (result.mapSymLocs) + result.mapSymLocs->push_back(symAddr.getLoc()); + if (result.mapSymbols) + result.mapSymbols->push_back(getOmpObjectSymbol(ompObject)); } }); } bool ClauseProcessor::processTargetReduction( - llvm::SmallVector &reductionSymbols) - const { + TargetReductionClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Reduction *reductionClause, const Fortran::parser::CharBlock &) { ReductionProcessor rp; - rp.addReductionSym(reductionClause->v, reductionSymbols); + rp.addReductionSym(reductionClause->v, result.targetReductionSymbols); }); } -bool ClauseProcessor::processReduction( - mlir::Location currentLocation, - llvm::SmallVectorImpl &reductionVars, - llvm::SmallVectorImpl &reductionDeclSymbols, - llvm::SmallVectorImpl *reductionSymbols) - const { +bool ClauseProcessor::processReduction(mlir::Location currentLocation, + ReductionClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Reduction *reductionClause, const Fortran::parser::CharBlock &) { ReductionProcessor rp; rp.addReductionDecl(currentLocation, converter, reductionClause->v, - reductionVars, reductionDeclSymbols, - reductionSymbols); + result.reductionVars, result.reductionDeclSymbols, + result.reductionSymbols ? &*result.reductionSymbols + : nullptr); + result.reductionTypes.reserve(result.reductionVars.size()); + llvm::transform(result.reductionVars, + std::back_inserter(result.reductionTypes), + [](mlir::Value v) { return v.getType(); }); }); } -bool ClauseProcessor::processSectionsReduction( - mlir::Location currentLocation) const { +bool ClauseProcessor::processSectionsReduction(mlir::Location currentLocation, + ReductionClauseOps &) const { return findRepeatableClause( [&](const ClauseTy::Reduction *, const Fortran::parser::CharBlock &) { + // Either implement special handling or remove this method and use the + // generic processReduction() method instead. 
TODO(currentLocation, "OMPC_Reduction"); }); } -bool ClauseProcessor::processTo( - llvm::SmallVectorImpl &result) const { +bool ClauseProcessor::processTo(EnterLinkToClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::To *toClause, const Fortran::parser::CharBlock &) { // Case: declare target to(func, var1, var2)... gatherFuncAndVarSyms(toClause->v, - mlir::omp::DeclareTargetCaptureClause::to, result); + mlir::omp::DeclareTargetCaptureClause::to, + result.symbolAndClause); }); } -bool ClauseProcessor::processEnter( - llvm::SmallVectorImpl &result) const { +bool ClauseProcessor::processEnter(EnterLinkToClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Enter *enterClause, const Fortran::parser::CharBlock &) { // Case: declare target enter(func, var1, var2)... gatherFuncAndVarSyms(enterClause->v, mlir::omp::DeclareTargetCaptureClause::enter, - result); + result.symbolAndClause); }); } -bool ClauseProcessor::processUseDeviceAddr( - llvm::SmallVectorImpl &operands, - llvm::SmallVectorImpl &useDeviceTypes, - llvm::SmallVectorImpl &useDeviceLocs, - llvm::SmallVectorImpl &useDeviceSymbols) - const { +bool ClauseProcessor::processUseDeviceAddr(UseDeviceClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::UseDeviceAddr *devAddrClause, const Fortran::parser::CharBlock &) { - addUseDeviceClause(converter, devAddrClause->v, operands, - useDeviceTypes, useDeviceLocs, useDeviceSymbols); + addUseDeviceClause(converter, devAddrClause->v, + result.useDeviceAddrVars, result.useDeviceTypes, + result.useDeviceLocs, result.useDeviceSymbols); }); } -bool ClauseProcessor::processUseDevicePtr( - llvm::SmallVectorImpl &operands, - llvm::SmallVectorImpl &useDeviceTypes, - llvm::SmallVectorImpl &useDeviceLocs, - llvm::SmallVectorImpl &useDeviceSymbols) - const { +bool ClauseProcessor::processUseDevicePtr(UseDeviceClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::UseDevicePtr *devPtrClause, const Fortran::parser::CharBlock &) { - addUseDeviceClause(converter, devPtrClause->v, operands, useDeviceTypes, - useDeviceLocs, useDeviceSymbols); + addUseDeviceClause(converter, devPtrClause->v, result.useDevicePtrVars, + result.useDeviceTypes, result.useDeviceLocs, + result.useDeviceSymbols); }); } } // namespace omp diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index 0b91aca3d1344..c6b0b73dd1a56 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -13,6 +13,7 @@ #define FORTRAN_LOWER_CLAUASEPROCESSOR_H #include "DirectivesCommon.h" +#include "OperationClauses.h" #include "ReductionProcessor.h" #include "Utils.h" #include "flang/Lower/AbstractConverter.h" @@ -54,101 +55,62 @@ class ClauseProcessor { : converter(converter), semaCtx(semaCtx), clauses(clauses) {} // 'Unique' clauses: They can appear at most once in the clause list. 
- bool - processCollapse(mlir::Location currentLocation, - Fortran::lower::pft::Evaluation &eval, - llvm::SmallVectorImpl &lowerBound, - llvm::SmallVectorImpl &upperBound, - llvm::SmallVectorImpl &step, - llvm::SmallVectorImpl &iv, - std::size_t &loopVarTypeSize) const; + bool processCollapse(mlir::Location currentLocation, + Fortran::lower::pft::Evaluation &eval, + CollapseClauseOps &result) const; bool processDefault() const; bool processDevice(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processDeviceType(mlir::omp::DeclareTargetDeviceType &result) const; + DeviceClauseOps &result) const; + bool processDeviceType(DeviceTypeClauseOps &result) const; bool processFinal(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processHint(mlir::IntegerAttr &result) const; - bool processMergeable(mlir::UnitAttr &result) const; - bool processNowait(mlir::UnitAttr &result) const; + FinalClauseOps &result) const; + bool processHint(HintClauseOps &result) const; + bool processMergeable(MergeableClauseOps &result) const; + bool processNowait(NowaitClauseOps &result) const; bool processNumTeams(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; + NumTeamsClauseOps &result) const; bool processNumThreads(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processOrdered(mlir::IntegerAttr &result) const; + NumThreadsClauseOps &result) const; + bool processOrdered(OrderedClauseOps &result) const; bool processPriority(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processProcBind(mlir::omp::ClauseProcBindKindAttr &result) const; - bool processSafelen(mlir::IntegerAttr &result) const; - bool processSchedule(mlir::omp::ClauseScheduleKindAttr &valAttr, - mlir::omp::ScheduleModifierAttr &modifierAttr, - mlir::UnitAttr &simdModifierAttr) const; - bool processScheduleChunk(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processSimdlen(mlir::IntegerAttr &result) const; + PriorityClauseOps &result) const; + bool processProcBind(ProcBindClauseOps &result) const; + bool processSafelen(SafelenClauseOps &result) const; + bool processSchedule(Fortran::lower::StatementContext &stmtCtx, + ScheduleClauseOps &result) const; + bool processSimdlen(SimdlenClauseOps &result) const; bool processThreadLimit(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processUntied(mlir::UnitAttr &result) const; + ThreadLimitClauseOps &result) const; + bool processUntied(UntiedClauseOps &result) const; // 'Repeatable' clauses: They can appear multiple times in the clause list. 
- bool - processAllocate(llvm::SmallVectorImpl &allocatorOperands, - llvm::SmallVectorImpl &allocateOperands) const; - bool processCopyin() const; - bool processCopyPrivate( - mlir::Location currentLocation, - llvm::SmallVectorImpl ©PrivateVars, - llvm::SmallVectorImpl ©PrivateFuncs) const; - bool processDepend(llvm::SmallVectorImpl &dependTypeOperands, - llvm::SmallVectorImpl &dependOperands) const; - bool - processEnter(llvm::SmallVectorImpl &result) const; + bool processAllocate(AllocateClauseOps &result) const; + bool processCopyin(CopyinClauseOps &result) const; + bool processCopyprivate(mlir::Location currentLocation, + CopyprivateClauseOps &result) const; + bool processDepend(DependClauseOps &result) const; + bool processEnter(EnterLinkToClauseOps &result) const; bool processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier directiveName, - mlir::Value &result) const; - bool - processLink(llvm::SmallVectorImpl &result) const; + IfClauseOps &result) const; + bool processLink(EnterLinkToClauseOps &result) const; // This method is used to process a map clause. - // The optional parameters - mapSymTypes, mapSymLocs & mapSymbols are used to - // store the original type, location and Fortran symbol for the map operands. - // They may be used later on to create the block_arguments for some of the - // target directives that require it. bool processMap(mlir::Location currentLocation, - const llvm::omp::Directive &directive, Fortran::lower::StatementContext &stmtCtx, - llvm::SmallVectorImpl &mapOperands, - llvm::SmallVectorImpl *mapSymTypes = nullptr, - llvm::SmallVectorImpl *mapSymLocs = nullptr, - llvm::SmallVectorImpl - *mapSymbols = nullptr) const; - bool - processReduction(mlir::Location currentLocation, - llvm::SmallVectorImpl &reductionVars, - llvm::SmallVectorImpl &reductionDeclSymbols, - llvm::SmallVectorImpl - *reductionSymbols = nullptr) const; - bool processTargetReduction( - llvm::SmallVector &reductionSymbols) - const; - bool processSectionsReduction(mlir::Location currentLocation) const; - bool processTo(llvm::SmallVectorImpl &result) const; - bool - processUseDeviceAddr(llvm::SmallVectorImpl &operands, - llvm::SmallVectorImpl &useDeviceTypes, - llvm::SmallVectorImpl &useDeviceLocs, - llvm::SmallVectorImpl - &useDeviceSymbols) const; - bool - processUseDevicePtr(llvm::SmallVectorImpl &operands, - llvm::SmallVectorImpl &useDeviceTypes, - llvm::SmallVectorImpl &useDeviceLocs, - llvm::SmallVectorImpl - &useDeviceSymbols) const; + MapClauseOps &result) const; + bool processReduction(mlir::Location currentLocation, + ReductionClauseOps &result) const; + bool processTargetReduction(TargetReductionClauseOps &result) const; + bool processSectionsReduction(mlir::Location currentLocation, + ReductionClauseOps &result) const; + bool processTo(EnterLinkToClauseOps &result) const; + bool processUseDeviceAddr(UseDeviceClauseOps &result) const; + bool processUseDevicePtr(UseDeviceClauseOps &result) const; template bool processMotionClauses(Fortran::lower::StatementContext &stmtCtx, - llvm::SmallVectorImpl &mapOperands); + MapClauseOps &result); // Call this method for these clauses that should be supported but are not // implemented yet. 
It triggers a compilation error if any of the given @@ -189,8 +151,7 @@ class ClauseProcessor { template bool ClauseProcessor::processMotionClauses( - Fortran::lower::StatementContext &stmtCtx, - llvm::SmallVectorImpl &mapOperands) { + Fortran::lower::StatementContext &stmtCtx, MapClauseOps &result) { return findRepeatableClause( [&](const T *motionClause, const Fortran::parser::CharBlock &source) { mlir::Location clauseLocation = converter.genLocation(source); @@ -232,7 +193,7 @@ bool ClauseProcessor::processMotionClauses( mapTypeBits), mlir::omp::VariableCaptureKind::ByRef, symAddr.getType()); - mapOperands.push_back(mapOp); + result.mapVars.push_back(mapOp); } }); } diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index 717b8cc0276a3..5c27a91bd469f 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -213,21 +213,23 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { firOpBuilder.restoreInsertionPoint(unstructuredSectionsIP); } } - } else if (mlir::isa(op)) { - // Update the original variable just before exiting the worksharing - // loop. Conversion as follows: + } else if (mlir::isa(op)) { + // TODO Check that the change from WsLoopOp to LoopNestOp didn't + // break anything here. + // Update the original variable just before exiting the loop. Conversion + // as follows: // - // omp.wsloop { - // omp.wsloop { ... - // ... store - // store ===> %v = arith.addi %iv, %step - // omp.yield %cmp = %step < 0 ? %v < %ub : %v > %ub - // } fir.if %cmp { - // fir.store %v to %loopIV - // ^%lpv_update_blk: - // } - // omp.yield + // omp.loopnest { + // omp.loopnest { ... + // ... store + // store ===> %v = arith.addi %iv, %step + // omp.yield %cmp = %step < 0 ? %v < %ub : %v > %ub + // } fir.if %cmp { + // fir.store %v to %loopIV + // ^%lpv_update_blk: // } + // omp.yield + // } // // Only generate the compare once in presence of multiple LastPrivate @@ -242,8 +244,8 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { mlir::Value iv = op->getRegion(0).front().getArguments()[0]; mlir::Value ub = - mlir::dyn_cast(op).getUpperBound()[0]; - mlir::Value step = mlir::dyn_cast(op).getStep()[0]; + mlir::cast(op).getUpperBound()[0]; + mlir::Value step = mlir::cast(op).getStep()[0]; // v = iv + step // cmp = step < 0 ? v < ub : v > ub diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h index 9f7301df07598..c22d1e966df90 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h @@ -24,6 +24,7 @@ namespace omp { class DataSharingProcessor { public: + // TODO Replace with PrivateClauseOps. /// Collects all the information needed for delayed privatization. This can be /// used by ops with data-sharing clauses to properly generate their regions /// (e.g. 
add region arguments) and map the original SSA values to their diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index e499e16c19e04..fdb8ef4977bc9 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -38,38 +38,6 @@ using namespace Fortran::lower::omp; -//===----------------------------------------------------------------------===// -// Code generation helper functions -//===----------------------------------------------------------------------===// - -static Fortran::lower::pft::Evaluation * -getCollapsedLoopEval(Fortran::lower::pft::Evaluation &eval, int collapseValue) { - // Return the Evaluation of the innermost collapsed loop, or the current one - // if there was no COLLAPSE. - if (collapseValue == 0) - return &eval; - - Fortran::lower::pft::Evaluation *curEval = &eval.getFirstNestedEvaluation(); - for (int i = 1; i < collapseValue; i++) { - // The nested evaluations should be DoConstructs (i.e. they should form - // a loop nest). Each DoConstruct is a tuple . - assert(curEval->isA()); - curEval = &*std::next(curEval->getNestedEvaluations().begin()); - } - return curEval; -} - -static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter, - Fortran::lower::pft::Evaluation &eval, - int collapseValue = 0) { - Fortran::lower::pft::Evaluation *curEval = - getCollapsedLoopEval(eval, collapseValue); - - for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations()) - converter.genEval(e); -} - //===----------------------------------------------------------------------===// // HostClausesInsertionGuard //===----------------------------------------------------------------------===// @@ -139,6 +107,113 @@ class HostClausesInsertionGuard { } }; +//===----------------------------------------------------------------------===// +// OpWithBodyGenInfo +//===----------------------------------------------------------------------===// + +struct OpWithBodyGenInfo { + /// A type for a code-gen callback function. This takes as argument the op for + /// which the code is being generated and returns the arguments of the op's + /// region. + using GenOMPRegionEntryCBFn = + std::function( + mlir::Operation *)>; + + OpWithBodyGenInfo(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + mlir::Location loc, Fortran::lower::pft::Evaluation &eval) + : converter(converter), semaCtx(semaCtx), loc(loc), eval(eval) {} + + OpWithBodyGenInfo &setGenNested(bool value) { + genNested = value; + return *this; + } + + OpWithBodyGenInfo &setOuterCombined(bool value) { + outerCombined = value; + return *this; + } + + OpWithBodyGenInfo &setClauses(const Fortran::parser::OmpClauseList *value) { + clauses = value; + return *this; + } + + OpWithBodyGenInfo &setDataSharingProcessor(DataSharingProcessor *value) { + dsp = value; + return *this; + } + + OpWithBodyGenInfo & + setReductions(llvm::ArrayRef symbols, + llvm::ArrayRef types) { + reductionSymbols = symbols; + reductionTypes = types; + return *this; + } + + OpWithBodyGenInfo &setGenRegionEntryCb(GenOMPRegionEntryCBFn value) { + genRegionEntryCB = value; + return *this; + } + + /// [inout] converter to use for the clauses. + Fortran::lower::AbstractConverter &converter; + /// [in] Semantics context + Fortran::semantics::SemanticsContext &semaCtx; + /// [in] location in source code. + mlir::Location loc; + /// [in] current PFT node/evaluation. 
+ Fortran::lower::pft::Evaluation &eval; + /// [in] whether to generate FIR for nested evaluations + bool genNested = true; + /// [in] is this an outer operation - prevents privatization. + bool outerCombined = false; + /// [in] list of clauses to process. + const Fortran::parser::OmpClauseList *clauses = nullptr; + /// [in] if provided, processes the construct's data-sharing attributes. + DataSharingProcessor *dsp = nullptr; + /// [in] if provided, list of reduction symbols + llvm::ArrayRef reductionSymbols; + /// [in] if provided, list of reduction types + llvm::ArrayRef reductionTypes; + /// [in] if provided, emits the op's region entry. Otherwise, an emtpy block + /// is created in the region. + GenOMPRegionEntryCBFn genRegionEntryCB = nullptr; +}; + +//===----------------------------------------------------------------------===// +// Code generation helper functions +//===----------------------------------------------------------------------===// + +static Fortran::lower::pft::Evaluation * +getCollapsedLoopEval(Fortran::lower::pft::Evaluation &eval, int collapseValue) { + // Return the Evaluation of the innermost collapsed loop, or the current one + // if there was no COLLAPSE. + if (collapseValue == 0) + return &eval; + + Fortran::lower::pft::Evaluation *curEval = &eval.getFirstNestedEvaluation(); + for (int i = 1; i < collapseValue; i++) { + // The nested evaluations should be DoConstructs (i.e. they should form + // a loop nest). Each DoConstruct is a tuple . + assert(curEval->isA()); + curEval = &*std::next(curEval->getNestedEvaluations().begin()); + } + return curEval; +} + +static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter, + Fortran::lower::pft::Evaluation &eval, + int collapseValue = 0) { + Fortran::lower::pft::Evaluation *curEval = + getCollapsedLoopEval(eval, collapseValue); + + for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations()) + converter.genEval(e); +} + static fir::GlobalOp globalInitialization( Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &firOpBuilder, const Fortran::semantics::Symbol &sym, @@ -282,268 +357,80 @@ static void threadPrivatizeVars(Fortran::lower::AbstractConverter &converter, firOpBuilder.restoreInsertionPoint(insPt); } -static mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, - std::size_t loopVarTypeSize) { - // OpenMP runtime requires 32-bit or 64-bit loop variables. - loopVarTypeSize = loopVarTypeSize * 8; - if (loopVarTypeSize < 32) { - loopVarTypeSize = 32; - } else if (loopVarTypeSize > 64) { - loopVarTypeSize = 64; - mlir::emitWarning(converter.getCurrentLocation(), - "OpenMP loop iteration variable cannot have more than 64 " - "bits size and will be narrowed into 64 bits."); - } - assert((loopVarTypeSize == 32 || loopVarTypeSize == 64) && - "OpenMP loop iteration variable size must be transformed into 32-bit " - "or 64-bit"); - return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize); -} +/// Create the body (block) for an OpenMP Operation. +/// +/// \param [in] op - the operation the body belongs to. +/// \param [in] info - options controlling code-gen for the construction. 
+template +static void createBodyOfOp(Op &op, const OpWithBodyGenInfo &info) { + fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); -static mlir::Operation * -createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter, - mlir::Location loc, mlir::Value indexVal, - const Fortran::semantics::Symbol *sym) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); - firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); + auto insertMarker = [](fir::FirOpBuilder &builder) { + mlir::Value undef = builder.create(builder.getUnknownLoc(), + builder.getIndexType()); + return undef.getDefiningOp(); + }; - mlir::Type tempTy = converter.genType(*sym); - mlir::Value temp = firOpBuilder.create( - loc, tempTy, /*pinned=*/true, /*lengthParams=*/mlir::ValueRange{}, - /*shapeParams*/ mlir::ValueRange{}, - llvm::ArrayRef{ - fir::getAdaptToByRefAttr(firOpBuilder)}); - converter.bindSymbol(*sym, temp); - firOpBuilder.restoreInsertionPoint(insPt); - mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal); - mlir::Operation *storeOp = firOpBuilder.create( - loc, cvtVal, converter.getSymbolAddress(*sym)); - return storeOp; -} + // If an argument for the region is provided then create the block with that + // argument. Also update the symbol's address with the mlir argument value. + // e.g. For loops the argument is the induction variable. And all further + // uses of the induction variable should use this mlir value. + auto regionArgs = + [&]() -> llvm::SmallVector { + if (info.genRegionEntryCB != nullptr) { + return info.genRegionEntryCB(op); + } -static mlir::Value -calculateTripCount(Fortran::lower::AbstractConverter &converter, - mlir::Location loc, llvm::ArrayRef lbs, - llvm::ArrayRef ubs, - llvm::ArrayRef steps) { - using namespace mlir::arith; - assert(lbs.size() == ubs.size() && lbs.size() == steps.size() && - !lbs.empty() && "Invalid bounds or step"); + firOpBuilder.createBlock(&op.getRegion()); + return {}; + }(); + // Mark the earliest insertion point. + mlir::Operation *marker = insertMarker(firOpBuilder); - fir::FirOpBuilder &b = converter.getFirOpBuilder(); + // If it is an unstructured region and is not the outer region of a combined + // construct, create empty blocks for all evaluations. + if (info.eval.lowerAsUnstructured() && !info.outerCombined) + Fortran::lower::createEmptyRegionBlocks( + firOpBuilder, info.eval.getNestedEvaluations()); - // Get the bit width of an integer-like type. - auto widthOf = [](mlir::Type ty) -> unsigned { - if (mlir::isa(ty)) { - return mlir::IndexType::kInternalStorageBitWidth; - } - if (auto tyInt = mlir::dyn_cast(ty)) { - return tyInt.getWidth(); - } - llvm_unreachable("Unexpected type"); - }; + // Start with privatization, so that the lowering of the nested + // code will use the right symbols. + // TODO Check that nothing broke from replacing WsLoopOp and SimdLoopOp here. + constexpr bool isLoop = std::is_same_v; + bool privatize = info.clauses && !info.outerCombined; - // For a type that is either IntegerType or IndexType, return the - // equivalent IntegerType. In the former case this is a no-op. 
- auto asIntTy = [&](mlir::Type ty) -> mlir::IntegerType { - if (ty.isIndex()) { - return mlir::IntegerType::get(ty.getContext(), widthOf(ty)); + firOpBuilder.setInsertionPoint(marker); + std::optional tempDsp; + if (privatize) { + if (!info.dsp) { + tempDsp.emplace(info.converter, *info.clauses, info.eval); + tempDsp->processStep1(); } - assert(ty.isIntOrIndex() && "Unexpected type"); - return mlir::cast(ty); - }; + } - // For two given values, establish a common signless IntegerType - // that can represent any value of type of x and of type of y, - // and return the pair of x, y converted to the new type. - auto unifyToSignless = - [&](fir::FirOpBuilder &b, mlir::Value x, - mlir::Value y) -> std::pair { - auto tyX = asIntTy(x.getType()), tyY = asIntTy(y.getType()); - unsigned width = std::max(widthOf(tyX), widthOf(tyY)); - auto wideTy = mlir::IntegerType::get(b.getContext(), width, - mlir::IntegerType::Signless); - return std::make_pair(b.createConvert(loc, wideTy, x), - b.createConvert(loc, wideTy, y)); - }; + if constexpr (std::is_same_v) { + threadPrivatizeVars(info.converter, info.eval); + if (info.clauses) { + firOpBuilder.setInsertionPoint(marker); + CopyinClauseOps clauseOps; + ClauseProcessor(info.converter, info.semaCtx, *info.clauses) + .processCopyin(clauseOps); + } + } - // Start with signless i32 by default. - auto tripCount = b.createIntegerConstant(loc, b.getI32Type(), 1); - - for (auto [origLb, origUb, origStep] : llvm::zip(lbs, ubs, steps)) { - auto tmpS0 = b.createIntegerConstant(loc, origStep.getType(), 0); - auto [step, step0] = unifyToSignless(b, origStep, tmpS0); - auto reverseCond = b.create(loc, CmpIPredicate::slt, step, step0); - auto negStep = b.create(loc, step0, step); - mlir::Value absStep = b.create(loc, reverseCond, negStep, step); - - auto [lb, ub] = unifyToSignless(b, origLb, origUb); - auto start = b.create(loc, reverseCond, ub, lb); - auto end = b.create(loc, reverseCond, lb, ub); - - mlir::Value range = b.create(loc, end, start); - auto rangeCond = b.create(loc, CmpIPredicate::slt, end, start); - std::tie(range, absStep) = unifyToSignless(b, range, absStep); - // numSteps = (range /u absStep) + 1 - auto numSteps = - b.create(loc, b.create(loc, range, absStep), - b.createIntegerConstant(loc, range.getType(), 1)); - - auto trip0 = b.createIntegerConstant(loc, numSteps.getType(), 0); - auto loopTripCount = b.create(loc, rangeCond, trip0, numSteps); - auto [totalTC, thisTC] = unifyToSignless(b, tripCount, loopTripCount); - tripCount = b.create(loc, totalTC, thisTC); - } - - return tripCount; -} - -struct OpWithBodyGenInfo { - /// A type for a code-gen callback function. This takes as argument the op for - /// which the code is being generated and returns the arguments of the op's - /// region. 
- using GenOMPRegionEntryCBFn = - std::function( - mlir::Operation *)>; - - OpWithBodyGenInfo(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - mlir::Location loc, Fortran::lower::pft::Evaluation &eval) - : converter(converter), semaCtx(semaCtx), loc(loc), eval(eval) {} - - OpWithBodyGenInfo &setGenNested(bool value) { - genNested = value; - return *this; - } - - OpWithBodyGenInfo &setOuterCombined(bool value) { - outerCombined = value; - return *this; - } - - OpWithBodyGenInfo &setClauses(const Fortran::parser::OmpClauseList *value) { - clauses = value; - return *this; - } - - OpWithBodyGenInfo &setDataSharingProcessor(DataSharingProcessor *value) { - dsp = value; - return *this; - } - - OpWithBodyGenInfo & - setReductions(llvm::SmallVector *value1, - llvm::SmallVector *value2) { - reductionSymbols = value1; - reductionTypes = value2; - return *this; - } - - OpWithBodyGenInfo &setGenRegionEntryCb(GenOMPRegionEntryCBFn value) { - genRegionEntryCB = value; - return *this; - } - - /// [inout] converter to use for the clauses. - Fortran::lower::AbstractConverter &converter; - /// [in] Semantics context - Fortran::semantics::SemanticsContext &semaCtx; - /// [in] location in source code. - mlir::Location loc; - /// [in] current PFT node/evaluation. - Fortran::lower::pft::Evaluation &eval; - /// [in] whether to generate FIR for nested evaluations - bool genNested = true; - /// [in] is this an outer operation - prevents privatization. - bool outerCombined = false; - /// [in] list of clauses to process. - const Fortran::parser::OmpClauseList *clauses = nullptr; - /// [in] if provided, processes the construct's data-sharing attributes. - DataSharingProcessor *dsp = nullptr; - /// [in] if provided, list of reduction symbols - llvm::SmallVector *reductionSymbols = - nullptr; - /// [in] if provided, list of reduction types - llvm::SmallVector *reductionTypes = nullptr; - /// [in] if provided, emits the op's region entry. Otherwise, an emtpy block - /// is created in the region. - GenOMPRegionEntryCBFn genRegionEntryCB = nullptr; -}; - -/// Create the body (block) for an OpenMP Operation. -/// -/// \param [in] op - the operation the body belongs to. -/// \param [in] info - options controlling code-gen for the construction. -template -static void createBodyOfOp(Op &op, OpWithBodyGenInfo &info) { - fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); - - auto insertMarker = [](fir::FirOpBuilder &builder) { - mlir::Value undef = builder.create(builder.getUnknownLoc(), - builder.getIndexType()); - return undef.getDefiningOp(); - }; - - // If an argument for the region is provided then create the block with that - // argument. Also update the symbol's address with the mlir argument value. - // e.g. For loops the argument is the induction variable. And all further - // uses of the induction variable should use this mlir value. - auto regionArgs = - [&]() -> llvm::SmallVector { - if (info.genRegionEntryCB != nullptr) { - return info.genRegionEntryCB(op); - } - - firOpBuilder.createBlock(&op.getRegion()); - return {}; - }(); - // Mark the earliest insertion point. - mlir::Operation *marker = insertMarker(firOpBuilder); - - // If it is an unstructured region and is not the outer region of a combined - // construct, create empty blocks for all evaluations. 
- if (info.eval.lowerAsUnstructured() && !info.outerCombined) - Fortran::lower::createEmptyRegionBlocks( - firOpBuilder, info.eval.getNestedEvaluations()); - - // Start with privatization, so that the lowering of the nested - // code will use the right symbols. - constexpr bool isLoop = std::is_same_v || - std::is_same_v; - bool privatize = info.clauses && !info.outerCombined; - - firOpBuilder.setInsertionPoint(marker); - std::optional tempDsp; - if (privatize) { - if (!info.dsp) { - tempDsp.emplace(info.converter, *info.clauses, info.eval); - tempDsp->processStep1(); - } - } - - if constexpr (std::is_same_v) { - threadPrivatizeVars(info.converter, info.eval); - if (info.clauses) { - firOpBuilder.setInsertionPoint(marker); - ClauseProcessor(info.converter, info.semaCtx, *info.clauses) - .processCopyin(); - } - } - - if (info.genNested) { - // genFIR(Evaluation&) tries to patch up unterminated blocks, causing - // a lot of complications for our approach if the terminator generation - // is delayed past this point. Insert a temporary terminator here, then - // delete it. - firOpBuilder.setInsertionPointToEnd(&op.getRegion().back()); - auto *temp = Fortran::lower::genOpenMPTerminator( - firOpBuilder, op.getOperation(), info.loc); - firOpBuilder.setInsertionPointAfter(marker); - genNestedEvaluations(info.converter, info.eval); - temp->erase(); - } + if (info.genNested) { + // genFIR(Evaluation&) tries to patch up unterminated blocks, causing + // a lot of complications for our approach if the terminator generation + // is delayed past this point. Insert a temporary terminator here, then + // delete it. + firOpBuilder.setInsertionPointToEnd(&op.getRegion().back()); + auto *temp = Fortran::lower::genOpenMPTerminator( + firOpBuilder, op.getOperation(), info.loc); + firOpBuilder.setInsertionPointAfter(marker); + genNestedEvaluations(info.converter, info.eval); + temp->erase(); + } // Get or create a unique exiting block from the given region, or // return nullptr if there is no exiting block. 
@@ -672,453 +559,43 @@ static void genBodyOfTargetDataOp( genNestedEvaluations(converter, eval); } -template -static OpTy genOpWithBody(OpWithBodyGenInfo &info, Args &&...args) { - auto op = info.converter.getFirOpBuilder().create( - info.loc, std::forward(args)...); - createBodyOfOp(op, info); - return op; -} - -static mlir::omp::MasterOp -genMasterOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation) { - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested), - /*resultTypes=*/mlir::TypeRange()); -} - -static mlir::omp::OrderedRegionOp -genOrderedRegionOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation) { - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested), - /*simd=*/false); -} - -static bool evalHasSiblings(Fortran::lower::pft::Evaluation &eval) { - return eval.parent.visit(Fortran::common::visitors{ - [&](const Fortran::lower::pft::Program &parent) { - return parent.getUnits().size() + parent.getCommonBlocks().size() > 1; - }, - [&](const Fortran::lower::pft::Evaluation &parent) { - for (auto &sibling : *parent.evaluationList) - if (&sibling != &eval && !sibling.isEndStmt()) - return true; - - return false; - }, - [&](const auto &parent) { - for (auto &sibling : parent.evaluationList) - if (&sibling != &eval && !sibling.isEndStmt()) - return true; +// This functions creates a block for the body of the targetOp's region. It adds +// all the symbols present in mapSymbols as block arguments to this block. 
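+// The entry block of the region receives one argument per mapped symbol, using
+// the types and locations given in mapSymTypes and mapSymLocs, and each symbol
+// is bound to its corresponding block argument.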
+static void genBodyOfTargetOp( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::omp::TargetOp &targetOp, + const llvm::SmallVector &mapSymTypes, + const llvm::SmallVector &mapSymLocs, + const llvm::SmallVector &mapSymbols, + const mlir::Location ¤tLocation) { + assert(mapSymTypes.size() == mapSymLocs.size()); - return false; - }}); -} + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + mlir::Region ®ion = targetOp.getRegion(); -static mlir::omp::ParallelOp -genParallelOp(Fortran::lower::AbstractConverter &converter, - Fortran::lower::SymMap &symTable, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList, - bool outerCombined = false) { - Fortran::lower::StatementContext stmtCtx; - mlir::Value ifClauseOperand, numThreadsClauseOperand; - mlir::omp::ClauseProcBindKindAttr procBindKindAttr; - llvm::SmallVector allocateOperands, allocatorOperands, - reductionVars; - llvm::SmallVector reductionDeclSymbols; - llvm::SmallVector reductionSymbols; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Parallel, - ifClauseOperand); - cp.processProcBind(procBindKindAttr); - cp.processDefault(); - cp.processAllocate(allocatorOperands, allocateOperands); - if (!outerCombined) - cp.processReduction(currentLocation, reductionVars, reductionDeclSymbols, - &reductionSymbols); + auto *regionBlock = + firOpBuilder.createBlock(®ion, {}, mapSymTypes, mapSymLocs); - llvm::SmallVector reductionTypes; - reductionTypes.reserve(reductionVars.size()); - llvm::transform(reductionVars, std::back_inserter(reductionTypes), - [](mlir::Value v) { return v.getType(); }); + // Clones the `bounds` placing them inside the target region and returns them. 
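+  // Only bounds defined by memory-effect-free operations can be cloned; any
+  // other kind of bound operand is reported as unsupported for now.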
+ auto cloneBound = [&](mlir::Value bound) { + if (mlir::isMemoryEffectFree(bound.getDefiningOp())) { + mlir::Operation *clonedOp = bound.getDefiningOp()->clone(); + regionBlock->push_back(clonedOp); + return clonedOp->getResult(0); - auto reductionCallback = [&](mlir::Operation *op) { - llvm::SmallVector locs(reductionVars.size(), - currentLocation); - auto *block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {}, - reductionTypes, locs); - for (auto [arg, prv] : - llvm::zip_equal(reductionSymbols, block->getArguments())) { - converter.bindSymbol(*arg, prv); } - return reductionSymbols; + TODO(converter.getCurrentLocation(), + "target map clause operand unsupported bound type"); }; - auto offloadModOp = - llvm::cast(*converter.getModuleOp()); - mlir::omp::TargetOp targetOp = - findParentTargetOp(converter.getFirOpBuilder()); - - bool mustEvalOutsideTarget = - targetOp && !offloadModOp.getIsTargetDevice() && !evalHasSiblings(eval); - if (mustEvalOutsideTarget) { - HostClausesInsertionGuard guard(converter.getFirOpBuilder()); - cp.processNumThreads(stmtCtx, numThreadsClauseOperand); - } else { - cp.processNumThreads(stmtCtx, numThreadsClauseOperand); - } - - OpWithBodyGenInfo genInfo = - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setOuterCombined(outerCombined) - .setClauses(&clauseList) - .setReductions(&reductionSymbols, &reductionTypes) - .setGenRegionEntryCb(reductionCallback); - - if (!enableDelayedPrivatization) { - auto parallelOp = genOpWithBody( - genInfo, - /*resultTypes=*/mlir::TypeRange(), ifClauseOperand, - numThreadsClauseOperand, allocateOperands, allocatorOperands, - reductionVars, - reductionDeclSymbols.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - reductionDeclSymbols), - procBindKindAttr, /*private_vars=*/llvm::SmallVector{}, - /*privatizers=*/nullptr); - - if (mustEvalOutsideTarget) { - if (numThreadsClauseOperand) - targetOp.getNumThreadsMutable().assign(numThreadsClauseOperand); - } else { - if (numThreadsClauseOperand) - parallelOp.getNumThreadsVarMutable().assign(numThreadsClauseOperand); - } - - return parallelOp; - } - - bool privatize = !outerCombined; - DataSharingProcessor dsp(converter, clauseList, eval, - /*useDelayedPrivatization=*/true, &symTable); - - if (privatize) - dsp.processStep1(); - - const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo(); - - auto genRegionEntryCB = [&](mlir::Operation *op) { - auto parallelOp = llvm::cast(op); - - llvm::SmallVector reductionLocs(reductionVars.size(), - currentLocation); - - mlir::OperandRange privateVars = parallelOp.getPrivateVars(); - mlir::Region ®ion = parallelOp.getRegion(); - - llvm::SmallVector privateVarTypes = reductionTypes; - privateVarTypes.reserve(privateVarTypes.size() + privateVars.size()); - llvm::transform(privateVars, std::back_inserter(privateVarTypes), - [](mlir::Value v) { return v.getType(); }); - - llvm::SmallVector privateVarLocs = reductionLocs; - privateVarLocs.reserve(privateVarLocs.size() + privateVars.size()); - llvm::transform(privateVars, std::back_inserter(privateVarLocs), - [](mlir::Value v) { return v.getLoc(); }); - - converter.getFirOpBuilder().createBlock(®ion, /*insertPt=*/{}, - privateVarTypes, privateVarLocs); - - llvm::SmallVector allSymbols = - reductionSymbols; - allSymbols.append(delayedPrivatizationInfo.symbols); - for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) { - converter.bindSymbol(*arg, prv); - } - - return 
allSymbols; - }; - - // TODO Merge with the reduction CB. - genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp); - - llvm::SmallVector privatizers( - delayedPrivatizationInfo.privatizers.begin(), - delayedPrivatizationInfo.privatizers.end()); - - auto parallelOp = genOpWithBody( - genInfo, - /*resultTypes=*/mlir::TypeRange(), ifClauseOperand, - /*num_threads_var=*/nullptr, allocateOperands, allocatorOperands, - reductionVars, - reductionDeclSymbols.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - reductionDeclSymbols), - procBindKindAttr, delayedPrivatizationInfo.originalAddresses, - delayedPrivatizationInfo.privatizers.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - privatizers)); - - if (mustEvalOutsideTarget) { - if (numThreadsClauseOperand) - targetOp.getNumThreadsMutable().assign(numThreadsClauseOperand); - } else { - if (numThreadsClauseOperand) - parallelOp.getNumThreadsVarMutable().assign(numThreadsClauseOperand); - } - - return parallelOp; -} - -static mlir::omp::SectionOp -genSectionOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList §ionsClauseList) { - // Currently only private/firstprivate clause is handled, and - // all privatization is done within `omp.section` operations. - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(§ionsClauseList)); -} - -static mlir::omp::SingleOp -genSingleOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &beginClauseList, - const Fortran::parser::OmpClauseList &endClauseList) { - llvm::SmallVector allocateOperands, allocatorOperands; - llvm::SmallVector copyPrivateVars; - llvm::SmallVector copyPrivateFuncs; - mlir::UnitAttr nowaitAttr; - - ClauseProcessor cp(converter, semaCtx, beginClauseList); - cp.processAllocate(allocatorOperands, allocateOperands); - - ClauseProcessor ecp(converter, semaCtx, endClauseList); - ecp.processNowait(nowaitAttr); - ecp.processCopyPrivate(currentLocation, copyPrivateVars, copyPrivateFuncs); - - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(&beginClauseList), - allocateOperands, allocatorOperands, copyPrivateVars, - copyPrivateFuncs.empty() - ? 
nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - copyPrivateFuncs), - nowaitAttr); -} - -static mlir::omp::TaskOp -genTaskOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - Fortran::lower::StatementContext stmtCtx; - mlir::Value ifClauseOperand, finalClauseOperand, priorityClauseOperand; - mlir::UnitAttr untiedAttr, mergeableAttr; - llvm::SmallVector dependTypeOperands; - llvm::SmallVector allocateOperands, allocatorOperands, - dependOperands; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Task, - ifClauseOperand); - cp.processAllocate(allocatorOperands, allocateOperands); - cp.processDefault(); - cp.processFinal(stmtCtx, finalClauseOperand); - cp.processUntied(untiedAttr); - cp.processMergeable(mergeableAttr); - cp.processPriority(stmtCtx, priorityClauseOperand); - cp.processDepend(dependTypeOperands, dependOperands); - cp.processTODO( - currentLocation, llvm::omp::Directive::OMPD_task); - - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(&clauseList), - ifClauseOperand, finalClauseOperand, untiedAttr, mergeableAttr, - /*in_reduction_vars=*/mlir::ValueRange(), - /*in_reductions=*/nullptr, priorityClauseOperand, - dependTypeOperands.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - dependTypeOperands), - dependOperands, allocateOperands, allocatorOperands); -} - -static mlir::omp::TaskGroupOp -genTaskGroupOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - llvm::SmallVector allocateOperands, allocatorOperands; - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processAllocate(allocatorOperands, allocateOperands); - cp.processTODO( - currentLocation, llvm::omp::Directive::OMPD_taskgroup); - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(&clauseList), - /*task_reduction_vars=*/mlir::ValueRange(), - /*task_reductions=*/nullptr, allocateOperands, allocatorOperands); -} - -static mlir::omp::DataOp -genDataOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - Fortran::lower::StatementContext stmtCtx; - mlir::Value ifClauseOperand, deviceOperand; - llvm::SmallVector mapOperands, devicePtrOperands, - deviceAddrOperands; - llvm::SmallVector useDeviceTypes; - llvm::SmallVector useDeviceLocs; - llvm::SmallVector useDeviceSymbols; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetData, - ifClauseOperand); - cp.processDevice(stmtCtx, deviceOperand); - cp.processUseDevicePtr(devicePtrOperands, useDeviceTypes, useDeviceLocs, - useDeviceSymbols); - cp.processUseDeviceAddr(deviceAddrOperands, useDeviceTypes, useDeviceLocs, - useDeviceSymbols); - cp.processMap(currentLocation, llvm::omp::Directive::OMPD_target_data, - stmtCtx, mapOperands); - - auto dataOp = 
converter.getFirOpBuilder().create( - currentLocation, ifClauseOperand, deviceOperand, devicePtrOperands, - deviceAddrOperands, mapOperands); - genBodyOfTargetDataOp(converter, semaCtx, eval, genNested, dataOp, - useDeviceTypes, useDeviceLocs, useDeviceSymbols, - currentLocation); - return dataOp; -} - -template -static OpTy -genEnterExitUpdateDataOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - Fortran::lower::StatementContext stmtCtx; - mlir::Value ifClauseOperand, deviceOperand; - mlir::UnitAttr nowaitAttr; - llvm::SmallVector mapOperands, dependOperands; - llvm::SmallVector dependTypeOperands; - - Fortran::parser::OmpIfClause::DirectiveNameModifier directiveName; - // GCC 9.3.0 emits a (probably) bogus warning about an unused variable. - [[maybe_unused]] llvm::omp::Directive directive; - if constexpr (std::is_same_v) { - directiveName = - Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetEnterData; - directive = llvm::omp::Directive::OMPD_target_enter_data; - } else if constexpr (std::is_same_v) { - directiveName = - Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetExitData; - directive = llvm::omp::Directive::OMPD_target_exit_data; - } else if constexpr (std::is_same_v) { - directiveName = - Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetUpdate; - directive = llvm::omp::Directive::OMPD_target_update; - } else { - return nullptr; - } - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(directiveName, ifClauseOperand); - cp.processDevice(stmtCtx, deviceOperand); - cp.processDepend(dependTypeOperands, dependOperands); - cp.processNowait(nowaitAttr); - - if constexpr (std::is_same_v) { - cp.processMotionClauses(stmtCtx, - mapOperands); - cp.processMotionClauses(stmtCtx, - mapOperands); - - } else { - cp.processMap(currentLocation, directive, stmtCtx, mapOperands); - } - - return firOpBuilder.create( - currentLocation, ifClauseOperand, deviceOperand, - dependTypeOperands.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - dependTypeOperands), - dependOperands, nowaitAttr, mapOperands); -} - -// This functions creates a block for the body of the targetOp's region. It adds -// all the symbols present in mapSymbols as block arguments to this block. -static void genBodyOfTargetOp( - Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::omp::TargetOp &targetOp, - const llvm::SmallVector &mapSymTypes, - const llvm::SmallVector &mapSymLocs, - const llvm::SmallVector &mapSymbols, - const mlir::Location ¤tLocation) { - assert(mapSymTypes.size() == mapSymLocs.size()); - - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::Region ®ion = targetOp.getRegion(); - - auto *regionBlock = - firOpBuilder.createBlock(®ion, {}, mapSymTypes, mapSymLocs); - - // Clones the `bounds` placing them inside the target region and returns them. 
- auto cloneBound = [&](mlir::Value bound) { - if (mlir::isMemoryEffectFree(bound.getDefiningOp())) { - mlir::Operation *clonedOp = bound.getDefiningOp()->clone(); - regionBlock->push_back(clonedOp); - return clonedOp->getResult(0); - } - TODO(converter.getCurrentLocation(), - "target map clause operand unsupported bound type"); - }; - - auto cloneBounds = [cloneBound](llvm::ArrayRef bounds) { - llvm::SmallVector clonedBounds; - for (mlir::Value bound : bounds) - clonedBounds.emplace_back(cloneBound(bound)); - return clonedBounds; - }; + auto cloneBounds = [cloneBound](llvm::ArrayRef bounds) { + llvm::SmallVector clonedBounds; + for (mlir::Value bound : bounds) + clonedBounds.emplace_back(cloneBound(bound)); + return clonedBounds; + }; // Bind the symbols to their corresponding block arguments. for (auto [argIndex, argSymbol] : llvm::enumerate(mapSymbols)) { @@ -1187,54 +664,1053 @@ static void genBodyOfTargetOp( firOpBuilder.createTemporary(val.getLoc(), val.getType()); firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); - llvm::SmallVector bounds; - std::stringstream name; - firOpBuilder.setInsertionPoint(targetOp); - mlir::Value mapOp = createMapInfoOp( - firOpBuilder, copyVal.getLoc(), copyVal, mlir::Value{}, name.str(), - bounds, llvm::SmallVector{}, - static_cast< - std::underlying_type_t>( - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), - mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); - targetOp.getMapOperandsMutable().append(mapOp); - mlir::Value clonedValArg = - region.addArgument(copyVal.getType(), copyVal.getLoc()); - firOpBuilder.setInsertionPointToStart(regionBlock); - auto loadOp = firOpBuilder.create(clonedValArg.getLoc(), - clonedValArg); - val.replaceUsesWithIf( - loadOp->getResult(0), [regionBlock](mlir::OpOperand &use) { - return use.getOwner()->getBlock() == regionBlock; - }); - firOpBuilder.setInsertionPoint(regionBlock, savedIP); - } + llvm::SmallVector bounds; + std::stringstream name; + firOpBuilder.setInsertionPoint(targetOp); + mlir::Value mapOp = createMapInfoOp( + firOpBuilder, copyVal.getLoc(), copyVal, mlir::Value{}, name.str(), + bounds, llvm::SmallVector{}, + static_cast< + std::underlying_type_t>( + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), + mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); + targetOp.getMapOperandsMutable().append(mapOp); + mlir::Value clonedValArg = + region.addArgument(copyVal.getType(), copyVal.getLoc()); + firOpBuilder.setInsertionPointToStart(regionBlock); + auto loadOp = firOpBuilder.create(clonedValArg.getLoc(), + clonedValArg); + val.replaceUsesWithIf( + loadOp->getResult(0), [regionBlock](mlir::OpOperand &use) { + return use.getOwner()->getBlock() == regionBlock; + }); + firOpBuilder.setInsertionPoint(regionBlock, savedIP); + } + } + valuesDefinedAbove.clear(); + mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); + } + + // Insert dummy instruction to remember the insertion position. The + // marker will be deleted since there are not uses. + // In the HLFIR flow there are hlfir.declares inserted above while + // setting block arguments. + mlir::Value undefMarker = firOpBuilder.create( + targetOp.getOperation()->getLoc(), firOpBuilder.getIndexType()); + + // Create blocks for unstructured regions. This has to be done since + // blocks are initially allocated with the function as the parent region. 
+ if (eval.lowerAsUnstructured()) { + Fortran::lower::createEmptyRegionBlocks( + firOpBuilder, eval.getNestedEvaluations()); + } + + firOpBuilder.create(currentLocation); + + // Create the insertion point after the marker. + firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); + if (genNested) + genNestedEvaluations(converter, eval); +} + +template +static OpTy genOpWithBody(const OpWithBodyGenInfo &info, Args &&...args) { + auto op = info.converter.getFirOpBuilder().create( + info.loc, std::forward(args)...); + createBodyOfOp(op, info); + return op; +} + +static mlir::Value +calculateTripCount(Fortran::lower::AbstractConverter &converter, + mlir::Location loc, llvm::ArrayRef lbs, + llvm::ArrayRef ubs, + llvm::ArrayRef steps) { + using namespace mlir::arith; + assert(lbs.size() == ubs.size() && lbs.size() == steps.size() && + !lbs.empty() && "Invalid bounds or step"); + + fir::FirOpBuilder &b = converter.getFirOpBuilder(); + + // Get the bit width of an integer-like type. + auto widthOf = [](mlir::Type ty) -> unsigned { + if (mlir::isa(ty)) { + return mlir::IndexType::kInternalStorageBitWidth; + } + if (auto tyInt = mlir::dyn_cast(ty)) { + return tyInt.getWidth(); + } + llvm_unreachable("Unexpected type"); + }; + + // For a type that is either IntegerType or IndexType, return the + // equivalent IntegerType. In the former case this is a no-op. + auto asIntTy = [&](mlir::Type ty) -> mlir::IntegerType { + if (ty.isIndex()) { + return mlir::IntegerType::get(ty.getContext(), widthOf(ty)); + } + assert(ty.isIntOrIndex() && "Unexpected type"); + return mlir::cast(ty); + }; + + // For two given values, establish a common signless IntegerType + // that can represent any value of type of x and of type of y, + // and return the pair of x, y converted to the new type. + auto unifyToSignless = + [&](fir::FirOpBuilder &b, mlir::Value x, + mlir::Value y) -> std::pair { + auto tyX = asIntTy(x.getType()), tyY = asIntTy(y.getType()); + unsigned width = std::max(widthOf(tyX), widthOf(tyY)); + auto wideTy = mlir::IntegerType::get(b.getContext(), width, + mlir::IntegerType::Signless); + return std::make_pair(b.createConvert(loc, wideTy, x), + b.createConvert(loc, wideTy, y)); + }; + + // Start with signless i32 by default. 
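+  // For each loop in the nest, normalize the bounds so that iteration runs
+  // upwards, take the absolute value of the step and compute
+  // numSteps = (end - start) /u |step| + 1 (or 0 when the range is empty).
+  // The collapsed trip count is the product of the per-loop counts.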
+ auto tripCount = b.createIntegerConstant(loc, b.getI32Type(), 1); + + for (auto [origLb, origUb, origStep] : llvm::zip(lbs, ubs, steps)) { + auto tmpS0 = b.createIntegerConstant(loc, origStep.getType(), 0); + auto [step, step0] = unifyToSignless(b, origStep, tmpS0); + auto reverseCond = b.create(loc, CmpIPredicate::slt, step, step0); + auto negStep = b.create(loc, step0, step); + mlir::Value absStep = b.create(loc, reverseCond, negStep, step); + + auto [lb, ub] = unifyToSignless(b, origLb, origUb); + auto start = b.create(loc, reverseCond, ub, lb); + auto end = b.create(loc, reverseCond, lb, ub); + + mlir::Value range = b.create(loc, end, start); + auto rangeCond = b.create(loc, CmpIPredicate::slt, end, start); + std::tie(range, absStep) = unifyToSignless(b, range, absStep); + // numSteps = (range /u absStep) + 1 + auto numSteps = + b.create(loc, b.create(loc, range, absStep), + b.createIntegerConstant(loc, range.getType(), 1)); + + auto trip0 = b.createIntegerConstant(loc, numSteps.getType(), 0); + auto loopTripCount = b.create(loc, rangeCond, trip0, numSteps); + auto [totalTC, thisTC] = unifyToSignless(b, tripCount, loopTripCount); + tripCount = b.create(loc, totalTC, thisTC); + } + + return tripCount; +} + +static bool evalHasSiblings(Fortran::lower::pft::Evaluation &eval) { + return eval.parent.visit(Fortran::common::visitors{ + [&](const Fortran::lower::pft::Program &parent) { + return parent.getUnits().size() + parent.getCommonBlocks().size() > 1; + }, + [&](const Fortran::lower::pft::Evaluation &parent) { + for (auto &sibling : *parent.evaluationList) + if (&sibling != &eval && !sibling.isEndStmt()) + return true; + + return false; + }, + [&](const auto &parent) { + for (auto &sibling : parent.evaluationList) + if (&sibling != &eval && !sibling.isEndStmt()) + return true; + + return false; + }}); +} + +/// Extract the list of function and variable symbols affected by the given +/// 'declare target' directive and return the intended device type for them. 
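+/// The symbols, together with the capture clause (to, enter or link) that
+/// applies to each of them, are returned through \p enterLinkToClauseOps.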
+static mlir::omp::DeclareTargetDeviceType getDeclareTargetInfo( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, + EnterLinkToClauseOps &enterLinkToClauseOps) { + + // The default capture type + DeviceTypeClauseOps deviceTypeClauseOps = { + mlir::omp::DeclareTargetDeviceType::any}; + const auto &spec = std::get( + declareTargetConstruct.t); + + if (const auto *objectList{ + Fortran::parser::Unwrap(spec.u)}) { + // Case: declare target(func, var1, var2) + gatherFuncAndVarSyms(*objectList, mlir::omp::DeclareTargetCaptureClause::to, + enterLinkToClauseOps.symbolAndClause); + } else if (const auto *clauseList{ + Fortran::parser::Unwrap( + spec.u)}) { + if (clauseList->v.empty()) { + // Case: declare target, implicit capture of function + enterLinkToClauseOps.symbolAndClause.emplace_back( + mlir::omp::DeclareTargetCaptureClause::to, + eval.getOwningProcedure()->getSubprogramSymbol()); + } + + ClauseProcessor cp(converter, semaCtx, *clauseList); + cp.processDeviceType(deviceTypeClauseOps); + cp.processEnter(enterLinkToClauseOps); + cp.processLink(enterLinkToClauseOps); + cp.processTo(enterLinkToClauseOps); + cp.processTODO( + converter.getCurrentLocation(), + llvm::omp::Directive::OMPD_declare_target); + } + + return deviceTypeClauseOps.deviceType; +} + +static void collectDeferredDeclareTargets( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, + llvm::SmallVectorImpl + &deferredDeclareTarget) { + EnterLinkToClauseOps clauseOps; + mlir::omp::DeclareTargetDeviceType devType = getDeclareTargetInfo( + converter, semaCtx, eval, declareTargetConstruct, clauseOps); + // Return the device type only if at least one of the targets for the + // directive is a function or subroutine + mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); + + for (const DeclareTargetCapturePair &symClause : clauseOps.symbolAndClause) { + mlir::Operation *op = mod.lookupSymbol(converter.mangleName( + std::get(symClause))); + + if (!op) { + deferredDeclareTarget.push_back( + {std::get<0>(symClause), devType, std::get<1>(symClause)}); + } + } +} + +static std::optional +getDeclareTargetFunctionDevice( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPDeclareTargetConstruct + &declareTargetConstruct) { + EnterLinkToClauseOps clauseOps; + mlir::omp::DeclareTargetDeviceType deviceType = getDeclareTargetInfo( + converter, semaCtx, eval, declareTargetConstruct, clauseOps); + + // Return the device type only if at least one of the targets for the + // directive is a function or subroutine + mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); + for (const DeclareTargetCapturePair &symClause : clauseOps.symbolAndClause) { + mlir::Operation *op = mod.lookupSymbol(converter.mangleName( + std::get(symClause))); + + if (mlir::isa_and_nonnull(op)) + return deviceType; + } + + return std::nullopt; +} + +static mlir::Operation * +createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter, + mlir::Location loc, mlir::Value indexVal, + const Fortran::semantics::Symbol *sym) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + 
mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); + firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); + + mlir::Type tempTy = converter.genType(*sym); + mlir::Value temp = firOpBuilder.create( + loc, tempTy, /*pinned=*/true, /*lengthParams=*/mlir::ValueRange{}, + /*shapeParams*/ mlir::ValueRange{}, + llvm::ArrayRef{ + fir::getAdaptToByRefAttr(firOpBuilder)}); + converter.bindSymbol(*sym, temp); + firOpBuilder.restoreInsertionPoint(insPt); + mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal); + mlir::Operation *storeOp = firOpBuilder.create( + loc, cvtVal, converter.getSymbolAddress(*sym)); + return storeOp; +} + +static void +genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter, + mlir::Location &loc, + llvm::ArrayRef args) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + auto ®ion = op->getRegion(0); + + std::size_t loopVarTypeSize = 0; + for (const Fortran::semantics::Symbol *arg : args) + loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); + mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); + llvm::SmallVector tiv(args.size(), loopVarType); + llvm::SmallVector locs(args.size(), loc); + firOpBuilder.createBlock(®ion, {}, tiv, locs); + // The argument is not currently in memory, so make a temporary for the + // argument, and store it there, then bind that location to the argument. + mlir::Operation *storeOp = nullptr; + for (auto [argIndex, argSymbol] : llvm::enumerate(args)) { + mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex)); + storeOp = + createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); + } + firOpBuilder.setInsertionPointAfter(storeOp); +} + +static void genReductionVars( + mlir::Operation *op, Fortran::lower::AbstractConverter &converter, + mlir::Location &loc, + llvm::ArrayRef reductionArgs, + llvm::ArrayRef reductionTypes) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + llvm::SmallVector blockArgLocs(reductionArgs.size(), loc); + + mlir::Block *entryBlock = firOpBuilder.createBlock( + &op->getRegion(0), {}, reductionTypes, blockArgLocs); + + // Bind the reduction arguments to their block arguments + for (auto [arg, prv] : + llvm::zip_equal(reductionArgs, entryBlock->getArguments())) { + converter.bindSymbol(*arg, prv); + } +} + +//===----------------------------------------------------------------------===// +// Code generation functions for clauses +//===----------------------------------------------------------------------===// + +// TODO Try to compile, check privatization of simple wsloop/simdloop/distribute +// TODO Move common args and functions into a ConstructProcessor class + +static void +genCriticalDeclareClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + CriticalDeclareOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processHint(clauseOps); +} + +static void genDataClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, DataOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processDevice(stmtCtx, clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetData, + clauseOps); + 
cp.processMap(loc, stmtCtx, clauseOps); + cp.processUseDeviceAddr(clauseOps); + cp.processUseDevicePtr(clauseOps); +} + +static void genDistributeClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + DistributeOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_distribute); +} + +static void genEnterExitUpdateDataClauses( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, mlir::Location loc, + Fortran::parser::OmpIfClause::DirectiveNameModifier directive, + EnterExitUpdateDataOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processDepend(clauseOps); + cp.processDevice(stmtCtx, clauseOps); + cp.processIf(directive, clauseOps); + cp.processNowait(clauseOps); + + if (directive == + Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetUpdate) { + cp.processMotionClauses(stmtCtx, clauseOps); + cp.processMotionClauses(stmtCtx, + clauseOps); + } else { + cp.processMap(loc, stmtCtx, clauseOps); + } +} + +static void genFlushClauses( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const std::optional &objects, + const std::optional> + &clauses, + mlir::Location loc, llvm::SmallVectorImpl &operandRange) { + if (objects) + genObjectList(*objects, converter, operandRange); + + if (clauses && clauses->size() > 0) + TODO(converter.getCurrentLocation(), "Handle OmpMemoryOrderClause"); +} + +static void genLoopNestClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + LoopNestOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processCollapse(loc, eval, clauseOps); +} + +static void +genOrderedRegionClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + OrderedRegionOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_ordered); +} + +static void genParallelClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, bool processReduction, + bool evalNumThreadsOutsideTarget, + ParallelOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processDefault(); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Parallel, + clauseOps); + cp.processProcBind(clauseOps); + + if (processReduction) + cp.processReduction(loc, clauseOps); + + if (evalNumThreadsOutsideTarget) { + HostClausesInsertionGuard guard(converter.getFirOpBuilder()); + cp.processNumThreads(stmtCtx, clauseOps); + } else { + cp.processNumThreads(stmtCtx, clauseOps); + } +} + +static void genSectionsClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + bool 
clausesFromBeginSections, + SectionsOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + if (clausesFromBeginSections) { + cp.processAllocate(clauseOps); + cp.processSectionsReduction(loc, clauseOps); + } else { + cp.processNowait(clauseOps); + } +} + +static void genSimdLoopClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + SimdLoopOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Simd, + clauseOps); + cp.processReduction(loc, clauseOps); + cp.processSafelen(clauseOps); + cp.processSimdlen(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_simd); +} + +static void genSingleClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &beginClauses, + const Fortran::parser::OmpClauseList &endClauses, + mlir::Location loc, SingleOpClauseOps &clauseOps) { + ClauseProcessor bcp(converter, semaCtx, beginClauses); + bcp.processAllocate(clauseOps); + + ClauseProcessor ecp(converter, semaCtx, endClauses); + ecp.processCopyprivate(loc, clauseOps); + ecp.processNowait(clauseOps); +} + +static void genTargetClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, bool processHostOnlyClauses, + bool processReduction, + TargetOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processDepend(clauseOps); + cp.processDevice(stmtCtx, clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Target, + clauseOps); + cp.processMap(loc, stmtCtx, clauseOps); + cp.processThreadLimit(stmtCtx, clauseOps); + + if (processHostOnlyClauses) + cp.processNowait(clauseOps); + + if (processReduction) + cp.processTargetReduction(clauseOps); + + cp.processTODO( + loc, llvm::omp::Directive::OMPD_target); +} + +static void genTaskClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, TaskOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processDefault(); + cp.processDepend(clauseOps); + cp.processFinal(stmtCtx, clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Task, + clauseOps); + cp.processMergeable(clauseOps); + cp.processPriority(stmtCtx, clauseOps); + cp.processUntied(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_task); +} + +static void genTaskGroupClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + TaskGroupOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_taskgroup); +} + +static void genTaskLoopClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + 
TaskLoopOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processDefault(); + cp.processFinal(stmtCtx, clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Taskloop, + clauseOps); + cp.processMergeable(clauseOps); + cp.processPriority(stmtCtx, clauseOps); + cp.processReduction(loc, clauseOps); + cp.processUntied(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_taskloop); +} + +static void genTaskWaitClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + TaskWaitOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_taskwait); +} + +static void genTeamsClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, bool evalNumTeamsOutsideTarget, + TeamsOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Teams, + clauseOps); + cp.processDefault(); + + if (evalNumTeamsOutsideTarget) { + HostClausesInsertionGuard guard(converter.getFirOpBuilder()); + cp.processNumTeams(stmtCtx, clauseOps); + cp.processThreadLimit(stmtCtx, clauseOps); + } else { + cp.processNumTeams(stmtCtx, clauseOps); + cp.processThreadLimit(stmtCtx, clauseOps); + } +} + +static void genWsLoopClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &beginClauses, + const Fortran::parser::OmpClauseList *endClauses, + mlir::Location loc, WsloopOpClauseOps &clauseOps) { + ClauseProcessor bcp(converter, semaCtx, beginClauses); + bcp.processOrdered(clauseOps); + bcp.processReduction(loc, clauseOps); + bcp.processSchedule(stmtCtx, clauseOps); + + if (endClauses) { + ClauseProcessor ecp(converter, semaCtx, *endClauses); + ecp.processNowait(clauseOps); + } + + bcp.processTODO( + loc, llvm::omp::Directive::OMPD_do); +} + +//===----------------------------------------------------------------------===// +// Code generation functions for leaf constructs +//===----------------------------------------------------------------------===// + +// TODO Pass OpClauseOps as arg to all genOp + +static mlir::omp::BarrierOp +genBarrierOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation) { + return converter.getFirOpBuilder().create( + currentLocation); +} + +static mlir::omp::CriticalOp +genCriticalOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList, + const std::optional &name) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + mlir::FlatSymbolRefAttr nameAttr; + + if (name.has_value()) { + CriticalDeclareOpClauseOps clauseOps; + genCriticalDeclareClauses(converter, semaCtx, clauseList, currentLocation, + clauseOps); + + std::string nameStr = name.value().ToString(); + mlir::ModuleOp module = firOpBuilder.getModule(); + auto 
global = module.lookupSymbol(nameStr); + if (!global) { + mlir::OpBuilder modBuilder(module.getBodyRegion()); + global = modBuilder.create( + currentLocation, firOpBuilder.getStringAttr(nameStr), + clauseOps.hintAttr); + } + nameAttr = mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(), + global.getSymName()); + } + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested), + nameAttr); +} + +static mlir::omp::DataOp +genDataOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + Fortran::lower::StatementContext stmtCtx; + DataOpClauseOps clauseOps; + genDataClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + clauseOps); + + auto dataOp = converter.getFirOpBuilder().create( + currentLocation, clauseOps.ifVar, clauseOps.deviceVar, + clauseOps.useDevicePtrVars, clauseOps.useDeviceAddrVars, + clauseOps.mapVars); + + genBodyOfTargetDataOp(converter, semaCtx, eval, genNested, dataOp, + clauseOps.useDeviceTypes, clauseOps.useDeviceLocs, + clauseOps.useDeviceSymbols, currentLocation); + return dataOp; +} + +static mlir::omp::DistributeOp +genDistributeOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool isComposite, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList, + bool outerCombined = false) { + DistributeOpClauseOps clauseOps; + genDistributeClauses(converter, semaCtx, clauseList, currentLocation, + clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(false) + .setOuterCombined(outerCombined) + .setClauses(&clauseList), + clauseOps.distScheduleStaticAttr, clauseOps.distScheduleChunkSizeVar, + clauseOps.allocateVars, clauseOps.allocatorVars, clauseOps.orderAttr, + isComposite ? converter.getFirOpBuilder().getUnitAttr() : nullptr); +} + +template +static OpTy +genEnterExitUpdateDataOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + + Fortran::parser::OmpIfClause::DirectiveNameModifier directive; + if constexpr (std::is_same_v) { + directive = + Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetEnterData; + } else if constexpr (std::is_same_v) { + directive = + Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetExitData; + } else if constexpr (std::is_same_v) { + directive = + Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetUpdate; + } else { + llvm_unreachable("Unexpected TARGET data construct"); + } + + EnterExitUpdateDataOpClauseOps clauseOps; + genEnterExitUpdateDataClauses(converter, semaCtx, stmtCtx, clauseList, + currentLocation, directive, clauseOps); + + return firOpBuilder.create( + currentLocation, clauseOps.ifVar, clauseOps.deviceVar, + clauseOps.dependTypeAttrs.empty() + ? 
nullptr + : firOpBuilder.getArrayAttr(clauseOps.dependTypeAttrs), + clauseOps.dependVars, clauseOps.nowaitAttr, clauseOps.mapVars); +} + +static mlir::omp::FlushOp +genFlushOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation, + const std::optional &objectList, + const std::optional> + &clauseList) { + llvm::SmallVector operandRange; + genFlushClauses(converter, semaCtx, objectList, clauseList, currentLocation, + operandRange); + + return converter.getFirOpBuilder().create( + converter.getCurrentLocation(), operandRange); +} + +static mlir::omp::LoopNestOp +genLoopNestOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList, + const LoopNestOpClauseOps &clauseOps, DataSharingProcessor &dsp) { + auto *nestedEval = + getCollapsedLoopEval(eval, Fortran::lower::getCollapseValue(clauseList)); + + auto ivCallback = [&](mlir::Operation *op) { + genLoopVars(op, converter, currentLocation, clauseOps.loopIV); + return clauseOps.loopIV; + }; + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, *nestedEval) + .setClauses(&clauseList) + .setDataSharingProcessor(&dsp) + .setGenRegionEntryCb(ivCallback) + .setGenNested(true), + clauseOps.loopLBVar, clauseOps.loopUBVar, clauseOps.loopStepVar, + /*inclusive=*/converter.getFirOpBuilder().getUnitAttr()); +} + +static mlir::omp::MasterOp +genMasterOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation) { + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested), + /*resultTypes=*/mlir::TypeRange()); +} + +static mlir::omp::OrderedRegionOp +genOrderedRegionOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + OrderedRegionOpClauseOps clauseOps; + genOrderedRegionClauses(converter, semaCtx, clauseList, currentLocation, + clauseOps); + + // TODO Store clauseOps.parLevelThreadsAttr in op. 
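+  // Note: only the simd attribute is forwarded to the operation here; the
+  // threads variant is still pending (see the TODO above).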
+ return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested), + clauseOps.parLevelSimdAttr); +} + +static mlir::omp::ParallelOp +genParallelOp(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + bool isComposite, mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList, + bool outerCombined = false) { + // TODO Distinguish between genParallelOp as block vs wrapper + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + ParallelOpClauseOps clauseOps; + clauseOps.reductionSymbols.emplace(); + + auto offloadModOp = + llvm::cast(*converter.getModuleOp()); + mlir::omp::TargetOp targetOp = findParentTargetOp(firOpBuilder); + + bool evalNumThreadsOutsideTarget = + targetOp && !offloadModOp.getIsTargetDevice() && !evalHasSiblings(eval); + + genParallelClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + /*processReduction=*/!outerCombined, + evalNumThreadsOutsideTarget, clauseOps); + + auto reductionCallback = [&](mlir::Operation *op) { + genReductionVars(op, converter, currentLocation, + *clauseOps.reductionSymbols, clauseOps.reductionTypes); + return *clauseOps.reductionSymbols; + }; + + OpWithBodyGenInfo genInfo = + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested) + .setOuterCombined(outerCombined) + .setClauses(&clauseList) + .setReductions(*clauseOps.reductionSymbols, clauseOps.reductionTypes) + .setGenRegionEntryCb(reductionCallback); + + if (!enableDelayedPrivatization) { + auto parallelOp = genOpWithBody( + genInfo, clauseOps.ifVar, /*num_threads_var=*/nullptr, + clauseOps.allocateVars, clauseOps.allocatorVars, + clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols), + clauseOps.procBindKindAttr, clauseOps.privateVars, + clauseOps.privatizers.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.privatizers), + isComposite ? firOpBuilder.getUnitAttr() : nullptr); + + if (clauseOps.numThreadsVar) { + if (evalNumThreadsOutsideTarget) + targetOp.getNumThreadsMutable().assign(clauseOps.numThreadsVar); + else + parallelOp.getNumThreadsVarMutable().assign(clauseOps.numThreadsVar); } - valuesDefinedAbove.clear(); - mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); + + return parallelOp; } - // Insert dummy instruction to remember the insertion position. The - // marker will be deleted since there are not uses. - // In the HLFIR flow there are hlfir.declares inserted above while - // setting block arguments. - mlir::Value undefMarker = firOpBuilder.create( - targetOp.getOperation()->getLoc(), firOpBuilder.getIndexType()); + // TODO Integrate delayed privatization better with the new approach. + // - Store delayedPrivatizationInfo.{originalAddresses,privatizers} in + // clauseOps.{privateVars,privatizers}. + // - Outline genRegionEntryCB into composable genPrivatizedVars. + // - Refactor to create the omp.parallel op in a single place and possibly + // only use a single callback. + // - Check whether the external DataSharingProcessor could be used, and skip + // the call to processStep1() here. Perhaps also skip setting it in the + // OpWithBodyGenInfo structure. - // Create blocks for unstructured regions. 
This has to be done since - // blocks are initially allocated with the function as the parent region. - if (eval.lowerAsUnstructured()) { - Fortran::lower::createEmptyRegionBlocks( - firOpBuilder, eval.getNestedEvaluations()); + bool privatize = !outerCombined; + DataSharingProcessor dsp(converter, clauseList, eval, + /*useDelayedPrivatization=*/true, &symTable); + + if (privatize) + dsp.processStep1(); + + const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo(); + + auto genRegionEntryCB = [&](mlir::Operation *op) { + auto parallelOp = llvm::cast(op); + + llvm::SmallVector reductionLocs( + clauseOps.reductionVars.size(), currentLocation); + + mlir::OperandRange privateVars = parallelOp.getPrivateVars(); + mlir::Region ®ion = parallelOp.getRegion(); + + llvm::SmallVector privateVarTypes = clauseOps.reductionTypes; + privateVarTypes.reserve(privateVarTypes.size() + privateVars.size()); + llvm::transform(privateVars, std::back_inserter(privateVarTypes), + [](mlir::Value v) { return v.getType(); }); + + llvm::SmallVector privateVarLocs = reductionLocs; + privateVarLocs.reserve(privateVarLocs.size() + privateVars.size()); + llvm::transform(privateVars, std::back_inserter(privateVarLocs), + [](mlir::Value v) { return v.getLoc(); }); + + converter.getFirOpBuilder().createBlock(®ion, /*insertPt=*/{}, + privateVarTypes, privateVarLocs); + + llvm::SmallVector allSymbols = + *clauseOps.reductionSymbols; + allSymbols.append(delayedPrivatizationInfo.symbols); + for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) { + converter.bindSymbol(*arg, prv); + } + + return allSymbols; + }; + + // TODO Merge with the reduction CB. + genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp); + + llvm::SmallVector privatizers( + delayedPrivatizationInfo.privatizers.begin(), + delayedPrivatizationInfo.privatizers.end()); + + auto parallelOp = genOpWithBody( + genInfo, clauseOps.ifVar, /*num_threads_var=*/nullptr, + clauseOps.allocateVars, clauseOps.allocatorVars, clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols), + clauseOps.procBindKindAttr, delayedPrivatizationInfo.originalAddresses, + delayedPrivatizationInfo.privatizers.empty() + ? nullptr + : firOpBuilder.getArrayAttr(privatizers), + isComposite ? firOpBuilder.getUnitAttr() : nullptr); + + if (clauseOps.numThreadsVar) { + if (evalNumThreadsOutsideTarget) + targetOp.getNumThreadsMutable().assign(clauseOps.numThreadsVar); + else + parallelOp.getNumThreadsVarMutable().assign(clauseOps.numThreadsVar); } - firOpBuilder.create(currentLocation); + return parallelOp; +} - // Create the insertion point after the marker. - firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); - if (genNested) - genNestedEvaluations(converter, eval); +static mlir::omp::SectionOp +genSectionOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + // Currently only private/firstprivate clause is handled, and + // all privatization is done within `omp.section` operations. 
+ return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested) + .setClauses(&clauseList)); +} + +static mlir::omp::SectionsOp +genSectionsOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation, + const SectionsOpClauseOps &clauseOps) { + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(false), + /*reduction_vars=*/mlir::ValueRange(), /*reductions=*/nullptr, + clauseOps.allocateVars, clauseOps.allocatorVars, clauseOps.nowaitAttr); +} + +static mlir::omp::SimdLoopOp +genSimdLoopOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool isComposite, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + SimdLoopOpClauseOps clauseOps; + genSimdLoopClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + clauseOps); + + auto *nestedEval = + getCollapsedLoopEval(eval, Fortran::lower::getCollapseValue(clauseList)); + + // TODO Create callback to add reduction vars as entry block arguments. + + // TODO Store clauseOps.reductionVars, clauseOps.reductionDeclSymbols in op. + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, *nestedEval) + .setGenNested(false), + /*result_types=*/mlir::TypeRange(), clauseOps.alignedVars, + clauseOps.alignmentAttrs.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.alignmentAttrs), + clauseOps.ifVar, clauseOps.nontemporalVars, clauseOps.orderAttr, + clauseOps.simdlenAttr, clauseOps.safelenAttr, + isComposite ? firOpBuilder.getUnitAttr() : nullptr); +} + +static mlir::omp::SingleOp +genSingleOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList &endClauseList) { + SingleOpClauseOps clauseOps; + genSingleClauses(converter, semaCtx, beginClauseList, endClauseList, + currentLocation, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested) + .setClauses(&beginClauseList), + clauseOps.allocateVars, clauseOps.allocatorVars, + clauseOps.copyprivateVars, + clauseOps.copyprivateFuncs.empty() + ? 
nullptr + : converter.getFirOpBuilder().getArrayAttr( + clauseOps.copyprivateFuncs), + clauseOps.nowaitAttr); } static mlir::omp::TargetOp @@ -1243,50 +1719,29 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::Location currentLocation, const Fortran::parser::OmpClauseList &clauseList, - llvm::omp::Directive directive, bool outerCombined = false) { + bool outerCombined = false) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); Fortran::lower::StatementContext stmtCtx; - mlir::Value ifClauseOperand, deviceOperand, threadLimitOperand; - mlir::UnitAttr nowaitAttr; - llvm::SmallVector dependTypeOperands; - llvm::SmallVector mapOperands, dependOperands; - llvm::SmallVector mapSymTypes; - llvm::SmallVector mapSymLocs; - llvm::SmallVector mapSymbols; - llvm::SmallVector reductionSymbols; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Target, - ifClauseOperand); - cp.processDevice(stmtCtx, deviceOperand); - cp.processThreadLimit(stmtCtx, threadLimitOperand); - cp.processDepend(dependTypeOperands, dependOperands); - cp.processMap(currentLocation, directive, stmtCtx, mapOperands, &mapSymTypes, - &mapSymLocs, &mapSymbols); - - cp.processTODO( - currentLocation, llvm::omp::Directive::OMPD_target); - // Process host-only clauses. - if (!llvm::cast(*converter.getModuleOp()) - .getIsTargetDevice()) - cp.processNowait(nowaitAttr); + bool processHostOnlyClauses = + !llvm::cast(*converter.getModuleOp()) + .getIsTargetDevice(); - if (outerCombined) - cp.processTargetReduction(reductionSymbols); + TargetOpClauseOps clauseOps; + clauseOps.mapSymbols.emplace(); + clauseOps.mapSymLocs.emplace(); + clauseOps.mapSymTypes.emplace(); + genTargetClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + processHostOnlyClauses, /*processReduction=*/outerCombined, + clauseOps); // 5.8.1 Implicit Data-Mapping Attribute Rules // The following code follows the implicit data-mapping rules to map all the // symbols used inside the region that have not been explicitly mapped using // the map clause. 
auto captureImplicitMap = [&](const Fortran::semantics::Symbol &sym) { - if (llvm::find(mapSymbols, &sym) == mapSymbols.end()) { + if (llvm::find(*clauseOps.mapSymbols, &sym) == + clauseOps.mapSymbols->end()) { mlir::Value baseOp = converter.getSymbolAddress(sym); if (!baseOp) if (const auto *details = sym.template detailsIf< @@ -1301,22 +1756,21 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym); name << sym.name().ToString(); - Fortran::lower::AddrAndBoundsInfo info = - getDataOperandBaseAddr(converter, converter.getFirOpBuilder(), sym, - converter.getCurrentLocation()); + Fortran::lower::AddrAndBoundsInfo info = getDataOperandBaseAddr( + converter, firOpBuilder, sym, converter.getCurrentLocation()); if (fir::unwrapRefType(info.addr.getType()).isa()) bounds = Fortran::lower::genBoundsOpsFromBox( - converter.getFirOpBuilder(), converter.getCurrentLocation(), - converter, dataExv, info); + firOpBuilder, converter.getCurrentLocation(), converter, + dataExv, info); if (fir::unwrapRefType(info.addr.getType()).isa()) { bool dataExvIsAssumedSize = Fortran::semantics::IsAssumedSizeArray(sym.GetUltimate()); bounds = Fortran::lower::genBaseBoundsOps( - converter.getFirOpBuilder(), converter.getCurrentLocation(), - converter, dataExv, dataExvIsAssumedSize); + firOpBuilder, converter.getCurrentLocation(), converter, dataExv, + dataExvIsAssumedSize); } llvm::omp::OpenMPOffloadMappingFlags mapFlag = @@ -1342,8 +1796,8 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; } - } else if (llvm::find(reductionSymbols, &sym) != - reductionSymbols.end()) { + } else if (llvm::find(clauseOps.targetReductionSymbols, &sym) != + clauseOps.targetReductionSymbols.end()) { // Do a tofrom map for reduction variables. mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; @@ -1355,39 +1809,160 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, } mlir::Value mapOp = createMapInfoOp( - converter.getFirOpBuilder(), baseOp.getLoc(), baseOp, mlir::Value{}, - name.str(), bounds, {}, + firOpBuilder, baseOp.getLoc(), baseOp, mlir::Value{}, name.str(), + bounds, {}, static_cast< std::underlying_type_t>( mapFlag), captureKind, baseOp.getType()); - mapOperands.push_back(mapOp); - mapSymTypes.push_back(baseOp.getType()); - mapSymLocs.push_back(baseOp.getLoc()); - mapSymbols.push_back(&sym); + clauseOps.mapVars.push_back(mapOp); + clauseOps.mapSymTypes->push_back(baseOp.getType()); + clauseOps.mapSymLocs->push_back(baseOp.getLoc()); + clauseOps.mapSymbols->push_back(&sym); } } }; Fortran::lower::pft::visitAllSymbols(eval, captureImplicitMap); - auto targetOp = converter.getFirOpBuilder().create( - currentLocation, ifClauseOperand, deviceOperand, threadLimitOperand, + auto targetOp = firOpBuilder.create( + currentLocation, clauseOps.ifVar, clauseOps.deviceVar, + clauseOps.threadLimitVar, /*trip_count=*/nullptr, - dependTypeOperands.empty() + clauseOps.dependTypeAttrs.empty() ? 
nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - dependTypeOperands), - dependOperands, nowaitAttr, mapOperands, + : firOpBuilder.getArrayAttr(clauseOps.dependTypeAttrs), + clauseOps.dependVars, clauseOps.nowaitAttr, clauseOps.mapVars, /*num_teams_lower=*/nullptr, /*num_teams_upper=*/nullptr, /*teams_thread_limit=*/nullptr, /*num_threads=*/nullptr); - genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, mapSymTypes, - mapSymLocs, mapSymbols, currentLocation); + genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, + *clauseOps.mapSymTypes, *clauseOps.mapSymLocs, + *clauseOps.mapSymbols, currentLocation); return targetOp; } +static mlir::omp::TaskGroupOp +genTaskGroupOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + TaskGroupOpClauseOps clauseOps; + genTaskGroupClauses(converter, semaCtx, clauseList, currentLocation, + clauseOps); + + // TODO Possibly create callback to add task reduction vars as entry block + // arguments. + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested) + .setClauses(&clauseList), + clauseOps.taskReductionVars, + clauseOps.taskReductionDeclSymbols.empty() + ? nullptr + : converter.getFirOpBuilder().getArrayAttr( + clauseOps.taskReductionDeclSymbols), + clauseOps.allocateVars, clauseOps.allocatorVars); +} + +static mlir::omp::TaskLoopOp +genTaskLoopOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool isComposite, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + TaskLoopOpClauseOps clauseOps; + clauseOps.reductionSymbols.emplace(); + genTaskLoopClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + clauseOps); + + auto *nestedEval = + getCollapsedLoopEval(eval, Fortran::lower::getCollapseValue(clauseList)); + + auto reductionCallback = [&](mlir::Operation *op) { + // TODO Possibly add in-reductions to the entry block argument list. + genReductionVars(op, converter, currentLocation, + *clauseOps.reductionSymbols, clauseOps.reductionTypes); + return *clauseOps.reductionSymbols; + }; + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, *nestedEval) + .setGenRegionEntryCb(reductionCallback) + .setReductions(*clauseOps.reductionSymbols, clauseOps.reductionTypes) + .setGenNested(false), + clauseOps.ifVar, clauseOps.finalVar, clauseOps.untiedAttr, + clauseOps.mergeableAttr, clauseOps.inReductionVars, + clauseOps.inReductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.inReductionDeclSymbols), + clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols), + clauseOps.priorityVar, clauseOps.allocateVars, clauseOps.allocatorVars, + clauseOps.grainsizeVar, clauseOps.numTasksVar, clauseOps.nogroupAttr, + isComposite ? 
firOpBuilder.getUnitAttr() : nullptr); +} + +static mlir::omp::TaskOp +genTaskOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + TaskOpClauseOps clauseOps; + genTaskClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + clauseOps); + + // TODO Possibly create callback to add in-reductions as entry block + // arguments. + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested) + .setClauses(&clauseList), + clauseOps.ifVar, clauseOps.finalVar, clauseOps.untiedAttr, + clauseOps.mergeableAttr, clauseOps.inReductionVars, + clauseOps.inReductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.inReductionDeclSymbols), + clauseOps.priorityVar, + clauseOps.dependTypeAttrs.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.dependTypeAttrs), + clauseOps.dependVars, clauseOps.allocateVars, clauseOps.allocatorVars); +} + +static mlir::omp::TaskWaitOp +genTaskWaitOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + TaskWaitOpClauseOps clauseOps; + genTaskWaitClauses(converter, semaCtx, clauseList, currentLocation, + clauseOps); + return converter.getFirOpBuilder().create( + currentLocation); +} + +static mlir::omp::TaskYieldOp +genTaskYieldOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation) { + return converter.getFirOpBuilder().create( + currentLocation); +} + static mlir::omp::TeamsOp genTeamsOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, @@ -1395,192 +1970,233 @@ genTeamsOp(Fortran::lower::AbstractConverter &converter, mlir::Location currentLocation, const Fortran::parser::OmpClauseList &clauseList, bool outerCombined = false) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); Fortran::lower::StatementContext stmtCtx; - mlir::Value numTeamsClauseOperand, ifClauseOperand, threadLimitClauseOperand; - llvm::SmallVector allocateOperands, allocatorOperands, - reductionVars; - llvm::SmallVector reductionDeclSymbols; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Teams, - ifClauseOperand); - cp.processAllocate(allocatorOperands, allocateOperands); - cp.processDefault(); - // cp.processTODO( - // currentLocation, llvm::omp::Directive::OMPD_teams); + TeamsOpClauseOps clauseOps; - // Evaluate NUM_TEAMS and THREAD_LIMIT on the host device, if currently inside - // of an omp.target operation. 
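+  // NUM_TEAMS and THREAD_LIMIT must be evaluated on the host when this TEAMS
+  // construct is nested inside an omp.target region during host compilation,
+  // which is what the evalNumTeamsOutsideTarget flag below checks for.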
auto offloadModOp = llvm::cast( converter.getModuleOp().getOperation()); - mlir::omp::TargetOp targetOp = - findParentTargetOp(converter.getFirOpBuilder()); + mlir::omp::TargetOp targetOp = findParentTargetOp(firOpBuilder); - bool mustEvalOutsideTarget = targetOp && !offloadModOp.getIsTargetDevice(); - if (mustEvalOutsideTarget) { - HostClausesInsertionGuard guard(converter.getFirOpBuilder()); - cp.processNumTeams(stmtCtx, numTeamsClauseOperand); - cp.processThreadLimit(stmtCtx, threadLimitClauseOperand); - } else { - cp.processNumTeams(stmtCtx, numTeamsClauseOperand); - cp.processThreadLimit(stmtCtx, threadLimitClauseOperand); - } + bool evalNumTeamsOutsideTarget = + targetOp && !offloadModOp.getIsTargetDevice(); + + genTeamsClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + evalNumTeamsOutsideTarget, clauseOps); + + // TODO Possibly create callback to add reductions as entry block arguments. auto teamsOp = genOpWithBody( OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) .setGenNested(genNested) .setOuterCombined(outerCombined) .setClauses(&clauseList), - /*num_teams_lower=*/nullptr, /*num_teams_upper=*/nullptr, ifClauseOperand, - /*thread_limit=*/nullptr, allocateOperands, allocatorOperands, - reductionVars, - reductionDeclSymbols.empty() + /*num_teams_lower=*/nullptr, /*num_teams_upper=*/nullptr, clauseOps.ifVar, + /*thread_limit=*/nullptr, clauseOps.allocateVars, clauseOps.allocatorVars, + clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - reductionDeclSymbols)); - if (mustEvalOutsideTarget) { - if (numTeamsClauseOperand) - targetOp.getNumTeamsUpperMutable().assign(numTeamsClauseOperand); - if (threadLimitClauseOperand) - targetOp.getTeamsThreadLimitMutable().assign(threadLimitClauseOperand); + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols)); + + // TODO Populate lower bound once supported by the clause processor + if (evalNumTeamsOutsideTarget) { + if (clauseOps.numTeamsUpperVar) + targetOp.getNumTeamsUpperMutable().assign(clauseOps.numTeamsUpperVar); + if (clauseOps.threadLimitVar) + targetOp.getTeamsThreadLimitMutable().assign(clauseOps.threadLimitVar); } else { - if (numTeamsClauseOperand) - teamsOp.getNumTeamsUpperMutable().assign(numTeamsClauseOperand); - if (threadLimitClauseOperand) - teamsOp.getThreadLimitMutable().assign(threadLimitClauseOperand); + if (clauseOps.numTeamsUpperVar) + teamsOp.getNumTeamsUpperMutable().assign(clauseOps.numTeamsUpperVar); + if (clauseOps.threadLimitVar) + teamsOp.getThreadLimitMutable().assign(clauseOps.threadLimitVar); } return teamsOp; } -static mlir::omp::DistributeOp -genDistributeOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList, - bool outerCombined = false) { - // TODO Process clauses - // ClauseProcessor cp(converter, clauseList); - // cp.processAllocate(allocatorOperands, allocateOperands); - // ... 
- - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setOuterCombined(outerCombined) - .setClauses(&clauseList), - /*dist_schedule_static=*/nullptr, - /*chunk_size=*/nullptr, - /*allocate_vars=*/mlir::ValueRange(), - /*allocators_vars=*/mlir::ValueRange(), - /*order_val=*/nullptr); -} - -/// Extract the list of function and variable symbols affected by the given -/// 'declare target' directive and return the intended device type for them. -static mlir::omp::DeclareTargetDeviceType getDeclareTargetInfo( - Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, - llvm::SmallVectorImpl &symbolAndClause) { - - // The default capture type - mlir::omp::DeclareTargetDeviceType deviceType = - mlir::omp::DeclareTargetDeviceType::any; - const auto &spec = std::get( - declareTargetConstruct.t); +static mlir::omp::WsLoopOp +genWsLoopOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool isComposite, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + WsloopOpClauseOps clauseOps; + clauseOps.reductionSymbols.emplace(); + genWsLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, endClauseList, + currentLocation, clauseOps); - if (const auto *objectList{ - Fortran::parser::Unwrap(spec.u)}) { - // Case: declare target(func, var1, var2) - gatherFuncAndVarSyms(*objectList, mlir::omp::DeclareTargetCaptureClause::to, - symbolAndClause); - } else if (const auto *clauseList{ - Fortran::parser::Unwrap( - spec.u)}) { - if (clauseList->v.empty()) { - // Case: declare target, implicit capture of function - symbolAndClause.emplace_back( - mlir::omp::DeclareTargetCaptureClause::to, - eval.getOwningProcedure()->getSubprogramSymbol()); - } + auto *nestedEval = getCollapsedLoopEval( + eval, Fortran::lower::getCollapseValue(beginClauseList)); - ClauseProcessor cp(converter, semaCtx, *clauseList); - cp.processTo(symbolAndClause); - cp.processEnter(symbolAndClause); - cp.processLink(symbolAndClause); - cp.processDeviceType(deviceType); - cp.processTODO( - converter.getCurrentLocation(), - llvm::omp::Directive::OMPD_declare_target); - } + auto reductionCallback = [&](mlir::Operation *op) { + genReductionVars(op, converter, currentLocation, + *clauseOps.reductionSymbols, clauseOps.reductionTypes); + return *clauseOps.reductionSymbols; + }; - return deviceType; + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, *nestedEval) + .setReductions(*clauseOps.reductionSymbols, clauseOps.reductionTypes) + .setGenRegionEntryCb(reductionCallback) + .setGenNested(false), + clauseOps.linearVars, clauseOps.linearStepVars, clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols), + clauseOps.scheduleValAttr, clauseOps.scheduleChunkVar, + clauseOps.scheduleModAttr, clauseOps.scheduleSimdAttr, + clauseOps.nowaitAttr, clauseOps.orderedAttr, clauseOps.orderAttr, + isComposite ? 
firOpBuilder.getUnitAttr() : nullptr); } -static void collectDeferredDeclareTargets( +//===----------------------------------------------------------------------===// +// Code generation functions for composite constructs +//===----------------------------------------------------------------------===// + +static void genCompositeDistributeParallelDo( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, - llvm::SmallVectorImpl - &deferredDeclareTarget) { - llvm::SmallVector symbolAndClause; - mlir::omp::DeclareTargetDeviceType devType = getDeclareTargetInfo( - converter, semaCtx, eval, declareTargetConstruct, symbolAndClause); - // Return the device type only if at least one of the targets for the - // directive is a function or subroutine - mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); - - for (const DeclareTargetCapturePair &symClause : symbolAndClause) { - mlir::Operation *op = mod.lookupSymbol(converter.mangleName( - std::get(symClause))); - - if (!op) { - deferredDeclareTarget.push_back( - {std::get<0>(symClause), devType, std::get<1>(symClause)}); - } - } -} - -static std::optional -getDeclareTargetFunctionDevice( + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + DistributeOpClauseOps distributeClauseOps; + ParallelOpClauseOps parallelClauseOps; + WsloopOpClauseOps wsLoopClauseOps; + + genDistributeClauses(converter, semaCtx, beginClauseList, currentLocation, + distributeClauseOps); + // TODO evalNumThreadsOutsideTarget + genParallelClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, /*processReduction=*/true, + /*evalNumThreadsOutsideTarget=*/true, parallelClauseOps); + genWsLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, endClauseList, + currentLocation, wsLoopClauseOps); + + // TODO Pass clauseOps structures to generate wrappers + // genDistributeOp(); + // genParallelOp(); + // genWsLoopOp(); + TODO(currentLocation, "Composite DISTRIBUTE PARALLEL DO not implemented"); +} + +static void genCompositeDistributeParallelDoSimd( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPDeclareTargetConstruct - &declareTargetConstruct) { - llvm::SmallVector symbolAndClause; - mlir::omp::DeclareTargetDeviceType deviceType = getDeclareTargetInfo( - converter, semaCtx, eval, declareTargetConstruct, symbolAndClause); + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + DistributeOpClauseOps distributeClauseOps; + ParallelOpClauseOps parallelClauseOps; + WsloopOpClauseOps wsLoopClauseOps; + SimdLoopOpClauseOps simdClauseOps; + + genDistributeClauses(converter, semaCtx, beginClauseList, currentLocation, + distributeClauseOps); + // TODO evalNumThreadsOutsideTarget + genParallelClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, /*processReduction=*/true, + /*evalNumThreadsOutsideTarget=*/true, parallelClauseOps); + genWsLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, endClauseList, + currentLocation, wsLoopClauseOps); + genSimdLoopClauses(converter, semaCtx, stmtCtx, 
beginClauseList, + currentLocation, simdClauseOps); + + // TODO Pass clauseOps structures to generate wrappers + // genDistributeOp(); + // genParallelOp(); + // genWsloopOp(); + // genSimdLoopOp(); + TODO(currentLocation, + "Composite DISTRIBUTE PARALLEL DO SIMD not implemented"); +} + +static void genCompositeDistributeSimd( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + DistributeOpClauseOps distributeClauseOps; + SimdLoopOpClauseOps simdClauseOps; - // Return the device type only if at least one of the targets for the - // directive is a function or subroutine - mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); - for (const DeclareTargetCapturePair &symClause : symbolAndClause) { - mlir::Operation *op = mod.lookupSymbol(converter.mangleName( - std::get(symClause))); + genDistributeClauses(converter, semaCtx, beginClauseList, currentLocation, + distributeClauseOps); + genSimdLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, simdClauseOps); - if (mlir::isa_and_nonnull(op)) - return deviceType; - } + // TODO Pass clauseOps structures to generate wrappers + // genDistributeOp(); + // genSimdLoopOp(); + TODO(currentLocation, "Composite DISTRIBUTE SIMD not implemented"); +} - return std::nullopt; +static void +genCompositeDoSimd(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + WsloopOpClauseOps wsLoopClauseOps; + SimdLoopOpClauseOps simdClauseOps; + + genWsLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, endClauseList, + currentLocation, wsLoopClauseOps); + genSimdLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, simdClauseOps); + + // TODO Pass clauseOps structures to generate wrappers + // genWsloopOp(); + // genSimdLoopOp(); + TODO(currentLocation, "Composite DO SIMD not implemented"); } static void -genOmpSimpleStandalone(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - const Fortran::parser::OpenMPSimpleStandaloneConstruct - &simpleStandaloneConstruct) { +genCompositeTaskLoopSimd(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + TaskLoopOpClauseOps taskLoopClauseOps; + SimdLoopOpClauseOps simdClauseOps; + + genTaskLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, taskLoopClauseOps); + genSimdLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, simdClauseOps); + + // TODO Pass clauseOps structures to generate wrappers + // genTaskloopOp(); + // genSimdLoopOp(); + TODO(currentLocation, "Composite TASKLOOP SIMD not implemented"); +} + +//===----------------------------------------------------------------------===// +// genOMP() Code 
generation functions +//===----------------------------------------------------------------------===// + +static void genOMP(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPSimpleStandaloneConstruct + &simpleStandaloneConstruct) { const auto &directive = std::get( simpleStandaloneConstruct.t); - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - const auto &opClauseList = + const auto &clauseList = std::get(simpleStandaloneConstruct.t); mlir::Location currentLocation = converter.genLocation(directive.source); @@ -1588,33 +2204,29 @@ genOmpSimpleStandalone(Fortran::lower::AbstractConverter &converter, default: break; case llvm::omp::Directive::OMPD_barrier: - firOpBuilder.create(currentLocation); + genBarrierOp(converter, semaCtx, eval, currentLocation); break; case llvm::omp::Directive::OMPD_taskwait: - ClauseProcessor(converter, semaCtx, opClauseList) - .processTODO( - currentLocation, llvm::omp::Directive::OMPD_taskwait); - firOpBuilder.create(currentLocation); + genTaskWaitOp(converter, semaCtx, eval, currentLocation, clauseList); break; case llvm::omp::Directive::OMPD_taskyield: - firOpBuilder.create(currentLocation); + genTaskYieldOp(converter, semaCtx, eval, currentLocation); break; case llvm::omp::Directive::OMPD_target_data: - genDataOp(converter, semaCtx, eval, genNested, currentLocation, - opClauseList); + genDataOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, + clauseList); break; case llvm::omp::Directive::OMPD_target_enter_data: genEnterExitUpdateDataOp( - converter, semaCtx, currentLocation, opClauseList); + converter, semaCtx, currentLocation, clauseList); break; case llvm::omp::Directive::OMPD_target_exit_data: genEnterExitUpdateDataOp( - converter, semaCtx, currentLocation, opClauseList); + converter, semaCtx, currentLocation, clauseList); break; case llvm::omp::Directive::OMPD_target_update: genEnterExitUpdateDataOp( - converter, semaCtx, currentLocation, opClauseList); + converter, semaCtx, currentLocation, clauseList); break; case llvm::omp::Directive::OMPD_ordered: TODO(currentLocation, "OMPD_ordered"); @@ -1622,28 +2234,21 @@ genOmpSimpleStandalone(Fortran::lower::AbstractConverter &converter, } static void -genOmpFlush(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { - llvm::SmallVector operandRange; - if (const auto &ompObjectList = - std::get>( - flushConstruct.t)) - genObjectList(*ompObjectList, converter, operandRange); - const auto &memOrderClause = +genOMP(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { + const auto &verbatim = std::get(flushConstruct.t); + const auto &objectList = + std::get>(flushConstruct.t); + const auto &clauseList = std::get>>( flushConstruct.t); - if (memOrderClause && memOrderClause->size() > 0) - TODO(converter.getCurrentLocation(), "Handle OmpMemoryOrderClause"); - converter.getFirOpBuilder().create( - converter.getCurrentLocation(), operandRange); + mlir::Location currentLocation = converter.genLocation(verbatim.source); + genFlushOp(converter, semaCtx, eval, currentLocation, objectList, clauseList); } 
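+// Each genOMP() overload lowers one class of construct; the standalone
+// dispatcher below visits the construct's variant and forwards to the
+// matching overload.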
-//===----------------------------------------------------------------------===// -// genOMP() Code generation helper functions -//===----------------------------------------------------------------------===// - static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, @@ -1654,12 +2259,11 @@ genOMP(Fortran::lower::AbstractConverter &converter, Fortran::common::visitors{ [&](const Fortran::parser::OpenMPSimpleStandaloneConstruct &simpleStandaloneConstruct) { - genOmpSimpleStandalone(converter, semaCtx, eval, - /*genNested=*/true, - simpleStandaloneConstruct); + genOMP(converter, symTable, semaCtx, eval, + simpleStandaloneConstruct); }, [&](const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { - genOmpFlush(converter, semaCtx, eval, flushConstruct); + genOMP(converter, symTable, semaCtx, eval, flushConstruct); }, [&](const Fortran::parser::OpenMPCancelConstruct &cancelConstruct) { TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct"); @@ -1672,300 +2276,6 @@ genOMP(Fortran::lower::AbstractConverter &converter, standaloneConstruct.u); } -static void convertLoopBounds(Fortran::lower::AbstractConverter &converter, - mlir::Location loc, - llvm::SmallVectorImpl &lowerBound, - llvm::SmallVectorImpl &upperBound, - llvm::SmallVectorImpl &step, - std::size_t loopVarTypeSize) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - // The types of lower bound, upper bound, and step are converted into the - // type of the loop variable if necessary. - mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); - for (unsigned it = 0; it < (unsigned)lowerBound.size(); it++) { - lowerBound[it] = - firOpBuilder.createConvert(loc, loopVarType, lowerBound[it]); - upperBound[it] = - firOpBuilder.createConvert(loc, loopVarType, upperBound[it]); - step[it] = firOpBuilder.createConvert(loc, loopVarType, step[it]); - } -} - -static llvm::SmallVector -genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter, - mlir::Location &loc, - const llvm::SmallVector &args) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - auto ®ion = op->getRegion(0); - - std::size_t loopVarTypeSize = 0; - for (const Fortran::semantics::Symbol *arg : args) - loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); - mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); - llvm::SmallVector tiv(args.size(), loopVarType); - llvm::SmallVector locs(args.size(), loc); - firOpBuilder.createBlock(®ion, {}, tiv, locs); - // The argument is not currently in memory, so make a temporary for the - // argument, and store it there, then bind that location to the argument. 
- mlir::Operation *storeOp = nullptr; - for (auto [argIndex, argSymbol] : llvm::enumerate(args)) { - mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex)); - storeOp = - createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); - } - firOpBuilder.setInsertionPointAfter(storeOp); - - return args; -} - -static llvm::SmallVector -genLoopAndReductionVars( - mlir::Operation *op, Fortran::lower::AbstractConverter &converter, - mlir::Location &loc, - const llvm::SmallVector &loopArgs, - const llvm::SmallVector &reductionArgs, - llvm::SmallVector &reductionTypes) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - - llvm::SmallVector blockArgTypes; - llvm::SmallVector blockArgLocs; - blockArgTypes.reserve(loopArgs.size() + reductionArgs.size()); - blockArgLocs.reserve(blockArgTypes.size()); - mlir::Block *entryBlock; - - if (loopArgs.size()) { - std::size_t loopVarTypeSize = 0; - for (const Fortran::semantics::Symbol *arg : loopArgs) - loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); - mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); - std::fill_n(std::back_inserter(blockArgTypes), loopArgs.size(), - loopVarType); - std::fill_n(std::back_inserter(blockArgLocs), loopArgs.size(), loc); - } - if (reductionArgs.size()) { - llvm::copy(reductionTypes, std::back_inserter(blockArgTypes)); - std::fill_n(std::back_inserter(blockArgLocs), reductionArgs.size(), loc); - } - entryBlock = firOpBuilder.createBlock(&op->getRegion(0), {}, blockArgTypes, - blockArgLocs); - // The argument is not currently in memory, so make a temporary for the - // argument, and store it there, then bind that location to the argument. - if (loopArgs.size()) { - mlir::Operation *storeOp = nullptr; - for (auto [argIndex, argSymbol] : llvm::enumerate(loopArgs)) { - mlir::Value indexVal = - fir::getBase(op->getRegion(0).front().getArgument(argIndex)); - storeOp = - createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); - } - firOpBuilder.setInsertionPointAfter(storeOp); - } - // Bind the reduction arguments to their block arguments - for (auto [arg, prv] : llvm::zip_equal( - reductionArgs, - llvm::drop_begin(entryBlock->getArguments(), loopArgs.size()))) { - converter.bindSymbol(*arg, prv); - } - - return loopArgs; -} - -static void -createSimdLoop(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - llvm::omp::Directive ompDirective, - const Fortran::parser::OmpClauseList &loopOpClauseList, - mlir::Location loc) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - DataSharingProcessor dsp(converter, loopOpClauseList, eval); - dsp.processStep1(); - - Fortran::lower::StatementContext stmtCtx; - mlir::Value scheduleChunkClauseOperand, ifClauseOperand; - llvm::SmallVector lowerBound, upperBound, step, reductionVars; - llvm::SmallVector alignedVars, nontemporalVars; - llvm::SmallVector iv; - llvm::SmallVector reductionDeclSymbols; - mlir::omp::ClauseOrderKindAttr orderClauseOperand; - mlir::IntegerAttr simdlenClauseOperand, safelenClauseOperand; - std::size_t loopVarTypeSize; - - ClauseProcessor cp(converter, semaCtx, loopOpClauseList); - cp.processCollapse(loc, eval, lowerBound, upperBound, step, iv, - loopVarTypeSize); - cp.processScheduleChunk(stmtCtx, scheduleChunkClauseOperand); - cp.processReduction(loc, reductionVars, reductionDeclSymbols); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Simd, - ifClauseOperand); 
- cp.processSimdlen(simdlenClauseOperand); - cp.processSafelen(safelenClauseOperand); - cp.processTODO(loc, ompDirective); - - convertLoopBounds(converter, loc, lowerBound, upperBound, step, - loopVarTypeSize); - - mlir::TypeRange resultType; - auto simdLoopOp = firOpBuilder.create( - loc, resultType, lowerBound, upperBound, step, alignedVars, - /*alignment_values=*/nullptr, ifClauseOperand, nontemporalVars, - orderClauseOperand, simdlenClauseOperand, safelenClauseOperand, - /*inclusive=*/firOpBuilder.getUnitAttr()); - - auto *nestedEval = getCollapsedLoopEval( - eval, Fortran::lower::getCollapseValue(loopOpClauseList)); - - auto ivCallback = [&](mlir::Operation *op) { - return genLoopVars(op, converter, loc, iv); - }; - - createBodyOfOp( - simdLoopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval) - .setClauses(&loopOpClauseList) - .setDataSharingProcessor(&dsp) - .setGenRegionEntryCb(ivCallback)); -} - -static void createWsLoop(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - llvm::omp::Directive ompDirective, - const Fortran::parser::OmpClauseList &beginClauseList, - const Fortran::parser::OmpClauseList *endClauseList, - mlir::Location loc) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - DataSharingProcessor dsp(converter, beginClauseList, eval); - dsp.processStep1(); - - Fortran::lower::StatementContext stmtCtx; - mlir::Value scheduleChunkClauseOperand; - llvm::SmallVector lowerBound, upperBound, step, reductionVars; - llvm::SmallVector linearVars, linearStepVars; - llvm::SmallVector iv; - llvm::SmallVector reductionDeclSymbols; - llvm::SmallVector reductionSymbols; - mlir::omp::ClauseOrderKindAttr orderClauseOperand; - mlir::omp::ClauseScheduleKindAttr scheduleValClauseOperand; - mlir::UnitAttr nowaitClauseOperand, scheduleSimdClauseOperand; - mlir::IntegerAttr orderedClauseOperand; - mlir::omp::ScheduleModifierAttr scheduleModClauseOperand; - std::size_t loopVarTypeSize; - - ClauseProcessor cp(converter, semaCtx, beginClauseList); - cp.processCollapse(loc, eval, lowerBound, upperBound, step, iv, - loopVarTypeSize); - cp.processScheduleChunk(stmtCtx, scheduleChunkClauseOperand); - cp.processReduction(loc, reductionVars, reductionDeclSymbols, - &reductionSymbols); - cp.processTODO(loc, ompDirective); - - convertLoopBounds(converter, loc, lowerBound, upperBound, step, - loopVarTypeSize); - - auto wsLoopOp = firOpBuilder.create( - loc, lowerBound, upperBound, step, linearVars, linearStepVars, - reductionVars, - reductionDeclSymbols.empty() - ? nullptr - : mlir::ArrayAttr::get(firOpBuilder.getContext(), - reductionDeclSymbols), - scheduleValClauseOperand, scheduleChunkClauseOperand, - /*schedule_modifiers=*/nullptr, - /*simd_modifier=*/nullptr, nowaitClauseOperand, orderedClauseOperand, - orderClauseOperand, - /*inclusive=*/firOpBuilder.getUnitAttr()); - - // Handle attribute based clauses. - if (cp.processOrdered(orderedClauseOperand)) - wsLoopOp.setOrderedValAttr(orderedClauseOperand); - - if (cp.processSchedule(scheduleValClauseOperand, scheduleModClauseOperand, - scheduleSimdClauseOperand)) { - wsLoopOp.setScheduleValAttr(scheduleValClauseOperand); - wsLoopOp.setScheduleModifierAttr(scheduleModClauseOperand); - wsLoopOp.setSimdModifierAttr(scheduleSimdClauseOperand); - } - // In FORTRAN `nowait` clause occur at the end of `omp do` directive. 
- // i.e - // !$omp do - // <...> - // !$omp end do nowait - if (endClauseList) { - if (ClauseProcessor(converter, semaCtx, *endClauseList) - .processNowait(nowaitClauseOperand)) - wsLoopOp.setNowaitAttr(nowaitClauseOperand); - } - - auto *nestedEval = getCollapsedLoopEval( - eval, Fortran::lower::getCollapseValue(beginClauseList)); - - llvm::SmallVector reductionTypes; - reductionTypes.reserve(reductionVars.size()); - llvm::transform(reductionVars, std::back_inserter(reductionTypes), - [](mlir::Value v) { return v.getType(); }); - - auto ivCallback = [&](mlir::Operation *op) { - return genLoopAndReductionVars(op, converter, loc, iv, reductionSymbols, - reductionTypes); - }; - - createBodyOfOp( - wsLoopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval) - .setClauses(&beginClauseList) - .setDataSharingProcessor(&dsp) - .setReductions(&reductionSymbols, &reductionTypes) - .setGenRegionEntryCb(ivCallback)); - - // Create trip_count if inside of omp.target and this is host compilation - auto offloadMod = llvm::dyn_cast( - firOpBuilder.getModule().getOperation()); - auto targetOp = wsLoopOp->getParentOfType(); - - if (offloadMod && targetOp && !offloadMod.getIsTargetDevice() && - targetOp.isTargetSPMDLoop()) { - // Lower loop bounds and step, and process collapsing again, putting lowered - // values outside of omp.target this time. This enables calculating and - // accessing the trip count in the host, which is needed when lowering to - // LLVM IR via the OMPIRBuilder. - HostClausesInsertionGuard guard(firOpBuilder); - llvm::SmallVector outsideLB, outsideUB, outsideStep; - llvm::SmallVector outsideIV; - cp.processCollapse(loc, eval, outsideLB, outsideUB, outsideStep, outsideIV, - loopVarTypeSize); - targetOp.getTripCountMutable().assign( - calculateTripCount(converter, loc, outsideLB, outsideUB, outsideStep)); - } -} - -static void createSimdWsLoop( - Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, llvm::omp::Directive ompDirective, - const Fortran::parser::OmpClauseList &beginClauseList, - const Fortran::parser::OmpClauseList *endClauseList, mlir::Location loc) { - ClauseProcessor cp(converter, semaCtx, beginClauseList); - cp.processTODO< - Fortran::parser::OmpClause::Aligned, Fortran::parser::OmpClause::Allocate, - Fortran::parser::OmpClause::Linear, Fortran::parser::OmpClause::Safelen, - Fortran::parser::OmpClause::Simdlen, Fortran::parser::OmpClause::Order>( - loc, ompDirective); - // TODO: Add support for vectorization - add vectorization hints inside loop - // body. - // OpenMP standard does not specify the length of vector instructions. - // Currently we safely assume that for !$omp do simd pragma the SIMD length - // is equal to 1 (i.e. we generate standard workshare loop). - // When support for vectorization is enabled, then we need to add handling of - // if clause. Currently if clause can be skipped because we always assume - // SIMD length = 1. 
- createWsLoop(converter, semaCtx, eval, ompDirective, beginClauseList, - endClauseList, loc); -} - static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, @@ -1973,13 +2283,16 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, const Fortran::parser::OpenMPLoopConstruct &loopConstruct) { const auto &beginLoopDirective = std::get(loopConstruct.t); - const auto &loopOpClauseList = + const auto &beginClauseList = std::get(beginLoopDirective.t); mlir::Location currentLocation = converter.genLocation(beginLoopDirective.source); - const auto ompDirective = + llvm::omp::Directive origDirective = std::get(beginLoopDirective.t).v; + assert(llvm::omp::loopConstructSet.test(origDirective) && + "Expected loop construct"); + const auto *endClauseList = [&]() { using RetTy = const Fortran::parser::OmpClauseList *; if (auto &endLoopDirective = @@ -1991,61 +2304,191 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, return RetTy(); }(); - bool validDirective = false; - if (llvm::omp::topTaskloopSet.test(ompDirective)) { - validDirective = true; - TODO(currentLocation, "Taskloop construct"); - } else { - // Create omp.{target, teams, distribute, parallel} nested operations - if ((llvm::omp::allTargetSet & llvm::omp::loopConstructSet) - .test(ompDirective)) { - validDirective = true; - genTargetOp(converter, semaCtx, eval, /*genNested=*/false, - currentLocation, loopOpClauseList, ompDirective, - /*outerCombined=*/true); + /// Utility to remove the first leaf construct from a combined loop construct. + /// Composite constructs are not handled, as they cannot be split in that way. + auto peelCombinedLoopDirective = + [](llvm::omp::Directive dir) -> llvm::omp::Directive { + using D = llvm::omp::Directive; + switch (dir) { + case D::OMPD_masked_taskloop: + case D::OMPD_master_taskloop: + return D::OMPD_taskloop; + case D::OMPD_masked_taskloop_simd: + case D::OMPD_master_taskloop_simd: + return D::OMPD_taskloop_simd; + case D::OMPD_parallel_do: + return D::OMPD_do; + case D::OMPD_parallel_do_simd: + return D::OMPD_do_simd; + case D::OMPD_parallel_masked_taskloop: + return D::OMPD_masked_taskloop; + case D::OMPD_parallel_master_taskloop: + return D::OMPD_master_taskloop; + case D::OMPD_parallel_masked_taskloop_simd: + return D::OMPD_masked_taskloop_simd; + case D::OMPD_parallel_master_taskloop_simd: + return D::OMPD_master_taskloop_simd; + case D::OMPD_target_parallel_do: + return D::OMPD_parallel_do; + case D::OMPD_target_parallel_do_simd: + return D::OMPD_parallel_do_simd; + case D::OMPD_target_simd: + return D::OMPD_simd; + case D::OMPD_target_teams_distribute: + return D::OMPD_teams_distribute; + case D::OMPD_target_teams_distribute_parallel_do: + return D::OMPD_teams_distribute_parallel_do; + case D::OMPD_target_teams_distribute_parallel_do_simd: + return D::OMPD_teams_distribute_parallel_do_simd; + case D::OMPD_target_teams_distribute_simd: + return D::OMPD_teams_distribute_simd; + case D::OMPD_teams_distribute: + return D::OMPD_distribute; + case D::OMPD_teams_distribute_parallel_do: + return D::OMPD_distribute_parallel_do; + case D::OMPD_teams_distribute_parallel_do_simd: + return D::OMPD_distribute_parallel_do_simd; + case D::OMPD_teams_distribute_simd: + return D::OMPD_distribute_simd; + case D::OMPD_parallel_loop: + case D::OMPD_teams_loop: + return D::OMPD_loop; + case D::OMPD_target_parallel_loop: + return D::OMPD_parallel_loop; + case D::OMPD_target_teams_loop: + return 
D::OMPD_teams_loop; + default: + llvm_unreachable("Unexpected non-combined loop construct"); } - if ((llvm::omp::allTeamsSet & llvm::omp::loopConstructSet) - .test(ompDirective)) { - validDirective = true; - genTeamsOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, - loopOpClauseList, /*outerCombined=*/true); - } - if (llvm::omp::allDistributeSet.test(ompDirective)) { - validDirective = true; - bool outerCombined = llvm::omp::topDistributeSet.test(ompDirective); - genDistributeOp(converter, semaCtx, eval, /*genNested=*/false, - currentLocation, loopOpClauseList, outerCombined); - } - if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet) - .test(ompDirective)) { - validDirective = true; - genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, - currentLocation, loopOpClauseList, - /*outerCombined=*/true); + }; + + // Privatization and loop nest clause processing must be done before producing + // any wrappers and after combined constructs, so that any operations created + // are outside of the wrapper nest. + DataSharingProcessor dsp(converter, beginClauseList, eval); + LoopNestOpClauseOps clauseOps; + auto processLoopNestClauses = [&]() { + dsp.processStep1(); + genLoopNestClauses(converter, semaCtx, eval, beginClauseList, + currentLocation, clauseOps); + }; + + llvm::omp::Directive ompDirective = origDirective; + if (llvm::omp::topTargetSet.test(ompDirective)) { + // TODO Combined constructs: Call genClauses and pass them in. + genTargetOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, + beginClauseList, /*outerCombined=*/true); + ompDirective = peelCombinedLoopDirective(ompDirective); + } + + if (llvm::omp::topTeamsSet.test(ompDirective)) { + genTeamsOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, + beginClauseList, /*outerCombined=*/true); + ompDirective = peelCombinedLoopDirective(ompDirective); + } + + if (llvm::omp::topParallelSet.test(ompDirective)) { + genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, + /*isComposite=*/false, currentLocation, beginClauseList, + /*outerCombined=*/true); + ompDirective = peelCombinedLoopDirective(ompDirective); + processLoopNestClauses(); + } else { + processLoopNestClauses(); + + if (llvm::omp::topDistributeSet.test(ompDirective)) { + switch (ompDirective) { + case llvm::omp::Directive::OMPD_distribute: + genDistributeOp(converter, semaCtx, eval, /*isComposite=*/false, + currentLocation, beginClauseList, + /*outerCombined=*/true); + break; + case llvm::omp::Directive::OMPD_distribute_parallel_do: + genCompositeDistributeParallelDo(converter, semaCtx, eval, + beginClauseList, endClauseList, + currentLocation); + break; + case llvm::omp::Directive::OMPD_distribute_parallel_do_simd: + genCompositeDistributeParallelDoSimd(converter, semaCtx, eval, + beginClauseList, endClauseList, + currentLocation); + break; + case llvm::omp::Directive::OMPD_distribute_simd: + genCompositeDistributeSimd(converter, semaCtx, eval, beginClauseList, + endClauseList, currentLocation); + break; + default: + llvm_unreachable("Unexpected DISTRIBUTE construct"); + } + } else if (llvm::omp::topTaskloopSet.test(ompDirective)) { + switch (ompDirective) { + case llvm::omp::Directive::OMPD_taskloop_simd: + genCompositeTaskLoopSimd(converter, semaCtx, eval, beginClauseList, + endClauseList, currentLocation); + break; + case llvm::omp::Directive::OMPD_taskloop: + genTaskLoopOp(converter, semaCtx, eval, /*isComposite=*/false, + currentLocation, beginClauseList); + break; + default: + 
llvm_unreachable("Unexpected TASKLOOP construct"); + } + } else if (ompDirective == llvm::omp::Directive::OMPD_simd) { + genSimdLoopOp(converter, semaCtx, eval, /*isComposite=*/false, + currentLocation, beginClauseList); + } else if (!llvm::omp::topDoSet.test(ompDirective)) { + TODO(currentLocation, + "Unhandled loop directive (" + + llvm::omp::getOpenMPDirectiveName(origDirective) + ")"); } } - if ((llvm::omp::allDoSet | llvm::omp::allSimdSet).test(ompDirective)) - validDirective = true; - if (!validDirective) { + if (llvm::omp::topDoSet.test(ompDirective)) { + switch (ompDirective) { + case llvm::omp::Directive::OMPD_do_simd: + genCompositeDoSimd(converter, semaCtx, eval, beginClauseList, + endClauseList, currentLocation); + break; + case llvm::omp::Directive::OMPD_do: + genWsLoopOp(converter, semaCtx, eval, /*isComposite=*/false, + currentLocation, beginClauseList, endClauseList); + break; + default: + llvm_unreachable("Unexpected DO construct"); + } + } else if (llvm::omp::allParallelSet.test(origDirective)) { TODO(currentLocation, "Unhandled loop directive (" + - llvm::omp::getOpenMPDirectiveName(ompDirective) + + llvm::omp::getOpenMPDirectiveName(origDirective) + ")"); } - if (llvm::omp::allDoSimdSet.test(ompDirective)) { - // 2.9.3.2 Workshare SIMD construct - createSimdWsLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList, - endClauseList, currentLocation); + // Create inner loop nest and body. + mlir::omp::LoopNestOp loopNestOp = + genLoopNestOp(converter, semaCtx, eval, currentLocation, beginClauseList, + clauseOps, dsp); - } else if (llvm::omp::allSimdSet.test(ompDirective)) { - // 2.9.3.1 SIMD construct - createSimdLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList, - currentLocation); - genOpenMPReduction(converter, semaCtx, loopOpClauseList); - } else { - createWsLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList, - endClauseList, currentLocation); + if (ompDirective == llvm::omp::Directive::OMPD_simd) + genOpenMPReduction(converter, semaCtx, beginClauseList); + + // Create trip_count outside of omp.target if this is host compilation and the + // loop is inside of a target region. + auto offloadMod = llvm::dyn_cast( + converter.getModuleOp().getOperation()); + auto targetOp = loopNestOp->getParentOfType(); + + if (offloadMod && targetOp && !offloadMod.getIsTargetDevice() && + targetOp.isTargetSPMDLoop()) { + // Lower loop bounds and step, and process collapsing again, putting lowered + // values outside of omp.target this time. This enables calculating and + // accessing the trip count in the host, which is needed when lowering to + // LLVM IR via the OMPIRBuilder. 
+ HostClausesInsertionGuard guard(converter.getFirOpBuilder()); + CollapseClauseOps collapseOps; + ClauseProcessor(converter, semaCtx, beginClauseList) + .processCollapse(currentLocation, eval, collapseOps); + targetOp.getTripCountMutable().assign( + calculateTripCount(converter, currentLocation, collapseOps.loopLBVar, + collapseOps.loopUBVar, collapseOps.loopStepVar)); } } @@ -2066,6 +2509,9 @@ genOMP(Fortran::lower::AbstractConverter &converter, const auto &endClauseList = std::get(endBlockDirective.t); + assert(llvm::omp::blockConstructSet.test(directive.v) && + "Expected block construct"); + for (const Fortran::parser::OmpClause &clause : beginClauseList.v) { mlir::Location clauseLocation = converter.genLocation(clause.source); if (!std::get_if(&clause.u) && @@ -2106,11 +2552,11 @@ genOMP(Fortran::lower::AbstractConverter &converter, break; case llvm::omp::Directive::OMPD_ordered: genOrderedRegionOp(converter, semaCtx, eval, /*genNested=*/true, - currentLocation); + currentLocation, beginClauseList); break; case llvm::omp::Directive::OMPD_parallel: genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/true, - currentLocation, beginClauseList); + /*isComposite=*/false, currentLocation, beginClauseList); break; case llvm::omp::Directive::OMPD_single: genSingleOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, @@ -2118,7 +2564,7 @@ genOMP(Fortran::lower::AbstractConverter &converter, break; case llvm::omp::Directive::OMPD_target: genTargetOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, - beginClauseList, directive.v); + beginClauseList); break; case llvm::omp::Directive::OMPD_target_data: genDataOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, @@ -2153,29 +2599,25 @@ genOMP(Fortran::lower::AbstractConverter &converter, // Codegen for combined directives bool combinedDirective = false; - if ((llvm::omp::allTargetSet & llvm::omp::blockConstructSet) - .test(directive.v)) { + if (llvm::omp::allTargetSet.test(directive.v)) { genTargetOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, - beginClauseList, directive.v, - /*outerCombined=*/true); + beginClauseList, /*outerCombined=*/true); combinedDirective = true; } - if ((llvm::omp::allTeamsSet & llvm::omp::blockConstructSet) - .test(directive.v)) { + if (llvm::omp::allTeamsSet.test(directive.v)) { genTeamsOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, beginClauseList); combinedDirective = true; } - if ((llvm::omp::allParallelSet & llvm::omp::blockConstructSet) - .test(directive.v)) { + if (llvm::omp::allParallelSet.test(directive.v)) { bool outerCombined = directive.v != llvm::omp::Directive::OMPD_target_parallel; genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, - currentLocation, beginClauseList, outerCombined); + /*isComposite=*/false, currentLocation, beginClauseList, + outerCombined); combinedDirective = true; } - if ((llvm::omp::workShareSet & llvm::omp::blockConstructSet) - .test(directive.v)) { + if (llvm::omp::workShareSet.test(directive.v)) { genSingleOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, beginClauseList, endClauseList); combinedDirective = true; @@ -2194,38 +2636,13 @@ genOMP(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPCriticalConstruct &criticalConstruct) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::Location currentLocation = 
converter.getCurrentLocation(); - mlir::IntegerAttr hintClauseOp; - std::string name; - const Fortran::parser::OmpCriticalDirective &cd = + const auto &cd = std::get(criticalConstruct.t); - if (std::get>(cd.t).has_value()) { - name = - std::get>(cd.t).value().ToString(); - } - const auto &clauseList = std::get(cd.t); - ClauseProcessor(converter, semaCtx, clauseList).processHint(hintClauseOp); - - mlir::omp::CriticalOp criticalOp = [&]() { - if (name.empty()) { - return firOpBuilder.create( - currentLocation, mlir::FlatSymbolRefAttr()); - } - mlir::ModuleOp module = firOpBuilder.getModule(); - mlir::OpBuilder modBuilder(module.getBodyRegion()); - auto global = module.lookupSymbol(name); - if (!global) - global = modBuilder.create( - currentLocation, - mlir::StringAttr::get(firOpBuilder.getContext(), name), hintClauseOp); - return firOpBuilder.create( - currentLocation, mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(), - global.getSymName())); - }(); - auto genInfo = OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval); - createBodyOfOp(criticalOp, genInfo); + const auto &name = std::get>(cd.t); + mlir::Location currentLocation = converter.getCurrentLocation(); + genCriticalOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, + clauseList, name); } static void @@ -2234,55 +2651,48 @@ genOMP(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPSectionsConstruct §ionsConstruct) { - mlir::Location currentLocation = converter.getCurrentLocation(); - llvm::SmallVector allocateOperands, allocatorOperands; - mlir::UnitAttr nowaitClauseOperand; const auto &beginSectionsDirective = std::get(sectionsConstruct.t); - const auto §ionsClauseList = + const auto &beginClauseList = std::get(beginSectionsDirective.t); - - // Process clauses before optional omp.parallel, so that new variables are - // allocated outside of the parallel region - ClauseProcessor cp(converter, semaCtx, sectionsClauseList); - cp.processSectionsReduction(currentLocation); - cp.processAllocate(allocatorOperands, allocateOperands); - llvm::omp::Directive dir = std::get(beginSectionsDirective.t) .v; + const auto §ionBlocks = + std::get(sectionsConstruct.t); + + // Process clauses before optional omp.parallel, so that new variables are + // allocated outside of the parallel region. + mlir::Location currentLocation = converter.getCurrentLocation(); + SectionsOpClauseOps clauseOps; + genSectionsClauses(converter, semaCtx, beginClauseList, currentLocation, + /*clausesFromBeginSections=*/true, clauseOps); - // Parallel wrapper of PARALLEL SECTIONS construct + // Parallel wrapper of PARALLEL SECTIONS construct. 
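+  // For PARALLEL SECTIONS the enclosing omp.parallel is created first, so the
+  // sections construct is nested inside it; for a standalone SECTIONS
+  // construct, the NOWAIT clause is instead taken from the end directive.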
if (dir == llvm::omp::Directive::OMPD_parallel_sections) { genParallelOp(converter, symTable, semaCtx, eval, - /*genNested=*/false, currentLocation, sectionsClauseList, - /*outerCombined=*/true); + /*genNested=*/false, /*isComposite=*/false, currentLocation, + beginClauseList, /*outerCombined=*/true); } else { const auto &endSectionsDirective = std::get(sectionsConstruct.t); - const auto &endSectionsClauseList = + const auto &endClauseList = std::get(endSectionsDirective.t); - ClauseProcessor(converter, semaCtx, endSectionsClauseList) - .processNowait(nowaitClauseOperand); + genSectionsClauses(converter, semaCtx, endClauseList, currentLocation, + /*clausesFromBeginSections=*/false, clauseOps); } - // SECTIONS construct - genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(false), - /*reduction_vars=*/mlir::ValueRange(), - /*reductions=*/nullptr, allocateOperands, allocatorOperands, - nowaitClauseOperand); + // SECTIONS construct. + genSectionsOp(converter, semaCtx, eval, currentLocation, clauseOps); - const auto §ionBlocks = - std::get(sectionsConstruct.t); + // Generate nested SECTION operations recursively. auto &firOpBuilder = converter.getFirOpBuilder(); auto ip = firOpBuilder.saveInsertionPoint(); for (const auto &[nblock, neval] : llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) { symTable.pushScope(); genSectionOp(converter, semaCtx, neval, /*genNested=*/true, currentLocation, - sectionsClauseList); + beginClauseList); symTable.popScope(); firOpBuilder.restoreInsertionPoint(ip); } @@ -2366,12 +2776,12 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) { - llvm::SmallVector symbolAndClause; mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); + DeclareTargetOpClauseOps clauseOps; mlir::omp::DeclareTargetDeviceType deviceType = getDeclareTargetInfo( - converter, semaCtx, eval, declareTargetConstruct, symbolAndClause); + converter, semaCtx, eval, declareTargetConstruct, clauseOps); - for (const DeclareTargetCapturePair &symClause : symbolAndClause) { + for (const DeclareTargetCapturePair &symClause : clauseOps.symbolAndClause) { mlir::Operation *op = mod.lookupSymbol(converter.mangleName( std::get(symClause))); @@ -2482,11 +2892,10 @@ genOMP(Fortran::lower::AbstractConverter &converter, mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder, mlir::Operation *op, mlir::Location loc) { - if (mlir::isa(op)) + if (mlir::isa(op)) return builder.create(loc); - else - return builder.create(loc); + return builder.create(loc); } void Fortran::lower::genOpenMPConstruct( diff --git a/flang/lib/Lower/OpenMP/OperationClauses.h b/flang/lib/Lower/OpenMP/OperationClauses.h new file mode 100644 index 0000000000000..ed49c3f0750b1 --- /dev/null +++ b/flang/lib/Lower/OpenMP/OperationClauses.h @@ -0,0 +1,306 @@ +//===-- Lower/OpenMP/OperationClauses.h -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#include "Utils.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include <optional>
+
+namespace Fortran {
+namespace semantics {
+class Symbol;
+} // namespace semantics
+} // namespace Fortran
+
+namespace Fortran {
+namespace lower {
+namespace omp {
+
+//===----------------------------------------------------------------------===//
+// Mixin structures defining operands associated with each OpenMP clause.
+//===----------------------------------------------------------------------===//
+
+struct AlignedClauseOps {
+  llvm::SmallVector<mlir::Value> alignedVars;
+  llvm::SmallVector<mlir::Attribute> alignmentAttrs;
+};
+
+struct AllocateClauseOps {
+  llvm::SmallVector<mlir::Value> allocatorVars, allocateVars;
+};
+
+struct CollapseClauseOps {
+  llvm::SmallVector<mlir::Value> loopLBVar, loopUBVar, loopStepVar;
+  llvm::SmallVector<const Fortran::semantics::Symbol *> loopIV;
+};
+
+struct CopyinClauseOps {};
+
+struct CopyprivateClauseOps {
+  llvm::SmallVector<mlir::Value> copyprivateVars;
+  llvm::SmallVector<mlir::Attribute> copyprivateFuncs;
+};
+
+struct DependClauseOps {
+  llvm::SmallVector<mlir::Attribute> dependTypeAttrs;
+  llvm::SmallVector<mlir::Value> dependVars;
+};
+
+struct DeviceClauseOps {
+  mlir::Value deviceVar;
+};
+
+struct DeviceTypeClauseOps {
+  mlir::omp::DeclareTargetDeviceType deviceType;
+};
+
+struct DistScheduleClauseOps {
+  mlir::UnitAttr distScheduleStaticAttr;
+  mlir::Value distScheduleChunkSizeVar;
+};
+
+struct EnterLinkToClauseOps {
+  llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
+};
+
+struct FinalClauseOps {
+  mlir::Value finalVar;
+};
+
+struct GrainsizeClauseOps {
+  mlir::Value grainsizeVar;
+};
+
+struct HintClauseOps {
+  mlir::IntegerAttr hintAttr;
+};
+
+struct IfClauseOps {
+  mlir::Value ifVar;
+};
+
+struct InReductionClauseOps {
+  llvm::SmallVector<mlir::Value> inReductionVars;
+  llvm::SmallVector<mlir::Type> inReductionTypes;
+  llvm::SmallVector<mlir::Attribute> inReductionDeclSymbols;
+  std::optional<llvm::SmallVector<const Fortran::semantics::Symbol *>>
+      inReductionSymbols;
+};
+
+struct LinearClauseOps {
+  llvm::SmallVector<mlir::Value> linearVars, linearStepVars;
+};
+
+// The optional parameters - mapSymTypes, mapSymLocs & mapSymbols are used to
+// store the original type, location and Fortran symbol for the map operands.
+// They may be used later on to create the block_arguments for some of the
+// target directives that require it.
+struct MapClauseOps {
+  llvm::SmallVector<mlir::Value> mapVars;
+  std::optional<llvm::SmallVector<mlir::Type>> mapSymTypes;
+  std::optional<llvm::SmallVector<mlir::Location>> mapSymLocs;
+  std::optional<llvm::SmallVector<const Fortran::semantics::Symbol *>>
+      mapSymbols;
+};
+
+struct MergeableClauseOps {
+  mlir::UnitAttr mergeableAttr;
+};
+
+struct NogroupClauseOps {
+  mlir::UnitAttr nogroupAttr;
+};
+
+struct NontemporalClauseOps {
+  llvm::SmallVector<mlir::Value> nontemporalVars;
+};
+
+struct NowaitClauseOps {
+  mlir::UnitAttr nowaitAttr;
+};
+
+struct NumTasksClauseOps {
+  mlir::Value numTasksVar;
+};
+
+struct NumTeamsClauseOps {
+  mlir::Value numTeamsLowerVar;
+  mlir::Value numTeamsUpperVar;
+};
+
+struct NumThreadsClauseOps {
+  mlir::Value numThreadsVar;
+};
+
+struct OrderClauseOps {
+  mlir::omp::ClauseOrderKindAttr orderAttr;
+};
+
+struct OrderedClauseOps {
+  mlir::IntegerAttr orderedAttr;
+};
+
+struct ParallelizationLevelClauseOps {
+  mlir::UnitAttr parLevelThreadsAttr;
+  mlir::UnitAttr parLevelSimdAttr;
+};
+
+struct PriorityClauseOps {
+  mlir::Value priorityVar;
+};
+
+struct PrivateClauseOps {
+  llvm::SmallVector<mlir::Value> privateVars;
+  llvm::SmallVector<mlir::Attribute> privatizers;
+};
+
+struct ProcBindClauseOps {
+  mlir::omp::ClauseProcBindKindAttr procBindKindAttr;
+};
+
+struct ReductionClauseOps {
+  llvm::SmallVector<mlir::Value> reductionVars;
+  llvm::SmallVector<mlir::Type> reductionTypes;
+  llvm::SmallVector<mlir::Attribute> reductionDeclSymbols;
+  std::optional<llvm::SmallVector<const Fortran::semantics::Symbol *>>
+      reductionSymbols;
+};
+
+struct SafelenClauseOps {
+  mlir::IntegerAttr safelenAttr;
+};
+
+struct ScheduleClauseOps {
+  mlir::omp::ClauseScheduleKindAttr scheduleValAttr;
+  mlir::omp::ScheduleModifierAttr scheduleModAttr;
+  mlir::Value scheduleChunkVar;
+  mlir::UnitAttr scheduleSimdAttr;
+};
+
+struct SimdlenClauseOps {
+  mlir::IntegerAttr simdlenAttr;
+};
+
+struct TargetReductionClauseOps {
+  llvm::SmallVector<const Fortran::semantics::Symbol *> targetReductionSymbols;
+};
+
+struct TaskReductionClauseOps {
+  llvm::SmallVector<mlir::Value> taskReductionVars;
+  llvm::SmallVector<mlir::Type> taskReductionTypes;
+  llvm::SmallVector<mlir::Attribute> taskReductionDeclSymbols;
+  std::optional<llvm::SmallVector<const Fortran::semantics::Symbol *>>
+      taskReductionSymbols;
+};
+
+struct ThreadLimitClauseOps {
+  mlir::Value threadLimitVar;
+};
+
+struct UntiedClauseOps {
+  mlir::UnitAttr untiedAttr;
+};
+
+struct UseDeviceClauseOps {
+  llvm::SmallVector<mlir::Value> useDevicePtrVars;
+  llvm::SmallVector<mlir::Value> useDeviceAddrVars;
+  llvm::SmallVector<mlir::Type> useDeviceTypes;
+  llvm::SmallVector<mlir::Location> useDeviceLocs;
+  llvm::SmallVector<const Fortran::semantics::Symbol *> useDeviceSymbols;
+};
+
+//===----------------------------------------------------------------------===//
+// Structures defining clause operands associated with each OpenMP leaf
+// construct.
+//
+// These mirror the arguments expected by the corresponding OpenMP MLIR ops.
+//===----------------------------------------------------------------------===//
+
+namespace detail {
+template <typename... Mixins>
+struct Clauses : public Mixins... {};
+} // namespace detail
+
+using CriticalDeclareOpClauseOps = detail::Clauses<HintClauseOps>;
+
+using DataOpClauseOps = detail::Clauses;
+
+using DeclareTargetOpClauseOps =
+    detail::Clauses<DeviceTypeClauseOps, EnterLinkToClauseOps>;
+
+using DistributeOpClauseOps =
+    detail::Clauses;
+
+using EnterExitUpdateDataOpClauseOps =
+    detail::Clauses;
+
+using LoopNestOpClauseOps = detail::Clauses<CollapseClauseOps>;
+
+// TODO Rename to "masked"
+// TODO `filter` clause.
+using MasterOpClauseOps = detail::Clauses<>;
+
+using OrderedRegionOpClauseOps = detail::Clauses;
+
+using ParallelOpClauseOps =
+    detail::Clauses;
+
+using SectionsOpClauseOps =
+    detail::Clauses;
+
+// TODO `linear` clause.
+using SimdLoopOpClauseOps =
+    detail::Clauses;
+
+using SingleOpClauseOps =
+    detail::Clauses;
+
+// TODO `allocate`, `defaultmap`, `has_device_addr`, `in_reduction`,
+// `is_device_ptr`, `uses_allocators` clauses.
+using TargetOpClauseOps = + detail::Clauses; + +using TaskGroupOpClauseOps = + detail::Clauses; + +using TaskLoopOpClauseOps = + detail::Clauses; + +// TODO `affinity`, `detach` clauses. +using TaskOpClauseOps = + detail::Clauses; + +// TODO `depend`, `nowait` clauses. +using TaskWaitOpClauseOps = detail::Clauses<>; + +using TeamsOpClauseOps = + detail::Clauses; + +// TODO `allocate` clause. +using WsloopOpClauseOps = + detail::Clauses; + +} // namespace omp +} // namespace lower +} // namespace Fortran diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index 3b72148867874..2946d3ef2e6e1 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -60,7 +60,7 @@ ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType( void ReductionProcessor::addReductionSym( const Fortran::parser::OmpReductionClause &reduction, - llvm::SmallVector &symbols) { + llvm::SmallVectorImpl &symbols) { const auto &objectList{std::get(reduction.t)}; for (const Fortran::parser::OmpObject &ompObject : objectList.v) { diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.h b/flang/lib/Lower/OpenMP/ReductionProcessor.h index 85c286ead5282..abbd737084dc5 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.h +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.h @@ -101,7 +101,7 @@ class ReductionProcessor { static void addReductionSym( const Fortran::parser::OmpReductionClause &reduction, - llvm::SmallVector &symbols); + llvm::SmallVectorImpl &symbols); /// Creates an OpenMP reduction declaration and inserts it into the provided /// symbol table. The declaration has a constant initializer with the neutral diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index d1bf73ba1dfd2..2858412d77561 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -67,6 +68,24 @@ void genObjectList(const Fortran::parser::OmpObjectList &objectList, } } +mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, + std::size_t loopVarTypeSize) { + // OpenMP runtime requires 32-bit or 64-bit loop variables. 
+ loopVarTypeSize = loopVarTypeSize * 8; + if (loopVarTypeSize < 32) { + loopVarTypeSize = 32; + } else if (loopVarTypeSize > 64) { + loopVarTypeSize = 64; + mlir::emitWarning(converter.getCurrentLocation(), + "OpenMP loop iteration variable cannot have more than 64 " + "bits size and will be narrowed into 64 bits."); + } + assert((loopVarTypeSize == 32 || loopVarTypeSize == 64) && + "OpenMP loop iteration variable size must be transformed into 32-bit " + "or 64-bit"); + return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize); +} + void gatherFuncAndVarSyms( const Fortran::parser::OmpObjectList &objList, mlir::omp::DeclareTargetCaptureClause clause, diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 00b0165cec554..369654e546290 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -68,6 +68,9 @@ void genObjectList(const Fortran::parser::OmpObjectList &objectList, Fortran::lower::AbstractConverter &converter, llvm::SmallVectorImpl &operands); +mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, + std::size_t loopVarTypeSize); + mlir::omp::TargetOp findParentTargetOp(mlir::OpBuilder &builder); } // namespace omp diff --git a/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp b/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp index f5a3d925ab5d9..e242dc5df6916 100644 --- a/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp @@ -105,9 +105,12 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { step.push_back(rewriter.clone(*stepOp)->getResult(0)); // ==== TODO (1) End ==== - auto wsLoopOp = rewriter.create( - doLoop.getLoc(), lowerBound, upperBound, step); - wsLoopOp.setInclusive(true); + auto wsLoopOp = rewriter.create(doLoop.getLoc()); + rewriter.createBlock(&wsLoopOp.getRegion()); + + // TODO Test that this didn't break something. + auto loopNestOp = rewriter.create( + doLoop.getLoc(), lowerBound, upperBound, step, /*inclusive=*/true); auto outlineableOp = mlir::dyn_cast(*parallelOp); @@ -180,11 +183,11 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { // Clone the loop's body inside the worksharing construct using the mapped // memref values. 
- rewriter.cloneRegionBefore(doLoop.getRegion(), wsLoopOp.getRegion(), - wsLoopOp.getRegion().begin(), mapper); + rewriter.cloneRegionBefore(doLoop.getRegion(), loopNestOp.getRegion(), + loopNestOp.getRegion().begin(), mapper); - mlir::Operation *terminator = wsLoopOp.getRegion().back().getTerminator(); - rewriter.setInsertionPointToEnd(&wsLoopOp.getRegion().back()); + mlir::Operation *terminator = loopNestOp.getRegion().back().getTerminator(); + rewriter.setInsertionPointToEnd(&loopNestOp.getRegion().back()); rewriter.create(terminator->getLoc()); rewriter.eraseOp(terminator); diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index cf3bc9683cc89..7e6a590d10bb0 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -1204,7 +1204,7 @@ def TaskGroupOp : OpenMP_Op<"taskgroup", [AttrSizedOperandSegments, // 2.10.4 taskyield Construct //===----------------------------------------------------------------------===// -def TaskyieldOp : OpenMP_Op<"taskyield"> { +def TaskYieldOp : OpenMP_Op<"taskyield"> { let summary = "taskyield construct"; let description = [{ The taskyield construct specifies that the current task can be suspended @@ -1723,6 +1723,10 @@ def TargetOp : OpenMP_Op<"target",[IsolatedFromAbove, MapClauseOwningOpInterface TODO: is_device_ptr, defaultmap, in_reduction }]; + // TODO Remove num_teams_lower, num_teams_upper, teams_thread_limit and + // num_threads args and instead calculate them outside and implicitly map + // them. If not used and implicitly mapped, they can be omitted from the + // outlined function arg list. let arguments = (ins Optional:$if_expr, Optional:$device, Optional:$thread_limit, @@ -1914,7 +1918,7 @@ def OrderedRegionOp : OpenMP_Op<"ordered_region"> { // 2.17.5 taskwait Construct //===----------------------------------------------------------------------===// -def TaskwaitOp : OpenMP_Op<"taskwait"> { +def TaskWaitOp : OpenMP_Op<"taskwait"> { let summary = "taskwait construct"; let description = [{ The taskwait construct specifies a wait on the completion of child tasks diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp index 83eb1653ca950..117db5c39db59 100644 --- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp +++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp @@ -319,8 +319,8 @@ void ConvertOpenMPToLLVMPass::runOnOperation() { populateOpenMPToLLVMConversionPatterns(converter, patterns); LLVMConversionTarget target(getContext()); - target.addLegalOp(); + target.addLegalOp(); configureOpenMPToLLVMConversionLegality(target, converter); if (failed(applyPartialConversion(module, target, std::move(patterns)))) signalPassFailure(); diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index 464a647564ace..7bebd874f47b4 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -461,7 +461,11 @@ struct ParallelOpLowering : public OpRewritePattern { // Replace the loop. { OpBuilder::InsertionGuard allocaGuard(rewriter); - auto loop = rewriter.create( + // TODO Test that this didn't break something. 
+ auto wsloop = rewriter.create(parallelOp.getLoc()); + rewriter.createBlock(&wsloop.getRegion()); + + auto loop = rewriter.create( parallelOp.getLoc(), parallelOp.getLowerBound(), parallelOp.getUpperBound(), parallelOp.getStep()); rewriter.create(loc); @@ -482,9 +486,9 @@ struct ParallelOpLowering : public OpRewritePattern { rewriter.setInsertionPointToEnd(&*scope.getBodyRegion().begin()); rewriter.create(loc, ValueRange()); if (!reductionVariables.empty()) { - loop.setReductionsAttr( + wsloop.setReductionsAttr( ArrayAttr::get(rewriter.getContext(), reductionDeclSymbols)); - loop.getReductionVarsMutable().append(reductionVariables); + wsloop.getReductionVarsMutable().append(reductionVariables); } } } diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index c7130e8e2c9f3..bdaecb5e6de9e 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1933,6 +1933,9 @@ LogicalResult WsLoopOp::verify() { // TODO If composite, must have composite parallel parent or simd and no // wrapper parent. Otherwise, no composite parent. + // TODO If composite, must have composite parallel parent or simd and no + // wrapper parent. Otherwise, no composite parent. + return verifyReductionVarList(*this, getReductions(), getReductionVars()); } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 3605e03ae886e..7e5e3c47e0d03 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -893,31 +893,32 @@ static LogicalResult convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); - auto loop = cast(opInst); + auto wsloop = cast(opInst); + auto loop = cast(wsloop.getWrappedLoop()); // TODO: this should be in the op verifier instead. if (loop.getLowerBound().empty()) return failure(); // Static is the default. auto schedule = - loop.getScheduleVal().value_or(omp::ClauseScheduleKind::Static); + wsloop.getScheduleVal().value_or(omp::ClauseScheduleKind::Static); // Find the loop configuration. llvm::Value *step = moduleTranslation.lookupValue(loop.getStep()[0]); llvm::Type *ivType = step->getType(); llvm::Value *chunk = nullptr; - if (loop.getScheduleChunkVar()) { + if (wsloop.getScheduleChunkVar()) { llvm::Value *chunkVar = - moduleTranslation.lookupValue(loop.getScheduleChunkVar()); + moduleTranslation.lookupValue(wsloop.getScheduleChunkVar()); chunk = builder.CreateSExtOrTrunc(chunkVar, ivType); } SmallVector reductionDecls; - collectReductionDecls(loop, reductionDecls); + collectReductionDecls(wsloop, reductionDecls); llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); DenseMap reductionVariableMap; - allocReductionVars(loop, builder, *ompBuilder, moduleTranslation, allocaIP, + allocReductionVars(wsloop, builder, *ompBuilder, moduleTranslation, allocaIP, reductionDecls, reductionVariableMap); // Store the mapping between reduction variables and their private copies on @@ -929,7 +930,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, // Before the loop, store the initial values of reductions into reduction // variables. 
Although this could be done after allocas, we don't want to mess // up with the alloca insertion point. - for (unsigned i = 0; i < loop.getNumReductionVars(); ++i) { + for (unsigned i = 0; i < wsloop.getNumReductionVars(); ++i) { SmallVector phis; if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral", builder, @@ -1020,10 +1021,10 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, allocaIP = findAllocaInsertPoint(builder, moduleTranslation); // TODO: Handle doacross loops when the ordered clause has a parameter. - bool isOrdered = loop.getOrderedVal().has_value(); + bool isOrdered = wsloop.getOrderedVal().has_value(); std::optional scheduleModifier = - loop.getScheduleModifier(); - bool isSimd = loop.getSimdModifier(); + wsloop.getScheduleModifier(); + bool isSimd = wsloop.getSimdModifier(); bool distributeCodeGen = opInst.getParentOfType(); bool parallelCodeGen = opInst.getParentOfType(); @@ -1036,7 +1037,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, workshareLoopType = llvm::omp::WorksharingLoopType::ForStaticLoop; } ompBuilder->applyWorkshareLoop( - ompLoc.DL, loopInfo, allocaIP, !loop.getNowait(), + ompLoc.DL, loopInfo, allocaIP, !wsloop.getNowait(), convertToScheduleKind(schedule), chunk, isSimd, scheduleModifier == omp::ScheduleModifier::monotonic, scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered, @@ -1049,12 +1050,12 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, builder.restoreIP(afterIP); // Process the reductions if required. - if (loop.getNumReductionVars() == 0) + if (wsloop.getNumReductionVars() == 0) return success(); // Create the reduction generators. We need to own them here because // ReductionInfo only accepts references to the generators. - collectReductionInfo(loop, builder, *ompBuilder, moduleTranslation, + collectReductionInfo(wsloop, builder, *ompBuilder, moduleTranslation, reductionDecls); // The call to createReductions below expects the block to have a // terminator. 
Create an unreachable instruction to serve as terminator @@ -1063,12 +1064,12 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, builder.SetInsertPoint(tempTerminator); llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = - ompBuilder->createReductions(builder.saveIP(), allocaIP, - ompBuilder->RIManager.getReductionInfos(), - loop.getNowait(), /*IsTeamsReduction*/ false, - /*HasDistribute*/ distributeCodeGen); + ompBuilder->createReductions( + builder.saveIP(), allocaIP, ompBuilder->RIManager.getReductionInfos(), + wsloop.getNowait(), /*IsTeamsReduction*/ false, + /*HasDistribute*/ distributeCodeGen); if (!contInsertPoint.getBlock()) - return loop->emitOpError() << "failed to convert reductions"; + return wsloop->emitOpError() << "failed to convert reductions"; auto nextInsertionPoint = ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for); tempTerminator->eraseFromParent(); @@ -1331,7 +1332,8 @@ convertOmpParallel(Operation &opInst1, llvm::IRBuilderBase &builder, static LogicalResult convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { - auto loop = cast(opInst); + auto simd = cast(opInst); + auto loop = cast(simd.getWrappedLoop()); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); @@ -1410,17 +1412,17 @@ convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder, ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); llvm::ConstantInt *simdlen = nullptr; - if (std::optional simdlenVar = loop.getSimdlen()) + if (std::optional simdlenVar = simd.getSimdlen()) simdlen = builder.getInt64(simdlenVar.value()); llvm::ConstantInt *safelen = nullptr; - if (std::optional safelenVar = loop.getSafelen()) + if (std::optional safelenVar = simd.getSafelen()) safelen = builder.getInt64(safelenVar.value()); llvm::MapVector alignedVars; ompBuilder->applySimd( loopInfo, alignedVars, - loop.getIfExpr() ? moduleTranslation.lookupValue(loop.getIfExpr()) + simd.getIfExpr() ? moduleTranslation.lookupValue(simd.getIfExpr()) : nullptr, llvm::omp::OrderKind::OMP_ORDER_unknown, simdlen, safelen); @@ -3320,11 +3322,11 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier); return success(); }) - .Case([&](omp::TaskwaitOp) { + .Case([&](omp::TaskWaitOp) { ompBuilder->createTaskwait(builder.saveIP()); return success(); }) - .Case([&](omp::TaskyieldOp) { + .Case([&](omp::TaskYieldOp) { ompBuilder->createTaskyield(builder.saveIP()); return success(); }) From 0319ceb9306e37077e05ad98c981fb49f0c30a5c Mon Sep 17 00:00:00 2001 From: Akash Banerjee Date: Tue, 26 Mar 2024 13:04:28 +0000 Subject: [PATCH 2/2] Akash- Fix tests related to omp.wsloop. 
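
The test updates below follow from the wrapper-based representation introduced
in the first patch. As a rough sketch (the SSA value names are illustrative and
not taken from any particular test), CHECK lines that previously matched a
single loop operation of the form

  omp.wsloop for (%i) : i32 = (%lb) to (%ub) inclusive step (%step) {
    ...
    omp.yield
  }

now match a wrapper containing a nested loop operation:

  omp.wsloop {
    omp.loopnest (%i) : i32 = (%lb) to (%ub) inclusive step (%step) {
      ...
      omp.yield
    }
  }

Clauses such as schedule, ordered, nowait and reduction stay on the omp.wsloop
wrapper, while the bounds, step and induction variable are carried by the
nested omp.loopnest.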
--- flang/test/Lower/OpenMP/FIR/copyin.f90 | 8 +- .../OpenMP/FIR/lastprivate-commonblock.f90 | 5 +- .../parallel-lastprivate-clause-scalar.f90 | 38 ++-- .../FIR/parallel-private-clause-fixes.f90 | 3 +- .../OpenMP/FIR/parallel-private-clause.f90 | 9 +- .../OpenMP/FIR/parallel-wsloop-firstpriv.f90 | 10 +- .../test/Lower/OpenMP/FIR/parallel-wsloop.f90 | 24 ++- .../Lower/OpenMP/FIR/stop-stmt-in-region.f90 | 3 +- flang/test/Lower/OpenMP/FIR/target.f90 | 3 +- flang/test/Lower/OpenMP/FIR/unstructured.f90 | 9 +- flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 | 11 +- .../test/Lower/OpenMP/FIR/wsloop-collapse.f90 | 3 +- .../Lower/OpenMP/FIR/wsloop-monotonic.f90 | 3 +- .../Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 | 3 +- .../test/Lower/OpenMP/FIR/wsloop-ordered.f90 | 6 +- .../Lower/OpenMP/FIR/wsloop-reduction-add.f90 | 18 +- .../OpenMP/FIR/wsloop-reduction-iand.f90 | 3 +- .../OpenMP/FIR/wsloop-reduction-ieor.f90 | 3 +- .../Lower/OpenMP/FIR/wsloop-reduction-ior.f90 | 5 +- .../FIR/wsloop-reduction-logical-eqv.f90 | 9 +- .../FIR/wsloop-reduction-logical-neqv.f90 | 9 +- .../Lower/OpenMP/FIR/wsloop-reduction-max.f90 | 6 +- .../Lower/OpenMP/FIR/wsloop-reduction-min.f90 | 6 +- flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 | 3 +- .../test/Lower/OpenMP/FIR/wsloop-variable.f90 | 15 +- flang/test/Lower/OpenMP/FIR/wsloop.f90 | 9 +- .../Todo/omp-default-clause-inner-loop.f90 | 3 +- flang/test/Lower/OpenMP/default-clause.f90 | 25 +-- flang/test/Lower/OpenMP/hlfir-wsloop.f90 | 3 +- .../Lower/OpenMP/lastprivate-commonblock.f90 | 3 +- flang/test/Lower/OpenMP/lastprivate-iv.f90 | 6 +- .../parallel-lastprivate-clause-scalar.f90 | 36 ++-- .../OpenMP/parallel-private-clause-fixes.f90 | 3 +- .../Lower/OpenMP/parallel-private-clause.f90 | 9 +- .../OpenMP/parallel-wsloop-firstpriv.f90 | 10 +- flang/test/Lower/OpenMP/parallel-wsloop.f90 | 24 ++- .../test/Lower/OpenMP/stop-stmt-in-region.f90 | 3 +- flang/test/Lower/OpenMP/target.f90 | 3 +- flang/test/Lower/OpenMP/unstructured.f90 | 9 +- flang/test/Lower/OpenMP/wsloop-chunks.f90 | 11 +- flang/test/Lower/OpenMP/wsloop-collapse.f90 | 3 +- flang/test/Lower/OpenMP/wsloop-monotonic.f90 | 3 +- .../test/Lower/OpenMP/wsloop-nonmonotonic.f90 | 3 +- flang/test/Lower/OpenMP/wsloop-ordered.f90 | 6 +- .../OpenMP/wsloop-reduction-add-hlfir.f90 | 3 +- .../Lower/OpenMP/wsloop-reduction-add.f90 | 21 +- .../Lower/OpenMP/wsloop-reduction-iand.f90 | 3 +- .../Lower/OpenMP/wsloop-reduction-ieor.f90 | 3 +- .../Lower/OpenMP/wsloop-reduction-ior.f90 | 3 +- .../OpenMP/wsloop-reduction-logical-and.f90 | 9 +- .../OpenMP/wsloop-reduction-logical-eqv.f90 | 9 +- .../OpenMP/wsloop-reduction-logical-neqv.f90 | 9 +- .../OpenMP/wsloop-reduction-logical-or.f90 | 9 +- .../OpenMP/wsloop-reduction-max-hlfir.f90 | 3 +- .../Lower/OpenMP/wsloop-reduction-max.f90 | 9 +- .../Lower/OpenMP/wsloop-reduction-min.f90 | 9 +- .../Lower/OpenMP/wsloop-reduction-mul.f90 | 21 +- flang/test/Lower/OpenMP/wsloop-simd.f90 | 3 +- .../test/Lower/OpenMP/wsloop-unstructured.f90 | 3 +- flang/test/Lower/OpenMP/wsloop-variable.f90 | 15 +- flang/test/Lower/OpenMP/wsloop.f90 | 9 +- flang/test/Transforms/DoConcurrent/basic.f90 | 3 +- mlir/test/CAPI/execution_engine.c | 5 +- .../OpenMPToLLVM/convert-to-llvmir.mlir | 11 +- .../Conversion/SCFToOpenMP/scf-to-openmp.mlir | 14 +- .../Dialect/LLVMIR/legalize-for-export.mlir | 5 +- mlir/test/Dialect/OpenMP/invalid.mlir | 63 ++++-- .../LLVMIR/omptarget-parallel-wsloop.mlir | 5 +- .../LLVMIR/omptarget-wsloop-collapsed.mlir | 5 +- mlir/test/Target/LLVMIR/omptarget-wsloop.mlir | 10 +- 
mlir/test/Target/LLVMIR/openmp-llvm.mlir | 202 ++++++++++++------ mlir/test/Target/LLVMIR/openmp-nested.mlir | 10 +- mlir/test/Target/LLVMIR/openmp-reduction.mlir | 37 ++-- 73 files changed, 592 insertions(+), 306 deletions(-) diff --git a/flang/test/Lower/OpenMP/FIR/copyin.f90 b/flang/test/Lower/OpenMP/FIR/copyin.f90 index 20023a81977ae..7161722b21090 100644 --- a/flang/test/Lower/OpenMP/FIR/copyin.f90 +++ b/flang/test/Lower/OpenMP/FIR/copyin.f90 @@ -145,7 +145,8 @@ subroutine copyin_derived_type() ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_4]] : !fir.ref ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_9:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_9:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! CHECK: fir.store %[[VAL_9]] to %[[VAL_3]] : !fir.ref ! CHECK: fir.call @_QPsub4(%[[VAL_4]]) {{.*}}: (!fir.ref) -> () ! CHECK: omp.yield @@ -286,7 +287,8 @@ subroutine common_1() !CHECK: %[[val_c1_i32:.*]] = arith.constant 1 : i32 !CHECK: %[[val_19:.*]] = fir.load %[[val_13]] : !fir.ref !CHECK: %[[val_c1_i32_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_19]]) inclusive step (%[[val_c1_i32_2]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_19]]) inclusive step (%[[val_c1_i32_2]]) { !CHECK: fir.store %[[arg]] to %[[val_9]] : !fir.ref !CHECK: %[[val_20:.*]] = fir.load %[[val_16]] : !fir.ref !CHECK: %[[val_21:.*]] = fir.load %[[val_9]] : !fir.ref @@ -303,7 +305,7 @@ subroutine common_2() integer :: y common /d/ x, y !$omp threadprivate(/d/) - + !$omp parallel do copyin(/d/) do i = 1, x y = y + i diff --git a/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 b/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 index 389bcba35f77f..f779fc9a775cb 100644 --- a/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 +++ b/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 @@ -1,4 +1,4 @@ -! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s +! 
RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s !CHECK: func.func @_QPlastprivate_common() { !CHECK: %[[val_0:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} @@ -17,7 +17,8 @@ !CHECK: %[[val_c1_i32:.*]] = arith.constant 1 : i32 !CHECK: %[[val_c100_i32:.*]] = arith.constant 100 : i32 !CHECK: %[[val_c1_i32_0:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_c100_i32]]) inclusive step (%[[val_c1_i32_0]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_c100_i32]]) inclusive step (%[[val_c1_i32_0]]) { !CHECK: fir.store %[[arg]] to %[[val_0]] : !fir.ref !CHECK: %[[val_11:.*]] = arith.addi %[[arg]], %[[val_c1_i32_0]] : i32 !CHECK: %[[val_c0_i32:.*]] = arith.constant 0 : i32 diff --git a/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 index 2060e2062c1a3..68867a0bd4149 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 @@ -9,16 +9,17 @@ !CHECK-DAG: %[[ARG1_REF:.*]] = fir.convert %[[ARG1_UNBOX]]#0 : (!fir.ref>) -> !fir.ref> !CHECK: omp.parallel { -!CHECK-DAG: %[[ARG1_PVT:.*]] = fir.alloca !fir.char<1,5> {bindc_name = "arg1", +!CHECK-DAG: %[[ARG1_PVT:.*]] = fir.alloca !fir.char<1,5> {bindc_name = "arg1", ! Check that we are accessing the clone inside the loop -!CHECK-DAG: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK-DAG: omp.wsloop { +!CHECK-DAG: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { !CHECK-DAG: %[[UNIT:.*]] = arith.constant 6 : i32 !CHECK-NEXT: %[[ADDR:.*]] = fir.address_of(@_QQclX -!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] +!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] !CHECK-NEXT: %[[CNST:.*]] = arith.constant !CHECK-NEXT: %[[CALL_BEGIN_IO:.*]] = fir.call @_FortranAioBeginExternalListOutput(%[[UNIT]], %[[CVT0]], %[[CNST]]) {{.*}}: (i32, !fir.ref, i32) -> !fir.ref -!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT]] +!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT]] !CHECK-NEXT: %[[CVT_0_2:.*]] = fir.convert %[[FIVE]] !CHECK-NEXT: %[[CALL_OP_ASCII:.*]] = fir.call @_FortranAioOutputAscii(%[[CALL_BEGIN_IO]], %[[CVT_0_1]], %[[CVT_0_2]]) !CHECK-NEXT: %[[CALL_END_IO:.*]] = fir.call @_FortranAioEndIoStatement(%[[CALL_BEGIN_IO]]) @@ -37,12 +38,12 @@ !CHECK-DAG: %[[CVT:.*]] = fir.convert %[[ARG1_REF]] : (!fir.ref>) -> !fir.ref !CHECK-DAG: %[[CVT1:.*]] = fir.convert %[[ARG1_PVT]] : (!fir.ref>) -> !fir.ref !CHECK-DAG: fir.call @llvm.memmove.p0.p0.i64(%[[CVT]], %[[CVT1]]{{.*}}) -!CHECK-DAG: } +!CHECK-DAG: } !CHECK-DAG: omp.yield subroutine lastprivate_character(arg1) character(5) :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) do n = 1, 5 arg1(n:n) = 'c' @@ -55,7 +56,8 @@ subroutine lastprivate_character(arg1) !CHECK: func @_QPlastprivate_int(%[[ARG1:.*]]: !fir.ref {fir.bindc_name = "arg1"}) { !CHECK-DAG: omp.parallel { !CHECK-DAG: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "arg1" -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! 
Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -75,7 +77,7 @@ subroutine lastprivate_character(arg1) subroutine lastprivate_int(arg1) integer :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) do n = 1, 5 arg1 = 2 @@ -90,7 +92,8 @@ subroutine lastprivate_int(arg1) !CHECK: omp.parallel { !CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1" !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -111,7 +114,7 @@ subroutine lastprivate_int(arg1) subroutine mult_lastprivate_int(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) LASTPRIVATE(arg2) do n = 1, 5 arg1 = 2 @@ -127,7 +130,8 @@ subroutine mult_lastprivate_int(arg1, arg2) !CHECK: omp.parallel { !CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1" !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { !Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -148,7 +152,7 @@ subroutine mult_lastprivate_int(arg1, arg2) subroutine mult_lastprivate_int2(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1, arg2) do n = 1, 5 arg1 = 2 @@ -169,7 +173,8 @@ subroutine mult_lastprivate_int2(arg1, arg2) ! Lastprivate Allocation !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" !CHECK-NOT: omp.barrier -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -188,7 +193,7 @@ subroutine mult_lastprivate_int2(arg1, arg2) subroutine firstpriv_lastpriv_int(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg2) do n = 1, 5 arg1 = 2 @@ -207,7 +212,8 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) !CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1]] : !fir.ref !CHECK-NEXT: fir.store %[[FPV_LD]] to %[[CLONE1]] : !fir.ref !CHECK-NEXT: omp.barrier -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 @@ -225,7 +231,7 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) subroutine firstpriv_lastpriv_int2(arg1) integer :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg1) do n = 1, 5 arg1 = 2 diff --git a/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 index c99bf761333b8..ce46925327ba1 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 @@ -13,7 +13,8 @@ ! CHECK: %[[ONE:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_4:.*]] : !fir.ref ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { +! CHECK: omp.wsloop { +! 
CHECK: omp.loopnest (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { ! CHECK: fir.store %[[VAL_6]] to %[[PRIV_I]] : !fir.ref ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index diff --git a/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 index 8f5d280943cc2..204909e30cb96 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 @@ -259,7 +259,8 @@ subroutine simple_loop_1 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref @@ -295,7 +296,8 @@ subroutine simple_loop_2 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO PRIVATE(r) do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref @@ -330,7 +332,8 @@ subroutine simple_loop_3 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(r) do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 index 6eb39a2f63725..b26e618693316 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 @@ -3,7 +3,7 @@ ! RUN: bbc -fopenmp -emit-fir -hlfir=false %s -o - | FileCheck %s -! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}) +! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}) subroutine omp_do_firstprivate(a) integer::a integer::n @@ -17,7 +17,8 @@ subroutine omp_do_firstprivate(a) ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE]] : !fir.ref ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK-NEXT: omp.wsloop for (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK-NEXT: omp.wsloop { + ! CHECK-NEXT: omp.loopnest (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ! CHECK-NEXT: fir.store %[[ARG1]] to %[[REF]] : !fir.ref ! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK-NEXT: omp.yield @@ -29,7 +30,7 @@ subroutine omp_do_firstprivate(a) call bar(a) end subroutine omp_do_firstprivate -! CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}) +! 
CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}) subroutine omp_do_firstprivate2(a, n) integer::a integer::n @@ -48,7 +49,8 @@ subroutine omp_do_firstprivate2(a, n) ! CHECK: %[[LB:.*]] = fir.load %[[CLONE]] : !fir.ref ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE1]] : !fir.ref ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK-NEXT: omp.wsloop for (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK-NEXT: omp.wsloop { + ! CHECK-NEXT: omp.loopnest (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ! CHECK-NEXT: fir.store %[[ARG2]] to %[[REF]] : !fir.ref ! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK-NEXT: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 b/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 index 8649cf284ffd9..b1003253822da 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 @@ -9,7 +9,8 @@ subroutine simple_parallel_do ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref @@ -35,7 +36,8 @@ subroutine parallel_do_with_parallel_clauses(cond, nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref @@ -58,7 +60,8 @@ subroutine parallel_do_with_clauses(nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop schedule(dynamic) { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref @@ -90,7 +93,8 @@ subroutine parallel_do_with_privatisation_clauses(cond,nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref @@ -140,7 +144,8 @@ end subroutine parallel_private_do ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! 
CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV]] : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! CHECK: omp.yield @@ -182,7 +187,8 @@ end subroutine omp_parallel_multiple_firstprivate_do ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield @@ -224,7 +230,8 @@ end subroutine parallel_do_private ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV_ADDR]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! CHECK: omp.yield @@ -266,7 +273,8 @@ end subroutine omp_parallel_do_multiple_firstprivate ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 b/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 index d6c10bdee88d5..2c6f460ceacac 100644 --- a/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 +++ b/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 @@ -77,7 +77,8 @@ subroutine test_stop_in_region3() ! CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { ! CHECK: fir.store %[[VAL_6]] to %[[VAL_0]] : !fir.ref ! CHECK: cf.br ^bb1 ! 
CHECK: ^bb1: diff --git a/flang/test/Lower/OpenMP/FIR/target.f90 b/flang/test/Lower/OpenMP/FIR/target.f90 index 3962603572ba0..9e113e2e59d8c 100644 --- a/flang/test/Lower/OpenMP/FIR/target.f90 +++ b/flang/test/Lower/OpenMP/FIR/target.f90 @@ -487,7 +487,8 @@ subroutine omp_target_parallel_do !CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 !CHECK: %[[VAL_6:.*]] = arith.constant 1024 : i32 !CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 - !CHECK: omp.wsloop for (%[[VAL_8:.*]]) : i32 = (%[[VAL_5]]) to (%[[VAL_6]]) inclusive step (%[[VAL_7]]) { + !CHECK: omp.wsloop { + !CHECK: omp.loopnest (%[[VAL_8:.*]]) : i32 = (%[[VAL_5]]) to (%[[VAL_6]]) inclusive step (%[[VAL_7]]) { !CHECK: fir.store %[[VAL_8]] to %[[VAL_4]] : !fir.ref !CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 !CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_4]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/unstructured.f90 b/flang/test/Lower/OpenMP/FIR/unstructured.f90 index bfaf38b7ef1af..390b3a8746e8b 100644 --- a/flang/test/Lower/OpenMP/FIR/unstructured.f90 +++ b/flang/test/Lower/OpenMP/FIR/unstructured.f90 @@ -67,14 +67,16 @@ subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct ! CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 ! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb4 ! CHECK: ^bb2: // pred: ^bb1 -! CHECK: omp.wsloop for (%[[ARG1:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG1:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG1]] to %[[ALLOCA_2]] : !fir.ref ! CHECK: @_FortranAioBeginExternalListOutput ! CHECK: %[[LOAD_1:.*]] = fir.load %[[ALLOCA_2]] : !fir.ref ! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_1]]) ! CHECK: omp.yield ! CHECK: } -! CHECK: omp.wsloop for (%[[ARG2:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG2:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG2]] to %[[ALLOCA_1]] : !fir.ref ! CHECK: br ^bb1 ! CHECK: ^bb2: // 2 preds: ^bb1, ^bb5 @@ -117,7 +119,8 @@ subroutine ss3(n) ! nested unstructured OpenMP constructs ! CHECK-LABEL: func @_QPss4{{.*}} { ! CHECK: omp.parallel { ! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 {{{.*}}, pinned} -! CHECK: omp.wsloop for (%[[ARG:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG]] to %[[ALLOCA]] : !fir.ref ! CHECK: %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}} ! CHECK: %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}} diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 index 4030f46299d0b..4b6498adb31de 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 @@ -19,7 +19,8 @@ program wsloop ! CHECK: %[[VAL_3:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 4 : i32 -! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait for (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { ! CHECK: fir.store %[[ARG0]] to %[[STORE_IV:.*]] : !fir.ref ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]] : !fir.ref ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 @@ -37,7 +38,8 @@ program wsloop ! CHECK: %[[VAL_15:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_17:.*]] = arith.constant 4 : i32 -! 
CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait for (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { ! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref ! CHECK: %[[VAL_24:.*]] = arith.constant 2 : i32 ! CHECK: %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref @@ -45,7 +47,7 @@ program wsloop ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_25]]) {{.*}}: (!fir.ref, i32) -> i1 ! CHECK: omp.yield ! CHECK: } - + end do !$OMP END DO NOWAIT chunk = 6 @@ -61,7 +63,8 @@ program wsloop ! CHECK: %[[VAL_30:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_31:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_0]] : !fir.ref -! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait for (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { ! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref ! CHECK: %[[VAL_39:.*]] = arith.constant 3 : i32 ! CHECK: %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 index 933fc0910e338..aa66b49edf9ec 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 @@ -39,7 +39,8 @@ program wsloop_collapse do i = 1, a do j= 1, b do k = 1, c -! CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) { ! CHECK: fir.store %[[ARG0]] to %[[STORE_IV0:.*]] : !fir.ref ! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref ! 
CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 index 1c381475f6cbb..70f0e7d00b4f9 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 @@ -15,7 +15,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(dynamic, monotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(dynamic, monotonic) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 index 3f425200b8fa4..9170a75b8248e 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 @@ -16,7 +16,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(dynamic, nonmonotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(dynamic, nonmonotonic) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90 index 7548d7a597228..9b3daba2f170b 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90 @@ -6,7 +6,8 @@ subroutine wsloop_ordered_no_para() integer :: a(10), i -! CHECK: omp.wsloop ordered(0) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +! CHECK: omp.wsloop ordered(0) { +! CHECK: omp.loopnest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { ! CHECK: omp.yield ! CHECK: } @@ -25,7 +26,8 @@ subroutine wsloop_ordered_with_para() integer :: a(10), i ! CHECK: func @_QPwsloop_ordered_with_para() { -! CHECK: omp.wsloop ordered(1) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +! CHECK: omp.wsloop ordered(1) { +! CHECK: omp.loopnest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { ! CHECK: omp.yield ! CHECK: } diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 index 5664529416fe8..a620cd4852965 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 @@ -61,7 +61,8 @@ ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { ! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref ! 
CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref
 ! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref
@@ -97,7 +98,8 @@ subroutine simple_int_reduction
 ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32
 ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) {
+! CHECK: omp.loopnest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
 ! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref
 ! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref
 ! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref
@@ -133,7 +135,8 @@ subroutine simple_real_reduction
 ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32
 ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) {
+! CHECK: omp.loopnest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
 ! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref
 ! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref
 ! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_7]] : !fir.ref
@@ -210,7 +213,8 @@ subroutine simple_real_reduction_switch_order
 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
 ! CHECK: %[[VAL_9:.*]] = arith.constant 100 : i32
 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref) for (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref) {
+! CHECK: omp.loopnest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
 ! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref
 ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref
 ! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref
@@ -263,7 +267,8 @@ subroutine multiple_int_reductions_same_type
 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
 ! CHECK: %[[VAL_9:.*]] = arith.constant 100 : i32
 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref) for (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref) {
+! CHECK: omp.loopnest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
 ! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref
 !
CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref ! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref @@ -322,7 +327,8 @@ subroutine multiple_real_reductions_same_type ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_2]] -> %[[VAL_13:.*]] : !fir.ref, @add_reduction_i_64 %[[VAL_3]] -> %[[VAL_14:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_4]] -> %[[VAL_15:.*]] : !fir.ref, @add_reduction_f_64 %[[VAL_1]] -> %[[VAL_16:.*]] : !fir.ref) for (%[[VAL_17:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_2]] -> %[[VAL_13:.*]] : !fir.ref, @add_reduction_i_64 %[[VAL_3]] -> %[[VAL_14:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_4]] -> %[[VAL_15:.*]] : !fir.ref, @add_reduction_f_64 %[[VAL_1]] -> %[[VAL_16:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_17:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) { ! CHECK: fir.store %[[VAL_17]] to %[[VAL_9]] : !fir.ref ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_13]] : !fir.ref ! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_9]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 index 9ce1725dbab04..c5401204c0ca6 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 @@ -13,7 +13,8 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iandEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[IAND_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[IAND_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 index f6027416246af..7775bb4c2dc97 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 @@ -13,7 +13,8 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_ieorEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 index bc143611abe8d..cef02492931c1 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 @@ -10,10 +10,11 @@ !CHECK: omp.yield(%[[IOR_VAL_I]] : i32) !CHECK-LABEL: @_QPreduction_ior -!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> +!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iorEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[IOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for 
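The replacement lines that follow switch this check from the single fused op to the wrapper form. For reference, a minimal sketch of that form as the updated CHECK lines describe it: `omp.wsloop` carries only the worksharing clauses and wraps a single `omp.loopnest` that holds the bounds, step, and loop body. SSA names (%x, %prv, %iv, %lb, %ub, %step) and the i32 element type are placeholders, not values from the tests:

  omp.wsloop reduction(@ior_i_32 %x -> %prv : !fir.ref<i32>) {
    omp.loopnest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
      // body: load %prv, IOR it with the current array element, store it back
      omp.yield
    }
  }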
+!CHECK: omp.wsloop reduction(@[[IOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 index d5aacd74d8b10..7592da1a8844d 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 @@ -30,7 +30,8 @@ ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref> ! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref @@ -72,7 +73,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64 @@ -122,7 +124,8 @@ subroutine simple_reduction_switch_order(y) ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref>, @eqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref>, @eqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref>) for (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref>, @eqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref>, @eqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref ! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref> ! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 index 9f44e0e26d407..b8cdf4bf46c7f 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 @@ -31,7 +31,8 @@ ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! 
CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref> ! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref @@ -73,7 +74,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64 @@ -123,7 +125,8 @@ subroutine simple_reduction_switch_order(y) ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref>, @neqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref>, @neqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref>) for (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref>, @neqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref>, @neqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref ! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref> ! 
CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90 index af79658491b56..85702c5dc020d 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90 @@ -21,7 +21,8 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_max_intEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref @@ -34,7 +35,8 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_max_realEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90 index 1095718b4b13f..b82d943009e90 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90 @@ -21,7 +21,8 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_min_intEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref @@ -35,7 +36,8 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_min_realEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 index 2e3f8ca3c207d..446ad4279a682 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 @@ -14,7 +14,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(runtime, simd) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(runtime, simd) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[STORE:.*]] : !fir.ref do i=1, 9 diff 
--git a/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 index 4f34f30f3e7c9..f00b7ec0979cb 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 @@ -22,7 +22,8 @@ program wsloop_variable !CHECK: %[[TMP5:.*]] = fir.convert %{{.*}} : (i128) -> i64 !CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64 !CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64 -!CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE_IV0:.*]] : !fir.ref !CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref @@ -46,7 +47,8 @@ program wsloop_variable !CHECK: %[[TMP12:.*]] = arith.constant 1 : i32 !CHECK: %[[TMP13:.*]] = fir.convert %{{.*}} : (i8) -> i32 !CHECK: %[[TMP14:.*]] = fir.convert %{{.*}} : (i64) -> i32 -!CHECK: omp.wsloop for (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE3:.*]] : !fir.ref !CHECK: %[[LOAD3:.*]] = fir.load %[[STORE3]] : !fir.ref @@ -65,7 +67,8 @@ program wsloop_variable !CHECK: %[[TMP17:.*]] = fir.convert %{{.*}} : (i8) -> i64 !CHECK: %[[TMP18:.*]] = fir.convert %{{.*}} : (i16) -> i64 !CHECK: %[[TMP19:.*]] = fir.convert %{{.*}} : (i32) -> i64 -!CHECK: omp.wsloop for (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]]) { !CHECK: %[[ARG1_I128:.*]] = fir.convert %[[ARG1]] : (i64) -> i128 !CHECK: fir.store %[[ARG1_I128]] to %[[STORE4:.*]] : !fir.ref !CHECK: %[[LOAD4:.*]] = fir.load %[[STORE4]] : !fir.ref @@ -97,7 +100,8 @@ end program wsloop_variable !CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref !CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_8]] : (i8) -> i32 !CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_9]] : (i16) -> i32 -!CHECK: omp.wsloop for (%[[ARG0:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE_IV:.*]] : !fir.ref !CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_0]] : !fir.ref @@ -146,7 +150,8 @@ subroutine wsloop_variable_sub !CHECK: %[[C1:.*]] = arith.constant 1 : i32 !CHECK: %[[C10:.*]] = arith.constant 10 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[ARG0:.*]]) : i32 = (%[[C1]]) to (%[[C10]]) inclusive step (%[[C1_2]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[C1]]) to (%[[C10]]) inclusive step (%[[C1_2]]) { !CHECK: %[[ARG0_I8:.*]] = fir.convert %[[ARG0]] : (i32) -> i8 !CHECK: fir.store %[[ARG0_I8]] to %[[IV2]] : !fir.ref !CHECK: %[[IV2LOAD:.*]] = fir.load %[[IV2]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop.f90 b/flang/test/Lower/OpenMP/FIR/wsloop.f90 
index abc0489b08ff5..66972c73d2eb0 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop.f90 @@ -11,7 +11,8 @@ subroutine simple_loop ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO do i=1, 9 ! CHECK: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref @@ -34,7 +35,8 @@ subroutine simple_loop_with_step ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 2 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) ! CHECK: fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref !$OMP DO @@ -57,7 +59,8 @@ subroutine loop_with_schedule_nowait ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop schedule(runtime) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop schedule(runtime) nowait { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO SCHEDULE(runtime) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 b/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 index 5c624d31b5f36..4c084d81ffa89 100644 --- a/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 +++ b/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 @@ -12,7 +12,8 @@ ! CHECK: %[[const_1:.*]] = arith.constant 1 : i32 ! CHECK: %[[const_2:.*]] = arith.constant 10 : i32 ! CHECK: %[[const_3:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[ARG:.*]]) : i32 = (%[[const_1]]) to (%[[const_2]]) inclusive step (%[[const_3]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG:.*]]) : i32 = (%[[const_1]]) to (%[[const_2]]) inclusive step (%[[const_3]]) { ! CHECK: fir.store %[[ARG]] to %[[TEMP]] : !fir.ref ! EXPECTED: %[[temp_1:.*]] = fir.load %[[PRIVATE_Z]] : !fir.ref ! 
CHECK: %[[temp_1:.*]] = fir.load %{{.*}} : !fir.ref diff --git a/flang/test/Lower/OpenMP/default-clause.f90 b/flang/test/Lower/OpenMP/default-clause.f90 index 0e118742689d6..401ed4f612467 100644 --- a/flang/test/Lower/OpenMP/default-clause.f90 +++ b/flang/test/Lower/OpenMP/default-clause.f90 @@ -192,7 +192,7 @@ subroutine nested_default_clause_tests !CHECK: %[[INNER_PRIVATE_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFnested_default_clause_testsEk"} !CHECK: %[[INNER_PRIVATE_K_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_K]] {uniq_name = "_QFnested_default_clause_testsEk"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_K_DECL]]#0 : !fir.ref -!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_K_DECL]]#0 temporary_lhs : i32, !fir.ref +!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_K_DECL]]#0 temporary_lhs : i32, !fir.ref !CHECK: %[[CONST:.*]] = arith.constant 30 : i32 !CHECK: hlfir.assign %[[CONST]] to %[[PRIVATE_Y_DECL]]#0 : i32, !fir.ref !CHECK: %[[CONST:.*]] = arith.constant 40 : i32 @@ -205,21 +205,21 @@ subroutine nested_default_clause_tests !CHECK: } !CHECK: omp.terminator !CHECK: } - !$omp parallel firstprivate(x) private(y) shared(w) default(private) + !$omp parallel firstprivate(x) private(y) shared(w) default(private) !$omp parallel default(private) y = 20 - x = 10 - !$omp end parallel + x = 10 + !$omp end parallel - !$omp parallel default(firstprivate) shared(y) private(w) + !$omp parallel default(firstprivate) shared(y) private(w) y = 30 - w = 40 + w = 40 z = 50 k = 40 !$omp end parallel !$omp end parallel - - + + !CHECK: omp.parallel { !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"} @@ -260,8 +260,8 @@ subroutine nested_default_clause_tests !$omp parallel default(private) shared(z) w = x + z !$omp end parallel - !$omp end parallel - + !$omp end parallel + !CHECK: omp.parallel { !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"} !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -319,7 +319,7 @@ subroutine nested_default_clause_tests !CHECK: omp.terminator !CHECK: } !CHECK: return -!CHECK: } +!CHECK: } !$omp parallel default(firstprivate) !$omp single x = y @@ -352,7 +352,8 @@ subroutine skipped_default_clause_checks() type(it)::iii !CHECK: omp.parallel { -!CHECK: omp.wsloop reduction(@min_i_32 %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref) for (%[[ARG:.*]]) {{.*}} { +!CHECK: omp.wsloop reduction(@min_i_32 %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest (%[[ARG:.*]]) {{.*}} { !CHECK: omp.yield !CHECK: } !CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/hlfir-wsloop.f90 b/flang/test/Lower/OpenMP/hlfir-wsloop.f90 index b6be77fe3016d..307be89fb5ecb 100644 --- a/flang/test/Lower/OpenMP/hlfir-wsloop.f90 +++ b/flang/test/Lower/OpenMP/hlfir-wsloop.f90 @@ -12,7 +12,8 @@ subroutine simple_loop !$OMP PARALLEL ! CHECK-DAG: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} ! CHECK: %[[IV:.*]] = fir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref) -> !fir.ref - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_ST]]) to (%[[WS_END]]) inclusive step (%[[WS_ST]]) + ! CHECK: omp.wsloop { + ! 
CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_ST]]) to (%[[WS_END]]) inclusive step (%[[WS_ST]]) !$OMP DO do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV:.*]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 b/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 index a11bdee156637..45450a5a8fd9b 100644 --- a/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 +++ b/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 @@ -15,7 +15,8 @@ !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X_REF]] {uniq_name = "_QFlastprivate_commonEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[PRIVATE_Y_REF:.*]] = fir.alloca f32 {bindc_name = "y", pinned, uniq_name = "_QFlastprivate_commonEy"} !CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y_REF]] {uniq_name = "_QFlastprivate_commonEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { !CHECK: %[[V:.*]] = arith.addi %[[I]], %{{.*}} : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 !CHECK: %[[NEG_STEP:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32 diff --git a/flang/test/Lower/OpenMP/lastprivate-iv.f90 b/flang/test/Lower/OpenMP/lastprivate-iv.f90 index 70fe500129d12..16204ed79b019 100644 --- a/flang/test/Lower/OpenMP/lastprivate-iv.f90 +++ b/flang/test/Lower/OpenMP/lastprivate-iv.f90 @@ -9,7 +9,8 @@ !CHECK: %[[LB:.*]] = arith.constant 4 : i32 !CHECK: %[[UB:.*]] = arith.constant 10 : i32 !CHECK: %[[STEP:.*]] = arith.constant 3 : i32 -!CHECK: omp.wsloop for (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { !CHECK: fir.store %[[IV]] to %[[I]]#1 : !fir.ref !CHECK: %[[V:.*]] = arith.addi %[[IV]], %[[STEP]] : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 @@ -41,7 +42,8 @@ subroutine lastprivate_iv_inc() !CHECK: %[[LB:.*]] = arith.constant 10 : i32 !CHECK: %[[UB:.*]] = arith.constant 1 : i32 !CHECK: %[[STEP:.*]] = arith.constant -3 : i32 -!CHECK: omp.wsloop for (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { !CHECK: fir.store %[[IV]] to %[[I]]#1 : !fir.ref !CHECK: %[[V:.*]] = arith.addi %[[IV]], %[[STEP]] : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 diff --git a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 index 28f59c95d60bb..cf4f028987022 100644 --- a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 +++ b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 @@ -14,13 +14,14 @@ !CHECK-DAG: %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] typeparams %[[FIVE]] {uniq_name = "_QFlastprivate_characterEarg1"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) ! 
Check that we are accessing the clone inside the loop -!CHECK-DAG: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK-DAG: omp.wsloop { +!CHECK-DAG: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { !CHECK-DAG: %[[UNIT:.*]] = arith.constant 6 : i32 !CHECK-NEXT: %[[ADDR:.*]] = fir.address_of(@_QQclX -!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] +!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] !CHECK-NEXT: %[[CNST:.*]] = arith.constant !CHECK-NEXT: %[[CALL_BEGIN_IO:.*]] = fir.call @_FortranAioBeginExternalListOutput(%[[UNIT]], %[[CVT0]], %[[CNST]]) {{.*}}: (i32, !fir.ref, i32) -> !fir.ref -!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT_DECL]]#1 +!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT_DECL]]#1 !CHECK-NEXT: %[[CVT_0_2:.*]] = fir.convert %[[FIVE]] !CHECK-NEXT: %[[CALL_OP_ASCII:.*]] = fir.call @_FortranAioOutputAscii(%[[CALL_BEGIN_IO]], %[[CVT_0_1]], %[[CVT_0_2]]) !CHECK-NEXT: %[[CALL_END_IO:.*]] = fir.call @_FortranAioEndIoStatement(%[[CALL_BEGIN_IO]]) @@ -37,12 +38,12 @@ ! Testing lastprivate val update !CHECK-DAG: hlfir.assign %[[ARG1_PVT_DECL]]#0 to %[[ARG1_DECL]]#0 temporary_lhs : !fir.ref>, !fir.ref> -!CHECK-DAG: } +!CHECK-DAG: } !CHECK-DAG: omp.yield subroutine lastprivate_character(arg1) character(5) :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) do n = 1, 5 arg1(n:n) = 'c' @@ -57,7 +58,8 @@ subroutine lastprivate_character(arg1) !CHECK-DAG: omp.parallel { !CHECK-DAG: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "arg1" !CHECK-DAG: %[[CLONE_DECL:.*]]:2 = hlfir.declare %[[CLONE]] {uniq_name = "_QFlastprivate_intEarg1"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -77,7 +79,7 @@ subroutine lastprivate_character(arg1) subroutine lastprivate_int(arg1) integer :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) do n = 1, 5 arg1 = 2 @@ -96,7 +98,8 @@ subroutine lastprivate_int(arg1) !CHECK-DAG: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFmult_lastprivate_intEarg1"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" !CHECK-DAG: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFmult_lastprivate_intEarg2"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! 
Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -117,7 +120,7 @@ subroutine lastprivate_int(arg1) subroutine mult_lastprivate_int(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) LASTPRIVATE(arg2) do n = 1, 5 arg1 = 2 @@ -137,7 +140,8 @@ subroutine mult_lastprivate_int(arg1, arg2) !CHECK-DAG: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFmult_lastprivate_int2Earg1"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" !CHECK-DAG: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFmult_lastprivate_int2Earg2"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { !Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -158,7 +162,7 @@ subroutine mult_lastprivate_int(arg1, arg2) subroutine mult_lastprivate_int2(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1, arg2) do n = 1, 5 arg1 = 2 @@ -183,7 +187,8 @@ subroutine mult_lastprivate_int2(arg1, arg2) !CHECK: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" !CHECK: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFfirstpriv_lastpriv_intEarg2"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK-NOT: omp.barrier -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -202,7 +207,7 @@ subroutine mult_lastprivate_int2(arg1, arg2) subroutine firstpriv_lastpriv_int(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg2) do n = 1, 5 arg1 = 2 @@ -223,7 +228,8 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) !CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref !CHECK-NEXT: hlfir.assign %[[FPV_LD]] to %[[CLONE1_DECL]]#0 temporary_lhs : i32, !fir.ref !CHECK-NEXT: omp.barrier -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 @@ -241,7 +247,7 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) subroutine firstpriv_lastpriv_int2(arg1) integer :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg1) do n = 1, 5 arg1 = 2 diff --git a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 index 8533106b7ac48..53846bef2d4f9 100644 --- a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 +++ b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 @@ -21,7 +21,8 @@ ! CHECK: %[[ONE:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_3:.*]] = fir.load %[[GAMA_DECL]]#0 : !fir.ref ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { ! CHECK: fir.store %[[VAL_6]] to %[[PRIV_I_DECL]]#1 : !fir.ref ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! 
CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index diff --git a/flang/test/Lower/OpenMP/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/parallel-private-clause.f90 index 5578b6710da7c..e870d8f8f0669 100644 --- a/flang/test/Lower/OpenMP/parallel-private-clause.f90 +++ b/flang/test/Lower/OpenMP/parallel-private-clause.f90 @@ -304,7 +304,8 @@ subroutine simple_loop_1 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV_DECL]]#1 : !fir.ref @@ -342,7 +343,8 @@ subroutine simple_loop_2 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO PRIVATE(r) do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV_DECL]]#1 : !fir.ref @@ -379,7 +381,8 @@ subroutine simple_loop_3 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(r) do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV_DECL:.*]]#1 : !fir.ref diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 index 716a7d71bb628..9eb05978b2d4a 100644 --- a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 +++ b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 @@ -3,7 +3,7 @@ ! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s -! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}) +! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}) subroutine omp_do_firstprivate(a) ! CHECK: %[[ARG0_DECL:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_do_firstprivateEa"} : (!fir.ref) -> (!fir.ref, !fir.ref) integer::a @@ -20,7 +20,8 @@ subroutine omp_do_firstprivate(a) ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK-NEXT: omp.wsloop for (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK-NEXT: omp.wsloop { + ! CHECK-NEXT: omp.loopnest (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ! CHECK-NEXT: fir.store %[[ARG1]] to %[[I_PVT_DECL]]#1 : !fir.ref ! CHECK-NEXT: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK-NEXT: omp.yield @@ -32,7 +33,7 @@ subroutine omp_do_firstprivate(a) call bar(a) end subroutine omp_do_firstprivate -! CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}) +! 
CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}) subroutine omp_do_firstprivate2(a, n) ! CHECK: %[[ARG0_DECL:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_do_firstprivate2Ea"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFomp_do_firstprivate2En"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -56,7 +57,8 @@ subroutine omp_do_firstprivate2(a, n) ! CHECK: %[[LB:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref ! CHECK: %[[UB:.*]] = fir.load %[[N_PVT_DECL]]#0 : !fir.ref ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ! CHECK: fir.store %[[ARG2]] to %[[I_PVT_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/parallel-wsloop.f90 b/flang/test/Lower/OpenMP/parallel-wsloop.f90 index c06f941b74b58..fcb2791554ab0 100644 --- a/flang/test/Lower/OpenMP/parallel-wsloop.f90 +++ b/flang/test/Lower/OpenMP/parallel-wsloop.f90 @@ -9,7 +9,8 @@ subroutine simple_parallel_do ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref @@ -37,7 +38,8 @@ subroutine parallel_do_with_parallel_clauses(cond, nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref @@ -61,7 +63,8 @@ subroutine parallel_do_with_clauses(nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop schedule(dynamic) { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref @@ -97,7 +100,8 @@ subroutine parallel_do_with_privatisation_clauses(cond,nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt) do i=1, 9 ! 
CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref @@ -150,7 +154,8 @@ end subroutine parallel_private_do ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV_DECL]]#1, %[[COND_DECL]]#1, %[[NT_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! CHECK: omp.yield @@ -196,7 +201,8 @@ end subroutine omp_parallel_multiple_firstprivate_do ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_DECL]]#1, %[[A_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield @@ -241,7 +247,8 @@ end subroutine parallel_do_private ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV_DECL]]#1, %[[COND_PRIV_DECL]]#1, %[[NT_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! CHECK: omp.yield @@ -287,7 +294,8 @@ end subroutine omp_parallel_do_multiple_firstprivate ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_DECL]]#1, %[[A_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/stop-stmt-in-region.f90 b/flang/test/Lower/OpenMP/stop-stmt-in-region.f90 index fdbabc21b2c9e..fd80c563a8f4f 100644 --- a/flang/test/Lower/OpenMP/stop-stmt-in-region.f90 +++ b/flang/test/Lower/OpenMP/stop-stmt-in-region.f90 @@ -82,7 +82,8 @@ subroutine test_stop_in_region3() ! CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { ! CHECK: fir.store %[[VAL_6]] to %[[VAL_0_DECL]]#1 : !fir.ref ! CHECK: cf.br ^bb1 ! 
CHECK: ^bb1: diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90 index 43598aff08bfe..e1c2a1f0966f5 100644 --- a/flang/test/Lower/OpenMP/target.f90 +++ b/flang/test/Lower/OpenMP/target.f90 @@ -594,7 +594,8 @@ subroutine omp_target_parallel_do !$omp target parallel do map(tofrom: a) !CHECK: %[[I_PVT_ALLOCA:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_ALLOCA]] {uniq_name = "_QFomp_target_parallel_doEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) - !CHECK: omp.wsloop for (%[[I_VAL:.*]]) : i32 + !CHECK: omp.wsloop { + !CHECK: omp.loopnest (%[[I_VAL:.*]]) : i32 do i = 1, 1024 !CHECK: fir.store %[[I_VAL]] to %[[I_PVT_DECL]]#1 : !fir.ref !CHECK: %[[C10:.*]] = arith.constant 10 : i32 diff --git a/flang/test/Lower/OpenMP/unstructured.f90 b/flang/test/Lower/OpenMP/unstructured.f90 index e5bf980ce90fd..13d3c2bf04206 100644 --- a/flang/test/Lower/OpenMP/unstructured.f90 +++ b/flang/test/Lower/OpenMP/unstructured.f90 @@ -70,14 +70,16 @@ subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct ! CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 ! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb4 ! CHECK: ^bb2: // pred: ^bb1 -! CHECK: omp.wsloop for (%[[ARG1:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG1:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG1]] to %[[OMP_LOOP_K_DECL]]#1 : !fir.ref ! CHECK: @_FortranAioBeginExternalListOutput ! CHECK: %[[LOAD_1:.*]] = fir.load %[[OMP_LOOP_K_DECL]]#0 : !fir.ref ! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_1]]) ! CHECK: omp.yield ! CHECK: } -! CHECK: omp.wsloop for (%[[ARG2:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG2:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG2]] to %[[OMP_LOOP_J_DECL]]#1 : !fir.ref ! CHECK: br ^bb1 ! CHECK: ^bb2: // 2 preds: ^bb1, ^bb5 @@ -121,7 +123,8 @@ subroutine ss3(n) ! nested unstructured OpenMP constructs ! CHECK: omp.parallel { ! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 {{{.*}}, pinned} ! CHECK: %[[OMP_LOOP_J_DECL:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "_QFss4Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: omp.wsloop for (%[[ARG:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG]] to %[[OMP_LOOP_J_DECL]]#1 : !fir.ref ! CHECK: %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}} ! CHECK: %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}} diff --git a/flang/test/Lower/OpenMP/wsloop-chunks.f90 b/flang/test/Lower/OpenMP/wsloop-chunks.f90 index 5016c8985bda0..2fe787150de32 100644 --- a/flang/test/Lower/OpenMP/wsloop-chunks.f90 +++ b/flang/test/Lower/OpenMP/wsloop-chunks.f90 @@ -20,7 +20,8 @@ program wsloop ! CHECK: %[[VAL_3:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 4 : i32 -! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait for (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { ! CHECK: fir.store %[[ARG0]] to %[[STORE_IV:.*]]#1 : !fir.ref ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]]#0 : !fir.ref ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 @@ -38,7 +39,8 @@ program wsloop ! CHECK: %[[VAL_15:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! 
CHECK: %[[VAL_17:.*]] = arith.constant 4 : i32 -! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait for (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { ! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]]#1 : !fir.ref ! CHECK: %[[VAL_24:.*]] = arith.constant 2 : i32 ! CHECK: %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]]#0 : !fir.ref @@ -46,7 +48,7 @@ program wsloop ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_25]]) {{.*}}: (!fir.ref, i32) -> i1 ! CHECK: omp.yield ! CHECK: } - + end do !$OMP END DO NOWAIT chunk = 6 @@ -62,7 +64,8 @@ program wsloop ! CHECK: %[[VAL_30:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_31:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_0]]#0 : !fir.ref -! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait for (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { ! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]]#1 : !fir.ref ! CHECK: %[[VAL_39:.*]] = arith.constant 3 : i32 ! CHECK: %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/wsloop-collapse.f90 index c93fcf4ef968d..85e1134e68720 100644 --- a/flang/test/Lower/OpenMP/wsloop-collapse.f90 +++ b/flang/test/Lower/OpenMP/wsloop-collapse.f90 @@ -49,7 +49,8 @@ program wsloop_collapse !CHECK: %[[VAL_30:.*]] = arith.constant 1 : i32 !CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref !CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) { !$omp do collapse(3) do i = 1, a do j= 1, b diff --git a/flang/test/Lower/OpenMP/wsloop-monotonic.f90 b/flang/test/Lower/OpenMP/wsloop-monotonic.f90 index fba9105b98181..5d0cfb337bb6e 100644 --- a/flang/test/Lower/OpenMP/wsloop-monotonic.f90 +++ b/flang/test/Lower/OpenMP/wsloop-monotonic.f90 @@ -15,7 +15,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(dynamic, monotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(dynamic, monotonic) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[ALLOCA_IV:.*]]#1 : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90 b/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90 index 1bd7a2edc0f52..024c4ebf433fb 100644 --- a/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90 +++ b/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90 @@ -17,7 +17,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : 
i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(dynamic, nonmonotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(dynamic, nonmonotonic) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[ALLOCA_IV]]#1 : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/wsloop-ordered.f90 b/flang/test/Lower/OpenMP/wsloop-ordered.f90 index 5185d2d085bac..5483b9228e035 100644 --- a/flang/test/Lower/OpenMP/wsloop-ordered.f90 +++ b/flang/test/Lower/OpenMP/wsloop-ordered.f90 @@ -6,7 +6,8 @@ subroutine wsloop_ordered_no_para() integer :: a(10), i -! CHECK: omp.wsloop ordered(0) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +! CHECK: omp.wsloop ordered(0) { +! CHECK: omp.loopnest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { ! CHECK: omp.yield ! CHECK: } @@ -25,7 +26,8 @@ subroutine wsloop_ordered_with_para() integer :: a(10), i ! CHECK: func @_QPwsloop_ordered_with_para() { -! CHECK: omp.wsloop ordered(1) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +! CHECK: omp.wsloop ordered(1) { +! CHECK: omp.loopnest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { ! CHECK: omp.yield ! CHECK: } diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 index 4d30282fc8c21..bd9f68ecc7e4b 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 @@ -27,7 +27,8 @@ ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 index 7df4f37b98df8..a8f4e50f6a51c 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 @@ -68,7 +68,8 @@ ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! 
CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref @@ -108,7 +109,8 @@ subroutine simple_int_reduction ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref @@ -149,7 +151,8 @@ subroutine simple_real_reduction ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref @@ -188,7 +191,8 @@ subroutine simple_int_reduction_switch_order ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref @@ -236,7 +240,8 @@ subroutine simple_real_reduction_switch_order ! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { ! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref ! 
CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -297,7 +302,8 @@ subroutine multiple_int_reductions_same_type ! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { ! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref ! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -365,7 +371,8 @@ subroutine multiple_real_reductions_same_type ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_17:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_18:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref, @add_reduction_i_64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref, @add_reduction_f_64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref) for (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref, @add_reduction_i_64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref, @add_reduction_f_64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { ! CHECK: fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref ! CHECK: %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 index 9588531f6c909..6af928f6c9fb9 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 @@ -29,7 +29,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@iand_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@iand_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! 
CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 index a14a37101874c..8a7d9366cf387 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 @@ -19,7 +19,8 @@ !CHECK: omp.parallel !CHECK: %[[I_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFreduction_ieorEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: fir.store %{{.*}} to %[[I_DECL]]#1 : !fir.ref !CHECK: %[[PRV_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[I_32:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 index 3b5e327439358..8f28916724267 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 @@ -29,7 +29,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@ior_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) +! CHECK: omp.wsloop reduction(@ior_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 index 17d321620cca8..4303f15990ecc 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 @@ -36,7 +36,8 @@ ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> @@ -84,7 +85,8 @@ end subroutine simple_reduction ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! 
CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref @@ -141,7 +143,8 @@ subroutine simple_reduction_switch_order(y) ! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { ! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref ! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 index 8204e4c878cb0..994477f4edcec 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 @@ -36,7 +36,8 @@ ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> @@ -83,7 +84,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! 
CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref @@ -140,7 +142,8 @@ subroutine simple_reduction_switch_order(y) ! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { ! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref ! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 index 623368a50e864..29849b68be572 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 @@ -36,7 +36,8 @@ ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> @@ -84,7 +85,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref @@ -143,7 +145,8 @@ subroutine simple_reduction_switch_order(y) ! 
CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { ! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref ! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 index f1ae1bc687cd5..719677ca729f2 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 @@ -36,7 +36,8 @@ ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> @@ -83,7 +84,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref @@ -140,7 +142,8 @@ subroutine simple_reduction_switch_order(y) ! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! 
CHECK: omp.wsloop reduction(@or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { ! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref ! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 index ed25cedae90c6..45ae04f3712ac 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 @@ -29,7 +29,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@max_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@max_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 index ea3b1bebce038..09856f66b1e2c 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 @@ -40,7 +40,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@max_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@max_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref @@ -69,7 +70,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@max_f_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@max_f_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! 
CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref @@ -88,7 +90,8 @@ ! CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_33:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_34:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@max_f_32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref) for (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) { +! CHECK: omp.wsloop reduction(@max_f_32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) { ! CHECK: fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref ! CHECK: %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 index 3aa9001869dc5..eaac48b9e1b5a 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 @@ -40,7 +40,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@min_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@min_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref @@ -69,7 +70,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@min_f_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@min_f_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref @@ -90,7 +92,8 @@ ! CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_33:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_34:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@min_f_32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref) for (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) { +! CHECK: omp.wsloop reduction(@min_f_32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) { ! CHECK: fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref ! CHECK: %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! 
CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 index 4774fba3f33e9..4656960a45ba2 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 @@ -61,7 +61,8 @@ ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref @@ -97,7 +98,8 @@ subroutine simple_int_reduction ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref @@ -134,7 +136,8 @@ subroutine simple_real_reduction ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref @@ -170,7 +173,8 @@ subroutine simple_int_reduction_switch_order ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! 
CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref @@ -215,7 +219,8 @@ subroutine simple_real_reduction_switch_order ! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @multiply_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @multiply_reduction_i_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @multiply_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @multiply_reduction_i_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { ! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref ! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -273,7 +278,8 @@ subroutine multiple_int_reductions_same_type ! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { ! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref ! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -338,7 +344,8 @@ subroutine multiple_real_reductions_same_type ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_17:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_18:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref, @multiply_reduction_i_64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref, @multiply_reduction_f_64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref) for (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { +! 
CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref, @multiply_reduction_i_64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref, @multiply_reduction_f_64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { ! CHECK: fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref ! CHECK: %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Lower/OpenMP/wsloop-simd.f90 b/flang/test/Lower/OpenMP/wsloop-simd.f90 index c3d5e3e0cda59..490f1665a7b1b 100644 --- a/flang/test/Lower/OpenMP/wsloop-simd.f90 +++ b/flang/test/Lower/OpenMP/wsloop-simd.f90 @@ -14,7 +14,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(runtime, simd) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(runtime, simd) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[STORE:.*]]#1 : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/wsloop-unstructured.f90 b/flang/test/Lower/OpenMP/wsloop-unstructured.f90 index 7fe63a1fe607c..f5c1a8126edc1 100644 --- a/flang/test/Lower/OpenMP/wsloop-unstructured.f90 +++ b/flang/test/Lower/OpenMP/wsloop-unstructured.f90 @@ -29,7 +29,8 @@ end subroutine sub ! CHECK-SAME: %[[VAL_2:.*]]: !fir.ref> {fir.bindc_name = "x"}, ! CHECK-SAME: %[[VAL_3:.*]]: !fir.ref> {fir.bindc_name = "y"}) { ! [...] -! CHECK: omp.wsloop for (%[[VAL_53:.*]], %[[VAL_54:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_53:.*]], %[[VAL_54:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) { ! [...] ! CHECK: cf.br ^bb1 ! 
CHECK: ^bb1: diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90 index b3758f1fdc00f..e2a71e3d1b888 100644 --- a/flang/test/Lower/OpenMP/wsloop-variable.f90 +++ b/flang/test/Lower/OpenMP/wsloop-variable.f90 @@ -22,7 +22,8 @@ program wsloop_variable !CHECK: %[[TMP5:.*]] = fir.convert %{{.*}} : (i128) -> i64 !CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64 !CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64 -!CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE_IV0:.*]]#1 : !fir.ref !CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]]#1 : !fir.ref @@ -46,7 +47,8 @@ program wsloop_variable !CHECK: %[[TMP12:.*]] = arith.constant 1 : i32 !CHECK: %[[TMP13:.*]] = fir.convert %{{.*}} : (i8) -> i32 !CHECK: %[[TMP14:.*]] = fir.convert %{{.*}} : (i64) -> i32 -!CHECK: omp.wsloop for (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE3:.*]]#1 : !fir.ref !CHECK: %[[LOAD3:.*]] = fir.load %[[STORE3]]#0 : !fir.ref @@ -64,7 +66,8 @@ program wsloop_variable !CHECK: %[[TMP17:.*]] = fir.convert %{{.*}} : (i8) -> i64 !CHECK: %[[TMP18:.*]] = fir.convert %{{.*}} : (i16) -> i64 !CHECK: %[[TMP19:.*]] = fir.convert %{{.*}} : (i32) -> i64 -!CHECK: omp.wsloop for (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]]) { !CHECK: %[[ARG1_I128:.*]] = fir.convert %[[ARG1]] : (i64) -> i128 !CHECK: fir.store %[[ARG1_I128]] to %[[STORE4:.*]]#1 : !fir.ref !CHECK: %[[LOAD4:.*]] = fir.load %[[STORE4]]#0 : !fir.ref @@ -118,7 +121,8 @@ subroutine wsloop_variable_sub !CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref !CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_23]] : (i8) -> i32 !CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_24]] : (i16) -> i32 -!CHECK: omp.wsloop for (%[[VAL_27:.*]]) : i32 = (%[[VAL_22]]) to (%[[VAL_25]]) inclusive step (%[[VAL_26]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[VAL_27:.*]]) : i32 = (%[[VAL_22]]) to (%[[VAL_25]]) inclusive step (%[[VAL_26]]) { !CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> i16 !CHECK: fir.store %[[VAL_28]] to %[[VAL_3]]#1 : !fir.ref !CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref @@ -160,7 +164,8 @@ subroutine wsloop_variable_sub !CHECK: %[[VAL_50:.*]] = arith.constant 1 : i32 !CHECK: %[[VAL_51:.*]] = arith.constant 10 : i32 !CHECK: %[[VAL_52:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[VAL_53:.*]]) : i32 = (%[[VAL_50]]) to (%[[VAL_51]]) inclusive step (%[[VAL_52]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[VAL_53:.*]]) : i32 = (%[[VAL_50]]) to (%[[VAL_51]]) inclusive step (%[[VAL_52]]) { !CHECK: %[[VAL_54:.*]] = fir.convert %[[VAL_53]] : (i32) -> i8 !CHECK: fir.store %[[VAL_54]] to %[[VAL_1]]#1 : !fir.ref !CHECK: %[[VAL_55:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref diff --git 
a/flang/test/Lower/OpenMP/wsloop.f90 b/flang/test/Lower/OpenMP/wsloop.f90
index 4068f715c3e18..71cd0ddc2763b 100644
--- a/flang/test/Lower/OpenMP/wsloop.f90
+++ b/flang/test/Lower/OpenMP/wsloop.f90
@@ -12,7 +12,8 @@ subroutine simple_loop
   ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK: omp.wsloop {
+  ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP DO
   do i=1, 9
     ! CHECK: fir.store %[[I]] to %[[IV_DECL:.*]]#1 : !fir.ref
@@ -36,7 +37,8 @@ subroutine simple_loop_with_step
   ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK: %[[WS_STEP:.*]] = arith.constant 2 : i32
-  ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK: omp.wsloop {
+  ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   ! CHECK: fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref
   ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref
   !$OMP DO
@@ -60,7 +62,8 @@ subroutine loop_with_schedule_nowait
   ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK: omp.wsloop schedule(runtime) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK: omp.wsloop schedule(runtime) nowait {
+  ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP DO SCHEDULE(runtime)
   do i=1, 9
     ! CHECK: fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref
diff --git a/flang/test/Transforms/DoConcurrent/basic.f90 b/flang/test/Transforms/DoConcurrent/basic.f90
index a555a25c9bad5..248223d72ff11 100644
--- a/flang/test/Transforms/DoConcurrent/basic.f90
+++ b/flang/test/Transforms/DoConcurrent/basic.f90
@@ -23,7 +23,8 @@ program do_concurrent_basic
   ! CHECK: %[[UB:.*]] = fir.convert %[[C10]] : (i32) -> index
   ! CHECK: %[[STEP:.*]] = arith.constant 1 : index
 
-  ! CHECK: omp.wsloop for (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
+  ! CHECK: omp.wsloop {
+  ! CHECK: omp.loopnest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
   ! CHECK-NEXT: %[[IV_IDX:.*]] = fir.convert %[[ARG0]] : (index) -> i32
   ! CHECK-NEXT: fir.store %[[IV_IDX]] to %[[BINDING]]#1 : !fir.ref
   ! CHECK-NEXT: %[[IV_VAL1:.*]] = fir.load %[[BINDING]]#0 : !fir.ref
diff --git a/mlir/test/CAPI/execution_engine.c b/mlir/test/CAPI/execution_engine.c
index 38a8fb8c3e213..a3bf38bdfb131 100644
--- a/mlir/test/CAPI/execution_engine.c
+++ b/mlir/test/CAPI/execution_engine.c
@@ -99,10 +99,13 @@ void testOmpCreation(void) {
 " %1 = arith.constant 1 : i32 \n"
 " %2 = arith.constant 2 : i32 \n"
 " omp.parallel { \n"
-" omp.wsloop for (%3) : i32 = (%0) to (%2) step (%1) { \n"
+" omp.wsloop { \n"
+" omp.loopnest (%3) : i32 = (%0) to (%2) step (%1) { \n"
 " omp.yield \n"
 " } \n"
 " omp.terminator \n"
+" } \n"
+" omp.terminator \n"
 " } \n"
 " llvm.return \n"
 " } \n"
diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
index 6cbc0c8f4be9a..de5dfc1a288d0 100644
--- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
+++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
@@ -71,7 +71,8 @@ func.func @branch_loop() {
 func.func @wsloop(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) {
   // CHECK: omp.parallel
   omp.parallel {
-    // CHECK: omp.wsloop for (%[[ARG6:.*]], %[[ARG7:.*]]) : i64 = (%[[ARG0]], %[[ARG1]]) to (%[[ARG2]], %[[ARG3]]) step (%[[ARG4]], %[[ARG5]]) {
+    // CHECK: omp.wsloop {
+    // CHECK: omp.loopnest (%[[ARG6:.*]], %[[ARG7:.*]]) : i64 = (%[[ARG0]], %[[ARG1]]) to (%[[ARG2]], %[[ARG3]]) step (%[[ARG4]], %[[ARG5]]) {
     "omp.wsloop"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5) ({
     ^bb0(%arg6: index, %arg7: index):
       // CHECK-DAG: %[[CAST_ARG6:.*]] = builtin.unrealized_conversion_cast %[[ARG6]] : i64 to index
@@ -320,7 +321,8 @@ llvm.func @_QPsb() {
 // CHECK-LABEL: @_QPsimple_reduction
 // CHECK: %[[RED_ACCUMULATOR:.*]] = llvm.alloca %{{.*}} x i32 {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} : (i64) -> !llvm.ptr
 // CHECK: omp.parallel
-// CHECK: omp.wsloop reduction(@eqv_reduction %{{.+}} -> %[[PRV:.+]] : !llvm.ptr) for
+// CHECK: omp.wsloop reduction(@eqv_reduction %{{.+}} -> %[[PRV:.+]] : !llvm.ptr) {
+// CHECK: omp.loopnest
 // CHECK: %[[LPRV:.+]] = llvm.load %[[PRV]] : !llvm.ptr -> i32
 // CHECK: %[[CMP:.+]] = llvm.icmp "eq" %{{.*}}, %[[LPRV]] : i32
 // CHECK: %[[ZEXT:.+]] = llvm.zext %[[CMP]] : i1 to i32
@@ -353,7 +355,8 @@ llvm.func @_QPsimple_reduction(%arg0: !llvm.ptr {fir.bindc_name = "y"}) {
   llvm.store %5, %4 : i32, !llvm.ptr
   omp.parallel {
     %6 = llvm.alloca %3 x i32 {adapt.valuebyref, in_type = i32, operandSegmentSizes = array, pinned} : (i64) -> !llvm.ptr
-    omp.wsloop reduction(@eqv_reduction %4 -> %prv : !llvm.ptr) for (%arg1) : i32 = (%1) to (%0) inclusive step (%1) {
+    omp.wsloop reduction(@eqv_reduction %4 -> %prv : !llvm.ptr) {
+    omp.loopnest (%arg1) : i32 = (%1) to (%0) inclusive step (%1) {
       llvm.store %arg1, %6 : i32, !llvm.ptr
       %7 = llvm.load %6 : !llvm.ptr -> i32
       %8 = llvm.sext %7 : i32 to i64
@@ -367,6 +370,8 @@ llvm.func @_QPsimple_reduction(%arg0: !llvm.ptr {fir.bindc_name = "y"}) {
       omp.yield
     }
     omp.terminator
+    }
+    omp.terminator
   }
   llvm.return
 }
diff --git a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir
index acd2690c56e2e..d1e515b8ae813 100644
--- a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir
+++ b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir
@@ -5,7 +5,8 @@ func.func @parallel(%arg0: index, %arg1: index, %arg2: index,
                     %arg3: index, %arg4: index, %arg5: index) {
   // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32
   // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) {
-  // CHECK:
omp.wsloop for (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { + // CHECK: omp.wsloop { + // CHECK: omp.loopnest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { // CHECK: memref.alloca_scope scf.parallel (%i, %j) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { // CHECK: "test.payload"(%[[LVAR1]], %[[LVAR2]]) : (index, index) -> () @@ -23,8 +24,9 @@ func.func @nested_loops(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) { // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { - // CHECK: omp.wsloop for (%[[LVAR_OUT1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) { - // CHECK: memref.alloca_scope + // CHECK: omp.wsloop { + // CHECK: omp.loopnest (%[[LVAR_OUT1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) { + // CHECK: memref.alloca_scope scf.parallel (%i) = (%arg0) to (%arg2) step (%arg4) { // CHECK: omp.parallel // CHECK: omp.wsloop for (%[[LVAR_IN1:.*]]) : index = (%arg1) to (%arg3) step (%arg5) { @@ -47,7 +49,8 @@ func.func @adjacent_loops(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) { // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { - // CHECK: omp.wsloop for (%[[LVAR_AL1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) { + // CHECK: omp.wsloop { + // CHECK: omp.loopnest (%[[LVAR_AL1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) { // CHECK: memref.alloca_scope scf.parallel (%i) = (%arg0) to (%arg2) step (%arg4) { // CHECK: "test.payload1"(%[[LVAR_AL1]]) : (index) -> () @@ -60,7 +63,8 @@ func.func @adjacent_loops(%arg0: index, %arg1: index, %arg2: index, // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { - // CHECK: omp.wsloop for (%[[LVAR_AL2:.*]]) : index = (%arg1) to (%arg3) step (%arg5) { + // CHECK: omp.wsloop { + // CHECK: omp.loopnest (%[[LVAR_AL2:.*]]) : index = (%arg1) to (%arg3) step (%arg5) { // CHECK: memref.alloca_scope scf.parallel (%j) = (%arg1) to (%arg3) step (%arg5) { // CHECK: "test.payload2"(%[[LVAR_AL2]]) : (index) -> () diff --git a/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir b/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir index 37720e98d92a9..f48f8152ee022 100644 --- a/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir +++ b/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir @@ -32,7 +32,8 @@ llvm.func @repeated_successor_no_args(%arg0: i1) { // CHECK: @repeated_successor_openmp llvm.func @repeated_successor_openmp(%arg0: i64, %arg1: i64, %arg2: i64, %arg3: i1) { - omp.wsloop for (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) { + omp.wsloop { + omp.loopnest (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) { // CHECK: llvm.cond_br %{{.*}}, ^[[BB1:.*]]({{.*}}), ^[[BB2:.*]]({{.*}}) llvm.cond_br %arg3, ^bb1(%arg0 : i64), ^bb1(%arg1 : i64) // CHECK: ^[[BB1]] @@ -41,6 +42,8 @@ llvm.func @repeated_successor_openmp(%arg0: i64, %arg1: i64, %arg2: i64, %arg3: // CHECK: ^[[BB2]](%[[ARG:.*]]: i64): // CHECK: llvm.br ^[[BB1]](%[[ARG]] : i64) } + omp.terminator + } llvm.return } diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 448f37b32fff6..8a8ec498ed76b 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -98,7 +98,7 @@ func.func @invalid_parent(%lb : index, %ub : index, %step : index) { func.func @type_mismatch(%lb : 
index, %ub : index, %step : index) { // TODO Remove induction variables from omp.wsloop. - omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop { // expected-error@+1 {{range argument type does not match corresponding IV type}} "omp.loopnest" (%lb, %ub, %step) ({ ^bb0(%iv2: i32): @@ -112,7 +112,7 @@ func.func @type_mismatch(%lb : index, %ub : index, %step : index) { func.func @iv_number_mismatch(%lb : index, %ub : index, %step : index) { // TODO Remove induction variables from omp.wsloop. - omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop { // expected-error@+1 {{number of range arguments and IVs do not match}} "omp.loopnest" (%lb, %ub, %step) ({ ^bb0(%iv1 : index, %iv2 : index): @@ -125,7 +125,7 @@ func.func @iv_number_mismatch(%lb : index, %ub : index, %step : index) { // ----- func.func @inclusive_not_a_clause(%lb : index, %ub : index, %step : index) { - // expected-error @below {{expected 'for'}} + // expected-error @below {{expected '{' to begin a region}} omp.wsloop nowait inclusive for (%iv) : index = (%lb) to (%ub) step (%step) { omp.yield @@ -145,7 +145,7 @@ func.func @order_value(%lb : index, %ub : index, %step : index) { // ----- func.func @if_not_allowed(%lb : index, %ub : index, %step : index, %bool_var : i1) { - // expected-error @below {{expected 'for'}} + // expected-error @below {{expected '{'}} omp.wsloop if(%bool_var: i1) for (%iv) : index = (%lb) to (%ub) step (%step) { omp.yield @@ -155,7 +155,7 @@ func.func @if_not_allowed(%lb : index, %ub : index, %step : index, %bool_var : i // ----- func.func @num_threads_not_allowed(%lb : index, %ub : index, %step : index, %int_var : i32) { - // expected-error @below {{expected 'for'}} + // expected-error @below {{expected '{'}} omp.wsloop num_threads(%int_var: i32) for (%iv) : index = (%lb) to (%ub) step (%step) { omp.yield @@ -479,12 +479,14 @@ func.func @foo(%lb : index, %ub : index, %step : index) { %1 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr // expected-error @below {{expected symbol reference @foo to point to a reduction declaration}} - omp.wsloop reduction(@foo %0 -> %prv : !llvm.ptr) - for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop reduction(@foo %0 -> %prv : !llvm.ptr) { + omp.loopnest (%iv) : index = (%lb) to (%ub) step (%step) { %2 = arith.constant 2.0 : f32 omp.reduction %2, %1 : f32, !llvm.ptr omp.yield } + omp.terminator + } return } @@ -507,12 +509,14 @@ func.func @foo(%lb : index, %ub : index, %step : index) { %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr // expected-error @below {{accumulator variable used more than once}} - omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr, @add_f32 %0 -> %prv1 : !llvm.ptr) - for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr, @add_f32 %0 -> %prv1 : !llvm.ptr) { + omp.loopnest (%iv) : index = (%lb) to (%ub) step (%step) { %2 = arith.constant 2.0 : f32 omp.reduction %2, %0 : f32, !llvm.ptr omp.yield } + omp.terminator + } return } @@ -540,12 +544,14 @@ func.func @foo(%lb : index, %ub : index, %step : index, %mem : memref<1xf32>) { %c1 = arith.constant 1 : i32 // expected-error @below {{expected accumulator ('memref<1xf32>') to be the same type as reduction declaration ('!llvm.ptr')}} - omp.wsloop reduction(@add_f32 %mem -> %prv : memref<1xf32>) - for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop reduction(@add_f32 %mem -> %prv : memref<1xf32>) { + omp.loopnest (%iv) : index = (%lb) to (%ub) step (%step) { %2 = arith.constant 2.0 : f32 
  omp.reduction %2, %mem : f32, memref<1xf32>
  omp.yield
  }
+ omp.terminator
+ }
  return
}
@@ -577,27 +583,32 @@ omp.critical.declare @mutex hint(invalid_hint)
  // -----
  func.func @omp_ordered1(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
- omp.wsloop ordered(1)
- for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+ omp.wsloop ordered(1) {
+ omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
  // expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}}
  omp.ordered_region {
  omp.terminator
  }
  omp.yield
  }
+ omp.terminator
+ }
  return
  }
  // -----
  func.func @omp_ordered2(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
- omp.wsloop for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+ omp.wsloop {
+ omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
  // expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}}
  omp.ordered_region {
  omp.terminator
  }
  omp.yield
  }
+ omp.terminator
+ }
  return
  }
@@ -612,25 +623,29 @@ func.func @omp_ordered3(%vec0 : i64) -> () {
  // -----
  func.func @omp_ordered4(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64) -> () {
- omp.wsloop ordered(0)
- for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+ omp.wsloop ordered(0) {
+ omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
  // expected-error @below {{ordered depend directive must be closely nested inside a worksharing-loop with ordered clause with parameter present}}
  omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
  omp.yield
  }
+ omp.terminator
+ }
  return
  }
  // -----
  func.func @omp_ordered5(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64, %vec1 : i64) -> () {
- omp.wsloop ordered(1)
- for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+ omp.wsloop ordered(1) {
+ omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
  // expected-error @below {{number of variables in depend clause does not match number of iteration variables in the doacross loop}}
  omp.ordered depend_type(dependsource) depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64}
  omp.yield
  }
+ omp.terminator
+ }
  return
  }
@@ -1462,12 +1477,14 @@ func.func @omp_cancel2() {
  // -----
  func.func @omp_cancel3(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
- omp.wsloop nowait
- for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+ omp.wsloop nowait {
+ omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
  // expected-error @below {{A worksharing construct that is canceled must not have a nowait clause}}
  omp.cancel cancellation_construct_type(loop)
  // CHECK: omp.terminator
  omp.terminator
+ }
+ omp.terminator
  }
  return
  }
@@ -1475,12 +1492,14 @@ func.func @omp_cancel3(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
  // -----
  func.func @omp_cancel4(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
- omp.wsloop ordered(1)
- for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+ omp.wsloop ordered(1) {
+ omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
  // expected-error @below {{A worksharing construct that is canceled must not have an ordered clause}}
  omp.cancel cancellation_construct_type(loop)
  // CHECK: omp.terminator
  omp.terminator
+ }
+ omp.terminator
  }
  return
  }
diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
index afbf5f2224630..d1758035b8956 100644
--- a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
@@ -12,10 +12,13 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
  %loop_ub = llvm.mlir.constant(9 : i32) : i32
  %loop_lb = llvm.mlir.constant(0 : i32) : i32
  %loop_step = llvm.mlir.constant(1 : i32) : i32
- omp.wsloop for (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
+ omp.wsloop {
+ omp.loopnest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
  %gep = llvm.getelementptr %arg0[0, %loop_cnt] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<10 x i32>
  llvm.store %loop_cnt, %gep : i32, !llvm.ptr
  omp.yield
+ }
+ omp.terminator
  }
  omp.terminator
  }
diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
index 435aca32450c2..36fa2261e385c 100644
--- a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
@@ -8,7 +8,8 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
  %loop_ub = llvm.mlir.constant(99 : i32) : i32
  %loop_lb = llvm.mlir.constant(0 : i32) : i32
  %loop_step = llvm.mlir.constant(1 : index) : i32
- omp.wsloop for (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) {
+ omp.wsloop {
+ omp.loopnest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) {
  %1 = llvm.add %arg1, %arg2 : i32
  %2 = llvm.mul %arg2, %loop_ub overflow : i32
  %3 = llvm.add %arg1, %2 :i32
@@ -16,6 +17,8 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
  llvm.store %1, %4 : i32, !llvm.ptr
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
  }
diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
index 4cfb7d4f69514..0f48d45c53a09 100644
--- a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
@@ -8,10 +8,13 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
  %loop_ub = llvm.mlir.constant(9 : i32) : i32
  %loop_lb = llvm.mlir.constant(0 : i32) : i32
  %loop_step = llvm.mlir.constant(1 : i32) : i32
- omp.wsloop for (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
+ omp.wsloop {
+ omp.loopnest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
  %gep = llvm.getelementptr %arg0[0, %loop_cnt] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<10 x i32>
  llvm.store %loop_cnt, %gep : i32, !llvm.ptr
  omp.yield
+ }
+ omp.terminator
  }
  llvm.return
  }
@@ -20,8 +23,11 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
  %loop_ub = llvm.mlir.constant(9 : i32) : i32
  %loop_lb = llvm.mlir.constant(0 : i32) : i32
  %loop_step = llvm.mlir.constant(1 : i32) : i32
- omp.wsloop for (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
+ omp.wsloop {
+ omp.loopnest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
  omp.yield
+ }
+ omp.terminator
  }
  llvm.return
  }
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 3571526c078a5..a7393c47e1c5d 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -320,7 +320,8 @@ llvm.func @wsloop_simple(%arg0: !llvm.ptr) {
  %1 = llvm.mlir.constant(10 : index) : i64
  %2 = llvm.mlir.constant(1 : index) : i64
  omp.parallel {
- "omp.wsloop"(%1, %0, %2) ({
+ omp.wsloop {
+ "omp.loopnest" (%1, %0, %2) ({
  ^bb0(%arg1: i64):
  // The form of the emitted IR is controlled by OpenMPIRBuilder and
  // tested there. Just check that the right functions are called.
@@ -334,6 +335,8 @@ llvm.func @wsloop_simple(%arg0: !llvm.ptr) {
  }) {operandSegmentSizes = array} : (i64, i64, i64) -> ()
  omp.terminator
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -345,13 +348,16 @@ llvm.func @wsloop_inclusive_1(%arg0: !llvm.ptr) {
  %1 = llvm.mlir.constant(10 : index) : i64
  %2 = llvm.mlir.constant(1 : index) : i64
  // CHECK: store i64 31, ptr %{{.*}}upperbound
- "omp.wsloop"(%1, %0, %2) ({
+ omp.wsloop {
+ "omp.loopnest"(%1, %0, %2) ({
  ^bb0(%arg1: i64):
  %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
  %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
  llvm.store %3, %4 : f32, !llvm.ptr
  omp.yield
  }) {operandSegmentSizes = array} : (i64, i64, i64) -> ()
+ omp.terminator
+ }
  llvm.return
  }
@@ -363,13 +369,16 @@ llvm.func @wsloop_inclusive_2(%arg0: !llvm.ptr) {
  %1 = llvm.mlir.constant(10 : index) : i64
  %2 = llvm.mlir.constant(1 : index) : i64
  // CHECK: store i64 32, ptr %{{.*}}upperbound
- "omp.wsloop"(%1, %0, %2) ({
+ omp.wsloop {
+ "omp.loopnest"(%1, %0, %2) ({
  ^bb0(%arg1: i64):
  %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
  %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
  llvm.store %3, %4 : f32, !llvm.ptr
  omp.yield
  }) {inclusive, operandSegmentSizes = array} : (i64, i64, i64) -> ()
+ omp.terminator
+ }
  llvm.return
  }
@@ -379,13 +388,15 @@ llvm.func @body(i32)
  // CHECK-LABEL: @test_omp_wsloop_static_defchunk
  llvm.func @test_omp_wsloop_static_defchunk(%lb : i32, %ub : i32, %step : i32) -> () {
- omp.wsloop schedule(static)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(static) {
+ omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_for_static_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 34, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 1, i32 0)
  // CHECK: call void @__kmpc_for_static_fini
  llvm.call @body(%iv) : (i32) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -396,13 +407,15 @@ llvm.func @body(i32)
  // CHECK-LABEL: @test_omp_wsloop_static_1
  llvm.func @test_omp_wsloop_static_1(%lb : i32, %ub : i32, %step : i32) -> () {
  %static_chunk_size = llvm.mlir.constant(1 : i32) : i32
- omp.wsloop schedule(static = %static_chunk_size : i32)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(static = %static_chunk_size : i32) {
+ omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_for_static_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 33, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 1, i32 1)
  // CHECK: call void @__kmpc_for_static_fini
  llvm.call @body(%iv) : (i32) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -413,13 +426,15 @@ llvm.func @body(i32)
  // CHECK-LABEL: @test_omp_wsloop_static_2
  llvm.func @test_omp_wsloop_static_2(%lb : i32, %ub : i32, %step : i32) -> () {
  %static_chunk_size = llvm.mlir.constant(2 : i32) : i32
- omp.wsloop schedule(static = %static_chunk_size : i32)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(static = %static_chunk_size : i32) {
+ omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_for_static_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 33, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 1, i32 2)
  // CHECK: call void @__kmpc_for_static_fini
  llvm.call @body(%iv) : (i32) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -428,8 +443,8 @@ llvm.func @test_omp_wsloop_static_2(%lb : i32, %ub : i32, %step : i32) -> () {
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(dynamic)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -437,6 +452,8 @@ llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () {
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -446,8 +463,8 @@ llvm.func @body(i64)
  llvm.func @test_omp_wsloop_dynamic_chunk_const(%lb : i64, %ub : i64, %step : i64) -> () {
  %chunk_size_const = llvm.mlir.constant(2 : i16) : i16
- omp.wsloop schedule(dynamic = %chunk_size_const : i16)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic = %chunk_size_const : i16) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i64 {{.*}}, i64 %{{.*}}, i64 {{.*}}, i64 2)
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -455,6 +472,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_const(%lb : i64, %ub : i64, %step : i64
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -466,8 +485,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var(%lb : i32, %ub : i32, %step : i32)
  %1 = llvm.mlir.constant(1 : i64) : i64
  %chunk_size_alloca = llvm.alloca %1 x i16 {bindc_name = "chunk_size", in_type = i16, uniq_name = "_QFsub1Echunk_size"} : (i64) -> !llvm.ptr
  %chunk_size_var = llvm.load %chunk_size_alloca : !llvm.ptr -> i16
- omp.wsloop schedule(dynamic = %chunk_size_var : i16)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic = %chunk_size_var : i16) {
+ omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
  // CHECK: %[[CHUNK_SIZE:.*]] = sext i16 %{{.*}} to i32
  // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
@@ -475,6 +494,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var(%lb : i32, %ub : i32, %step : i32)
  // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
  llvm.call @body(%iv) : (i32) -> ()
  omp.yield
+ }
+ omp.terminator
  }
  llvm.return
  }
@@ -487,8 +508,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var2(%lb : i32, %ub : i32, %step : i32)
  %1 = llvm.mlir.constant(1 : i64) : i64
  %chunk_size_alloca = llvm.alloca %1 x i64 {bindc_name = "chunk_size", in_type = i64, uniq_name = "_QFsub1Echunk_size"} : (i64) -> !llvm.ptr
  %chunk_size_var = llvm.load %chunk_size_alloca : !llvm.ptr -> i64
- omp.wsloop schedule(dynamic = %chunk_size_var : i64)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic = %chunk_size_var : i64) {
+ omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
  // CHECK: %[[CHUNK_SIZE:.*]] = trunc i64 %{{.*}} to i32
  // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
@@ -497,6 +518,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var2(%lb : i32, %ub : i32, %step : i32)
  llvm.call @body(%iv) : (i32) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -505,8 +528,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var2(%lb : i32, %ub : i32, %step : i32)
  llvm.func @body(i32)
  llvm.func @test_omp_wsloop_dynamic_chunk_var3(%lb : i32, %ub : i32, %step : i32, %chunk_size : i32) -> () {
- omp.wsloop schedule(dynamic = %chunk_size : i32)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic = %chunk_size : i32) {
+ omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %{{.*}})
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -514,6 +537,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var3(%lb : i32, %ub : i32, %step : i32,
  llvm.call @body(%iv) : (i32) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -522,8 +547,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var3(%lb : i32, %ub : i32, %step : i32,
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(auto)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(auto) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -531,6 +556,8 @@ llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () {
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -539,8 +566,8 @@ llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () {
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(runtime)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(runtime) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -548,6 +575,8 @@ llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () {
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -556,8 +585,8 @@ llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () {
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(guided)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(guided) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -565,6 +594,8 @@ llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () {
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -573,8 +604,8 @@ llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () {
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(dynamic, nonmonotonic)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic, nonmonotonic) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -582,6 +613,8 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i6
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -590,8 +623,8 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i6
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(dynamic, monotonic)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic, monotonic) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 536870947
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -599,6 +632,8 @@ llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64)
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -607,8 +642,8 @@ llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64)
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(runtime, simd)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(runtime, simd) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741871
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -616,6 +651,8 @@ llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> ()
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -624,8 +661,8 @@ llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> ()
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(guided, simd)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(guided, simd) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741870
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -633,6 +670,8 @@ llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> ()
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -782,8 +821,8 @@ llvm.func @simdloop_if(%arg0: !llvm.ptr {fir.bindc_name = "n"}, %arg1: !llvm.ptr
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop ordered(0) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 66, i64 1, i64 %{{.*}}, i64 1, i64 1)
  // CHECK: call void @__kmpc_dispatch_fini_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -792,6 +831,8 @@ llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -800,8 +841,8 @@ llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_static_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(static) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(static) ordered(0) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 66, i64 1, i64 %{{.*}}, i64 1, i64 1)
  // CHECK: call void @__kmpc_dispatch_fini_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -810,6 +851,8 @@ llvm.func @test_omp_wsloop_static_ordered(%lb : i64, %ub : i64, %step : i64) ->
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -819,8 +862,8 @@ llvm.func @body(i32)
  llvm.func @test_omp_wsloop_static_chunk_ordered(%lb : i32, %ub : i32, %step : i32) -> () {
  %static_chunk_size = llvm.mlir.constant(1 : i32) : i32
- omp.wsloop schedule(static = %static_chunk_size : i32) ordered(0)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(static = %static_chunk_size : i32) ordered(0) {
+ omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 65, i32 1, i32 %{{.*}}, i32 1, i32 1)
  // CHECK: call void @__kmpc_dispatch_fini_4u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
@@ -829,6 +872,8 @@ llvm.func @test_omp_wsloop_static_chunk_ordered(%lb : i32, %ub : i32, %step : i3
  llvm.call @body(%iv) : (i32) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -837,8 +882,8 @@ llvm.func @test_omp_wsloop_static_chunk_ordered(%lb : i32, %ub : i32, %step : i3
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(dynamic) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic) ordered(0) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 67, i64 1, i64 %{{.*}}, i64 1, i64 1)
  // CHECK: call void @__kmpc_dispatch_fini_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -847,6 +892,8 @@ llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) ->
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -855,8 +902,8 @@ llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) ->
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(auto) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(auto) ordered(0) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 70, i64 1, i64 %{{.*}}, i64 1, i64 1)
  // CHECK: call void @__kmpc_dispatch_fini_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -865,6 +912,8 @@ llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> ()
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -873,8 +922,8 @@ llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> ()
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(runtime) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(runtime) ordered(0) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 69, i64 1, i64 %{{.*}}, i64 1, i64 1)
  // CHECK: call void @__kmpc_dispatch_fini_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -883,6 +932,8 @@ llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) ->
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -891,8 +942,8 @@ llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) ->
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(guided) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(guided) ordered(0) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 68, i64 1, i64 %{{.*}}, i64 1, i64 1)
  // CHECK: call void @__kmpc_dispatch_fini_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -901,6 +952,8 @@ llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) ->
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -909,8 +962,8 @@ llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) ->
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_dynamic_nonmonotonic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(dynamic, nonmonotonic) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic, nonmonotonic) ordered(0) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741891, i64 1, i64 %{{.*}}, i64 1, i64 1)
  // CHECK: call void @__kmpc_dispatch_fini_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -919,6 +972,8 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic_ordered(%lb : i64, %ub : i64, %s
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -927,8 +982,8 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic_ordered(%lb : i64, %ub : i64, %s
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_dynamic_monotonic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(dynamic, monotonic) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic, monotonic) ordered(0) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 536870979, i64 1, i64 %{{.*}}, i64 1, i64 1)
  // CHECK: call void @__kmpc_dispatch_fini_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -937,6 +992,8 @@ llvm.func @test_omp_wsloop_dynamic_monotonic_ordered(%lb : i64, %ub : i64, %step
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -1103,8 +1160,8 @@ llvm.func @collapse_wsloop(
  // CHECK: %[[TOTAL_SUB_1:.*]] = sub i32 %[[TOTAL]], 1
  // CHECK: store i32 %[[TOTAL_SUB_1]], ptr
  // CHECK: call void @__kmpc_for_static_init_4u
- omp.wsloop
- for (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
+ omp.wsloop {
+ omp.loopnest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
  %31 = llvm.load %20 : !llvm.ptr -> i32
  %32 = llvm.add %31, %arg0 : i32
  %33 = llvm.add %32, %arg1 : i32
@@ -1113,6 +1170,8 @@ llvm.func @collapse_wsloop(
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  llvm.return
  }
@@ -1164,8 +1223,8 @@ llvm.func @collapse_wsloop_dynamic(
  // CHECK: store i32 1, ptr
  // CHECK: store i32 %[[TOTAL]], ptr
  // CHECK: call void @__kmpc_dispatch_init_4u
- omp.wsloop schedule(dynamic)
- for (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
+ omp.wsloop schedule(dynamic) {
+ omp.loopnest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
  %31 = llvm.load %20 : !llvm.ptr -> i32
  %32 = llvm.add %31, %arg0 : i32
  %33 = llvm.add %32, %arg1 : i32
@@ -1174,6 +1233,8 @@ llvm.func @collapse_wsloop_dynamic(
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  llvm.return
  }
@@ -1196,8 +1257,8 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
  // CHECK: call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[OMP_THREAD]])
  }
- omp.wsloop ordered(0)
- for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
+ omp.wsloop ordered(0) {
+ omp.loopnest (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
  // CHECK: call void @__kmpc_ordered(ptr @[[GLOB3:[0-9]+]], i32 [[OMP_THREAD2:%.*]])
  omp.ordered_region {
  omp.terminator
@@ -1205,9 +1266,11 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
  }
  omp.yield
  }
+ omp.terminator
+ }
- omp.wsloop ordered(1)
- for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
+ omp.wsloop ordered(1) {
+ omp.loopnest (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
  // CHECK: [[TMP:%.*]] = getelementptr inbounds [1 x i64], ptr [[ADDR]], i64 0, i64 0
  // CHECK: store i64 [[ARG0:%.*]], ptr [[TMP]], align 8
  // CHECK: [[TMP2:%.*]] = getelementptr inbounds [1 x i64], ptr [[ADDR]], i64 0, i64 0
@@ -1224,9 +1287,11 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
  omp.yield
  }
+ omp.terminator
+ }
- omp.wsloop ordered(2)
- for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
+ omp.wsloop ordered(2) {
+ omp.loopnest (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
  // CHECK: [[TMP5:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR5]], i64 0, i64 0
  // CHECK: store i64 [[ARG0]], ptr [[TMP5]], align 8
  // CHECK: [[TMP6:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR5]], i64 0, i64 1
@@ -1254,6 +1319,8 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -2122,11 +2189,14 @@ llvm.func @omp_sections_with_clauses() -> () {
  // introduction mechanism itself is tested elsewhere.
  // CHECK-LABEL: @repeated_successor
  llvm.func @repeated_successor(%arg0: i64, %arg1: i64, %arg2: i64, %arg3: i1) {
- omp.wsloop for (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) {
+ omp.wsloop {
+ omp.loopnest (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) {
  llvm.cond_br %arg3, ^bb1(%arg0 : i64), ^bb1(%arg1 : i64)
  ^bb1(%0: i64):  // 2 preds: ^bb0, ^bb0
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -2549,8 +2619,8 @@ llvm.func @omp_opaque_pointers(%arg0 : !llvm.ptr, %arg1: !llvm.ptr, %expr: i32)
  // CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 1
  // CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1
  // CHECK: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 1
-module attributes {omp.flags = #omp.flags} {}
  // -----
@@ -2595,8 +2665,8 @@ module attributes {omp.version = #omp.version} {}
  // CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
  // CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
  // CHECK: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0
-module attributes {omp.flags = #omp.flags} {}
  // -----
diff --git a/mlir/test/Target/LLVMIR/openmp-nested.mlir b/mlir/test/Target/LLVMIR/openmp-nested.mlir
index e1fdfdd24a3cb..35bc63fb3b4a5 100644
--- a/mlir/test/Target/LLVMIR/openmp-nested.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-nested.mlir
@@ -11,13 +11,17 @@ module {
  %2 = llvm.mlir.constant(0 : index) : i64
  %4 = llvm.mlir.constant(0 : i32) : i32
  %12 = llvm.alloca %0 x i64 : (i64) -> !llvm.ptr
- omp.wsloop for (%arg2) : i64 = (%2) to (%1) step (%0) {
+ omp.wsloop {
+ omp.loopnest (%arg2) : i64 = (%2) to (%1) step (%0) {
  omp.parallel {
- omp.wsloop for (%arg3) : i64 = (%2) to (%0) step (%0) {
+ omp.wsloop {
+ omp.loopnest (%arg3) : i64 = (%2) to (%0) step (%0) {
  llvm.store %2, %12 : i64, !llvm.ptr
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  %19 = llvm.load %12 : !llvm.ptr -> i64
  %20 = llvm.trunc %19 : i64 to i32
@@ -27,6 +31,8 @@ module {
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  %a4 = llvm.mlir.constant(0 : i32) : i32
  llvm.return %a4 : i32
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction.mlir b/mlir/test/Target/LLVMIR/openmp-reduction.mlir
index 9543458e950be..deeaf8219ce95 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction.mlir
@@ -26,8 +26,8 @@ llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
- omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  %1 = llvm.mlir.constant(2.0 : f32) : f32
  %2 = llvm.load %prv : !llvm.ptr -> f32
  %3 = llvm.fadd %1, %2 : f32
@@ -35,6 +35,8 @@ llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) {
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  llvm.return
  }
@@ -105,8 +107,8 @@ llvm.func @reuse_declaration(%lb : i64, %ub : i64, %step : i64) {
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
- omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  %1 = llvm.mlir.constant(2.0 : f32) : f32
  %3 = llvm.load %prv0 : !llvm.ptr -> f32
  %4 = llvm.fadd %3, %1 : f32
@@ -117,6 +119,8 @@ llvm.func @reuse_declaration(%lb : i64, %ub : i64, %step : i64) {
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  llvm.return
  }
@@ -195,8 +199,8 @@ llvm.func @missing_omp_reduction(%lb : i64, %ub : i64, %step : i64) {
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
- omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  %1 = llvm.mlir.constant(2.0 : f32) : f32
  %3 = llvm.load %prv0 : !llvm.ptr -> f32
  %4 = llvm.fadd %3, %1 : f32
@@ -204,6 +208,8 @@ llvm.func @missing_omp_reduction(%lb : i64, %ub : i64, %step : i64) {
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  llvm.return
  }
@@ -280,8 +286,8 @@ llvm.func @double_reference(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
- omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  %1 = llvm.mlir.constant(2.0 : f32) : f32
  %2 = llvm.load %prv : !llvm.ptr -> f32
  %3 = llvm.fadd %2, %1 : f32
@@ -292,6 +298,8 @@ llvm.func @double_reference(%lb : i64, %ub : i64, %step : i64) {
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  llvm.return
  }
@@ -374,8 +382,8 @@ llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
- omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @mul_f32 %2 -> %prv1 : !llvm.ptr)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @mul_f32 %2 -> %prv1 : !llvm.ptr) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  %1 = llvm.mlir.constant(2.0 : f32) : f32
  %3 = llvm.load %prv0 : !llvm.ptr -> f32
  %4 = llvm.fadd %3, %1 : f32
@@ -386,6 +394,8 @@ llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  llvm.return
  }
@@ -529,9 +539,10 @@ llvm.func @parallel_nested_workshare_reduction(%ub : i64) {
  %lb = llvm.mlir.constant(1 : i64) : i64
  %step = llvm.mlir.constant(1 : i64) : i64
-
+
  omp.parallel reduction(@add_i32 %0 -> %prv : !llvm.ptr) {
- omp.wsloop for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  %ival = llvm.trunc %iv : i64 to i32
  %lprv = llvm.load %prv : !llvm.ptr -> i32
  %add = llvm.add %lprv, %ival : i32
@@ -539,6 +550,8 @@ llvm.func @parallel_nested_workshare_reduction(%ub : i64) {
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  llvm.return