diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 93fb0d8e8d078..19a4058b64382 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -484,7 +484,7 @@ class OpenMPIRBuilder { /// not have an effect on \p M (see initialize) OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()), OffloadInfoManager(this), - T(M.getTargetTriple()) {} + T(M.getTargetTriple()), IsFinalized(false) {} LLVM_ABI ~OpenMPIRBuilder(); class AtomicInfo : public llvm::AtomicInfo { @@ -521,6 +521,10 @@ class OpenMPIRBuilder { /// all functions are finalized. LLVM_ABI void finalize(Function *Fn = nullptr); + /// Check whether the finalize function has already run + /// \return true if the finalize function has already run + LLVM_ABI bool isFinalized(); + /// Add attributes known for \p FnID to \p Fn. LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn); @@ -3286,6 +3290,8 @@ class OpenMPIRBuilder { Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2, AtomicRMWInst::BinOp RMWOp); + bool IsFinalized; + public: /// a struct to pack relevant information while generating atomic Ops struct AtomicOpValue { diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 7cbbbff511c88..dab8fbdeff160 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -824,8 +824,12 @@ void OpenMPIRBuilder::finalize(Function *Fn) { M.getGlobalVariable("__openmp_nvptx_data_transfer_temporary_storage")}; emitUsed("llvm.compiler.used", LLVMCompilerUsed); } + + IsFinalized = true; } +bool OpenMPIRBuilder::isFinalized() { return IsFinalized; } + OpenMPIRBuilder::~OpenMPIRBuilder() { assert(OutlineInfos.empty() && "There must be no outstanding outlinings"); } diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 1210bdf4a1c98..6d1b8d67fd694 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1563,12 +1563,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall, inputs, NewValues); - LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) { - newFunction->dump(); - report_fatal_error("verification of newFunction failed!"); - }); - LLVM_DEBUG(if (verifyFunction(*oldFunction)) - report_fatal_error("verification of oldFunction failed!")); + LLVM_DEBUG(llvm::dbgs() << "After extractCodeRegion - newFunction:\n"); + LLVM_DEBUG(newFunction->dump()); + LLVM_DEBUG(llvm::dbgs() << "After extractCodeRegion - oldFunction:\n"); + LLVM_DEBUG(oldFunction->dump()); LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC)) report_fatal_error("Stale Asumption cache for old Function!")); return newFunction; @@ -1868,8 +1866,14 @@ CallInst *CodeExtractor::emitReplacerCall( // This takes place of the original loop BasicBlock *codeReplacer = BasicBlock::Create(Context, "codeRepl", oldFunction, ReplIP); + // In cases with multiple levels of outlining, e.g. with OpenMP, + // AllocationBlock may end up in a function different than oldFunction. We + // need to make sure we do not use it in those cases, otherwise the alloca + // will end up in a different function from its users and break the module. BasicBlock *AllocaBlock = - AllocationBlock ? AllocationBlock : &oldFunction->getEntryBlock(); + (AllocationBlock && oldFunction == AllocationBlock->getParent()) + ? AllocationBlock + : &oldFunction->getEntryBlock(); // Update the entry count of the function. if (BFI) diff --git a/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll b/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll index 8bc71148352d2..5bc733f5622c7 100644 --- a/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll +++ b/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -S -passes='function(instsimplify),hotcoldsplit' -hotcoldsplit-threshold=-1 -debug < %s 2>&1 | FileCheck %s +; RUN: opt -S -passes='function(instsimplify),hotcoldsplit' -hotcoldsplit-threshold=-1 < %s 2>&1 | FileCheck %s ; RUN: opt -passes='function(instcombine),hotcoldsplit,function(instsimplify)' %s -o /dev/null target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index e5ca147ea98f8..93d2967229cca 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -777,7 +777,7 @@ ModuleTranslation::ModuleTranslation(Operation *module, } ModuleTranslation::~ModuleTranslation() { - if (ompBuilder) + if (ompBuilder && !ompBuilder->isFinalized()) ompBuilder->finalize(); } @@ -2332,6 +2332,10 @@ mlir::translateModuleToLLVMIR(Operation *module, llvm::LLVMContext &llvmContext, // beforehand. translator.debugTranslation->addModuleFlagsIfNotPresent(); + // Call the OpenMP IR Builder callbacks prior to verifying the module + if (auto *ompBuilder = translator.getOpenMPBuilder()) + ompBuilder->finalize(); + if (!disableVerification && llvm::verifyModule(*translator.llvmModule, &llvm::errs())) return nullptr; diff --git a/mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir b/mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir new file mode 100644 index 0000000000000..1589778e0627f --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir @@ -0,0 +1,62 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s +// This tests the fix for https://github.com/llvm/llvm-project/issues/138102 +// We are only interested in ensuring that the -mlir-to-llvmir pass doesn't crash + +// CHECK-LABEL: define internal void @_QQmain..omp_par + +omp.private {type = private} @_QFEi_private_i32 : i32 +omp.private {type = firstprivate} @_QFEc_firstprivate_i32 : i32 copy { +^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): +%0 = llvm.load %arg0 : !llvm.ptr -> i32 +llvm.store %0, %arg1 : i32, !llvm.ptr +omp.yield(%arg1 : !llvm.ptr) +} +llvm.func @_QQmain() { +%0 = llvm.mlir.constant(1 : i64) : i64 +%1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr +%2 = llvm.mlir.constant(1 : i64) : i64 +%3 = llvm.alloca %2 x i32 {bindc_name = "c"} : (i64) -> !llvm.ptr +%4 = llvm.mlir.constant(10 : index) : i64 +%5 = llvm.mlir.constant(0 : index) : i64 +%6 = llvm.mlir.constant(10000 : index) : i64 +%7 = llvm.mlir.constant(1 : index) : i64 +%8 = llvm.mlir.constant(1 : i64) : i64 +%9 = llvm.mlir.addressof @_QFECchunksz : !llvm.ptr +%10 = llvm.mlir.constant(1 : i64) : i64 +%11 = llvm.trunc %7 : i64 to i32 +llvm.br ^bb1(%11, %4 : i32, i64) +^bb1(%12: i32, %13: i64): // 2 preds: ^bb0, ^bb2 +%14 = llvm.icmp "sgt" %13, %5 : i64 +llvm.store %12, %3 : i32, !llvm.ptr +omp.task private(@_QFEc_firstprivate_i32 %3 -> %arg0 : !llvm.ptr) { + %19 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} + %20 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "c"} + %21 = omp.map.info var_ptr(%9 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "chunksz"} + omp.target map_entries(%19 -> %arg1, %20 -> %arg2, %21 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + %22 = llvm.mlir.constant(9999 : i32) : i32 + %23 = llvm.mlir.constant(1 : i32) : i32 + omp.parallel { + %24 = llvm.load %arg2 : !llvm.ptr -> i32 + %25 = llvm.add %24, %22 : i32 + omp.wsloop private(@_QFEi_private_i32 %arg1 -> %arg4 : !llvm.ptr) { + omp.loop_nest (%arg5) : i32 = (%24) to (%25) inclusive step (%23) { + llvm.store %arg5, %arg4 : i32, !llvm.ptr + omp.yield + } + } + omp.terminator + } + omp.terminator + } + omp.terminator +} +llvm.return +} +llvm.mlir.global internal constant @_QFECchunksz() {addr_space = 0 : i32} : i32 { +%0 = llvm.mlir.constant(10000 : i32) : i32 +llvm.return %0 : i32 +} +llvm.mlir.global internal constant @_QFECn() {addr_space = 0 : i32} : i32 { +%0 = llvm.mlir.constant(100000 : i32) : i32 +llvm.return %0 : i32 +}