diff --git a/.github/workflows/llvm-project-tests.yml b/.github/workflows/llvm-project-tests.yml index 43b90193406fc..a52dd2db8035d 100644 --- a/.github/workflows/llvm-project-tests.yml +++ b/.github/workflows/llvm-project-tests.yml @@ -118,6 +118,11 @@ jobs: else builddir="$(pwd)"/build fi + if [ "${{ runner.os }}" == "macOS" ]; then + # Work around test failures in some lld tests on macOS + # https://github.com/llvm/llvm-project/issues/81967 + extra_cmake_args="-DLLVM_DISABLE_ASSEMBLY_FILES=ON" + fi echo "llvm-builddir=$builddir" >> "$GITHUB_OUTPUT" cmake -G Ninja \ -B "$builddir" \ diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 5758b5acbc0b5..8621444364fb2 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -51,21 +51,35 @@ Improvements to clangd Inlay hints ^^^^^^^^^^^ -Diagnostics -^^^^^^^^^^^ - -Semantic Highlighting -^^^^^^^^^^^^^^^^^^^^^ +- Type hints + * Improved heuristics for showing sugared vs. desugared types + * Some hints which provide no information (e.g. ``<dependent-type>``) are now omitted +- Parameter hints + * Parameter hints are now shown for calls through function pointers + * Parameter hints are now shown for calls to a class's ``operator()`` + * No longer show bogus parameter hints for some builtins like ``__builtin_dump_struct`` Compile flags ^^^^^^^^^^^^^ +- System include extractor (``--query-driver``) improvements + * The directory containing builtin headers is now excluded from extracted system includes + * Various flags which can affect the system includes (``--target``, ``--stdlib``, ``-specs``) are now forwarded to the driver + * Fixed a bug where clangd would sometimes try to call a driver that didn't have obj-c support with ``-x objective-c++-header`` + * The driver path is now dot-normalized before being compared to the ``--query-driver`` pattern + * ``--query-driver`` is now supported by ``clangd-indexer`` +- Fixed a regression in clangd 17 where response files would not be expanded + Hover ^^^^^ +- Hover now shows alignment info for fields and records + Code completion ^^^^^^^^^^^^^^^ +- Refined heuristics for determining whether the use of a function can be a call or not + Code actions ^^^^^^^^^^^^ @@ -75,15 +89,25 @@ Code actions Signature help ^^^^^^^^^^^^^^ +- Improved support for calls through function pointer types + Cross-references ^^^^^^^^^^^^^^^^ +- Improved support for C++20 concepts +- Find-references now works for labels +- Improvements to template heuristics + Objective-C ^^^^^^^^^^^ Miscellaneous ^^^^^^^^^^^^^ +- Various stability improvements, e.g. crash fixes +- Improved error recovery on invalid code +- Clangd now bails gracefully on assembly and IR source files + Improvements to clang-doc ------------------------- @@ -564,10 +588,15 @@ Changes in existing checks Removed checks ^^^^^^^^^^^^^^ -Improvements to include-fixer +Improvements to include-cleaner ----------------------------- -The improvements are...
+- Support for ``--only-headers`` flag to limit analysis to headers matching a regex +- Recognizes references through ``concept``s +- Builtin headers are not analyzed +- Handling of references through ``friend`` declarations +- Fixes around handling of IWYU pragmas on stdlib headers +- Improved handling around references to/from template specializations Improvements to clang-include-fixer ----------------------------------- diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index fc27297aea2d6..d4401a43c123e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1103,6 +1103,8 @@ Bug Fixes to C++ Support (`#82258 <https://github.com/llvm/llvm-project/issues/82258>`_) - Correctly immediate-escalate lambda conversion functions. (`#82258 <https://github.com/llvm/llvm-project/issues/82258>`_) +- Fix a crash when an unresolved overload set is encountered on the RHS of a ``.*`` operator. + (`#53815 <https://github.com/llvm/llvm-project/issues/53815>`_) Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1325,6 +1327,11 @@ AIX Support or newer. Similar to the LTO support on AIX, ThinLTO is implemented with the libLTO.so plugin. +SystemZ Support +^^^^^^^^^^^^^^^ +- Properly support 16 byte atomic int/fp types and ops. Atomic __int128 (and + long double) variables are now aligned to 16 bytes by default (like gcc 14). + WebAssembly Support ^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 7877e20d77f77..4f22d35f9d3a9 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -186,6 +186,14 @@ class EmitAssemblyHelper { TargetTriple.getVendor() != llvm::Triple::Apple; } + /// Check whether we should emit a flag for UnifiedLTO. + /// The UnifiedLTO module flag should be set when UnifiedLTO is enabled for + /// ThinLTO or Full LTO with module summaries. + bool shouldEmitUnifiedLTOModuleFlag() const { + return CodeGenOpts.UnifiedLTO && + (CodeGenOpts.PrepareForThinLTO || shouldEmitRegularLTOSummary()); + } + public: EmitAssemblyHelper(DiagnosticsEngine &_Diags, const HeaderSearchOptions &HeaderSearchOpts, @@ -1029,7 +1037,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (!actionRequiresCodeGen(Action) && CodeGenOpts.VerifyModule) MPM.addPass(VerifierPass()); - if (Action == Backend_EmitBC || Action == Backend_EmitLL) { + if (Action == Backend_EmitBC || Action == Backend_EmitLL || + CodeGenOpts.FatLTO) { if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) { if (!TheModule->getModuleFlag("EnableSplitLTOUnit")) TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit", @@ -1040,11 +1049,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (!ThinLinkOS) return; } - if (CodeGenOpts.UnifiedLTO) - TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1)); MPM.addPass(ThinLTOBitcodeWriterPass( *OS, ThinLinkOS ?
&ThinLinkOS->os() : nullptr)); - } else { + } else if (Action == Backend_EmitLL) { MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists, /*EmitLTOSummary=*/true)); } @@ -1058,24 +1065,17 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (!TheModule->getModuleFlag("EnableSplitLTOUnit")) TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit", uint32_t(1)); - if (CodeGenOpts.UnifiedLTO) - TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1)); } - if (Action == Backend_EmitBC) + if (Action == Backend_EmitBC) { MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, EmitLTOSummary)); - else + } else if (Action == Backend_EmitLL) { MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists, EmitLTOSummary)); + } } - } - if (CodeGenOpts.FatLTO) { - // Set the EnableSplitLTOUnit and UnifiedLTO module flags, since FatLTO - // uses a different action than Backend_EmitBC or Backend_EmitLL. - if (!TheModule->getModuleFlag("EnableSplitLTOUnit")) - TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit", - uint32_t(CodeGenOpts.EnableSplitLTOUnit)); - if (CodeGenOpts.UnifiedLTO && !TheModule->getModuleFlag("UnifiedLTO")) + + if (shouldEmitUnifiedLTOModuleFlag()) TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1)); } diff --git a/clang/lib/Headers/larchintrin.h b/clang/lib/Headers/larchintrin.h index a613e5ca0e5ec..f4218295919a0 100644 --- a/clang/lib/Headers/larchintrin.h +++ b/clang/lib/Headers/larchintrin.h @@ -156,7 +156,7 @@ extern __inline unsigned char return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); } -extern __inline unsigned char +extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrrd_h(unsigned int _1) { return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 940bcccb9e261..b708272ebe7d8 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -14470,6 +14470,23 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc, CurFPFeatureOverrides()); } + // If this is the .* operator, which is not overloadable, just + // create a built-in binary operator. + if (Opc == BO_PtrMemD) { + auto CheckPlaceholder = [&](Expr *&Arg) { + ExprResult Res = CheckPlaceholderExpr(Arg); + if (Res.isUsable()) + Arg = Res.get(); + return !Res.isUsable(); + }; + + // CreateBuiltinBinOp() doesn't like it if we tell it to create a '.*' + // expression that contains placeholders (in either the LHS or RHS). + if (CheckPlaceholder(Args[0]) || CheckPlaceholder(Args[1])) + return ExprError(); + return CreateBuiltinBinOp(OpLoc, Opc, Args[0], Args[1]); + } + // Always do placeholder-like conversions on the RHS. if (checkPlaceholderForOverload(*this, Args[1])) return ExprError(); @@ -14489,11 +14506,6 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc, if (Opc == BO_Assign && !Args[0]->getType()->isOverloadableType()) return CreateBuiltinBinOp(OpLoc, Opc, Args[0], Args[1]); - // If this is the .* operator, which is not overloadable, just - // create a built-in binary operator. - if (Opc == BO_PtrMemD) - return CreateBuiltinBinOp(OpLoc, Opc, Args[0], Args[1]); - // Build the overload set.
OverloadCandidateSet CandidateSet(OpLoc, OverloadCandidateSet::CSK_Operator, OverloadCandidateSet::OperatorRewriteInfo( diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp index 0ac1d91b79beb..bc14aea27f673 100644 --- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp +++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp @@ -1409,7 +1409,7 @@ CallEventManager::getSimpleCall(const CallExpr *CE, ProgramStateRef State, if (const auto *OpCE = dyn_cast<CXXOperatorCallExpr>(CE)) { const FunctionDecl *DirectCallee = OpCE->getDirectCallee(); if (const auto *MD = dyn_cast<CXXMethodDecl>(DirectCallee)) - if (MD->isInstance()) + if (MD->isImplicitObjectMemberFunction()) return create<CXXMemberOperatorCall>(OpCE, State, LCtx, ElemRef); } else if (CE->getCallee()->getType()->isBlockPointerType()) { diff --git a/clang/test/Analysis/cxx2b-deducing-this.cpp b/clang/test/Analysis/cxx2b-deducing-this.cpp index d22a897097bec..2ec9e96bf0f84 100644 --- a/clang/test/Analysis/cxx2b-deducing-this.cpp +++ b/clang/test/Analysis/cxx2b-deducing-this.cpp @@ -60,3 +60,14 @@ void top() { s.c(); s.c(11); } + + +struct S2 { + bool operator==(this auto, S2) { + return true; + } +}; +void use_deducing_this() { + int result = S2{} == S2{}; // no-crash + clang_analyzer_dump(result); // expected-warning {{1 S32b}} +} diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32.c b/clang/test/CodeGen/LoongArch/intrinsic-la32.c index 93d54f511a9cd..eb3f8cbe7ac4c 100644 --- a/clang/test/CodeGen/LoongArch/intrinsic-la32.c +++ b/clang/test/CodeGen/LoongArch/intrinsic-la32.c @@ -169,8 +169,8 @@ unsigned int cpucfg(unsigned int a) { // LA32-LABEL: @rdtime( // LA32-NEXT: entry: -// LA32-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc !2 -// LA32-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !3 +// LA32-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]] +// LA32-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META3:![0-9]+]] // LA32-NEXT: ret void // void rdtime() { @@ -201,13 +201,28 @@ void loongarch_movgr2fcsr(int a) { __builtin_loongarch_movgr2fcsr(1, a); } -// CHECK-LABEL: @cacop_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A:%.*]], i32 1024) -// CHECK-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A]], i32 1024) -// CHECK-NEXT: ret void +// LA32-LABEL: @cacop_w( +// LA32-NEXT: entry: +// LA32-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A:%.*]], i32 1024) +// LA32-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A]], i32 1024) +// LA32-NEXT: ret void // void cacop_w(unsigned long int a) { __cacop_w(1, a, 1024); __builtin_loongarch_cacop_w(1, a, 1024); } + +// LA32-LABEL: @iocsrrd_h_result( +// LA32-NEXT: entry: +// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) +// LA32-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP0]] to i16 +// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) +// LA32-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// LA32-NEXT: [[CONV3:%.*]] = add i16 [[TMP2]], [[CONV_I]] +// LA32-NEXT: ret i16 [[CONV3]] +// +unsigned short iocsrrd_h_result(unsigned int a) { + unsigned short b = __iocsrrd_h(a); + unsigned short c = __builtin_loongarch_iocsrrd_h(a); + return b+c; +} diff --git
a/clang/test/CodeGen/LoongArch/intrinsic-la64.c b/clang/test/CodeGen/LoongArch/intrinsic-la64.c index a740882eef541..50ec358f546ec 100644 --- a/clang/test/CodeGen/LoongArch/intrinsic-la64.c +++ b/clang/test/CodeGen/LoongArch/intrinsic-la64.c @@ -387,7 +387,7 @@ unsigned int cpucfg(unsigned int a) { // CHECK-LABEL: @rdtime_d( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call { i64, i64 } asm sideeffect "rdtime.d $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc !2 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { i64, i64 } asm sideeffect "rdtime.d $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]] // CHECK-NEXT: ret void // void rdtime_d() { @@ -396,8 +396,8 @@ void rdtime_d() { // CHECK-LABEL: @rdtime( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !3 -// CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !4 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META3:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META4:![0-9]+]] // CHECK-NEXT: ret void // void rdtime() { @@ -427,3 +427,18 @@ void loongarch_movgr2fcsr(int a) { __movgr2fcsr(1, a); __builtin_loongarch_movgr2fcsr(1, a); } + +// CHECK-LABEL: @iocsrrd_h_result( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) +// CHECK-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP0]] to i16 +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT: [[CONV3:%.*]] = add i16 [[TMP2]], [[CONV_I]] +// CHECK-NEXT: ret i16 [[CONV3]] +// +unsigned short iocsrrd_h_result(unsigned int a) { + unsigned short b = __iocsrrd_h(a); + unsigned short c = __builtin_loongarch_iocsrrd_h(a); + return b+c; +} diff --git a/clang/test/CodeGen/fat-lto-objects.c b/clang/test/CodeGen/fat-lto-objects.c index afce798c5c819..b50567c024fc8 100644 --- a/clang/test/CodeGen/fat-lto-objects.c +++ b/clang/test/CodeGen/fat-lto-objects.c @@ -11,10 +11,11 @@ // RUN: llvm-objcopy --dump-section=.llvm.lto=%t.full.split.bc %t.full.split.o // RUN: llvm-dis %t.full.split.bc -o - | FileCheck %s --check-prefixes=FULL,SPLIT,NOUNIFIED +/// Full LTO always sets EnableSplitLTOUnit when the summary is used. 
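The two ``iocsrrd_h_result`` tests above pin down the ``larchintrin.h`` fix: the GNU-inline wrapper was declared to return ``unsigned char``, so the 16-bit I/O read was truncated at the wrapper boundary even though the builtin returns the full halfword. A minimal sketch of the difference, using hypothetical stand-ins rather than the real intrinsics:

```cpp
#include <cassert>
#include <cstdint>

// Stand-in for __builtin_loongarch_iocsrrd_h: a 16-bit value zero-extended
// into 32 bits (hypothetical model, not the actual builtin).
static uint32_t iocsrrd_h_raw() { return 0xBEEF; }

// Old, buggy prototype: the declared unsigned char return type silently
// drops the high byte of the halfword at the return boundary.
static unsigned char iocsrrd_h_old() { return (unsigned short)iocsrrd_h_raw(); }

// Fixed prototype: unsigned short preserves all 16 bits.
static unsigned short iocsrrd_h_new() { return (unsigned short)iocsrrd_h_raw(); }

int main() {
  assert(iocsrrd_h_old() == 0xEF);   // truncated to the low byte
  assert(iocsrrd_h_new() == 0xBEEF); // full halfword survives
}
```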
// RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -ffat-lto-objects -emit-obj < %s -o %t.full.nosplit.o // RUN: llvm-readelf -S %t.full.nosplit.o | FileCheck %s --check-prefixes=ELF // RUN: llvm-objcopy --dump-section=.llvm.lto=%t.full.nosplit.bc %t.full.nosplit.o -// RUN: llvm-dis %t.full.nosplit.bc -o - | FileCheck %s --check-prefixes=FULL,NOSPLIT,NOUNIFIED +// RUN: llvm-dis %t.full.nosplit.bc -o - | FileCheck %s --check-prefixes=FULL,SPLIT,NOUNIFIED // RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=thin -fsplit-lto-unit -ffat-lto-objects -emit-obj < %s -o %t.thin.split.o // RUN: llvm-readelf -S %t.thin.split.o | FileCheck %s --check-prefixes=ELF @@ -34,6 +35,21 @@ // RUN: %clang -cc1 -triple x86_64-unknown-linux-gnu -flto=full -ffat-lto-objects -fsplit-lto-unit -S < %s -o - \ // RUN: | FileCheck %s --check-prefixes=ASM +/// Make sure that FatLTO generates .llvm.lto sections that are the same as the output from normal LTO compilations // RUN: %clang -O2 --target=x86_64-unknown-linux-gnu -fPIE -flto=full -ffat-lto-objects -c %s -o %t.fatlto.full.o +// RUN: llvm-objcopy --dump-section=.llvm.lto=%t.fatlto.full.bc %t.fatlto.full.o +// RUN: llvm-dis < %t.fatlto.full.bc -o %t.fatlto.full.ll +// RUN: %clang -O2 --target=x86_64-unknown-linux-gnu -fPIE -flto=full -c %s -o %t.nofat.full.bc +// RUN: llvm-dis < %t.nofat.full.bc -o %t.nofat.full.ll +// RUN: diff %t.fatlto.full.ll %t.nofat.full.ll + +// RUN: %clang -O2 --target=x86_64-unknown-linux-gnu -fPIE -flto=thin -ffat-lto-objects -c %s -o %t.fatlto.thin.o +// RUN: llvm-objcopy --dump-section=.llvm.lto=%t.fatlto.thin.bc %t.fatlto.thin.o +// RUN: llvm-dis < %t.fatlto.thin.bc -o %t.fatlto.thin.ll +// RUN: %clang -O2 --target=x86_64-unknown-linux-gnu -fPIE -flto=thin -c %s -o %t.nofat.thin.bc +// RUN: llvm-dis < %t.nofat.thin.bc -o %t.nofat.thin.ll +// RUN: diff %t.fatlto.thin.ll %t.nofat.thin.ll + /// Be sure we enable split LTO units correctly under -ffat-lto-objects. // SPLIT: ![[#]] = !{i32 1, !"EnableSplitLTOUnit", i32 1} // NOSPLIT: ![[#]] = !{i32 1, !"EnableSplitLTOUnit", i32 0} @@ -51,6 +67,9 @@ // ASM-NEXT: .asciz "BC\300\336" // ASM-NEXT: .size .Lllvm.embedded.object +const char* foo = "foo"; + int test(void) { + const char* bar = "bar"; return 0xabcd; } diff --git a/clang/test/SemaCXX/gh53815.cpp b/clang/test/SemaCXX/gh53815.cpp new file mode 100644 index 0000000000000..326c911c7bfaf --- /dev/null +++ b/clang/test/SemaCXX/gh53815.cpp @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++20 %s +// expected-no-diagnostics + +// Check that we don't crash due to forgetting to check for placeholders +// in the RHS of '.*'.
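Reduced to its essence, the crash that the new ``gh53815.cpp`` test below guards against is an unresolved overload set reaching the RHS of ``.*``. A standalone illustration of the shape (``Callable`` is a made-up type, not part of the test file):

```cpp
struct Callable {
  int operator()(int) const { return 1; }
  int operator()(double) const { return 2; }
};

// &Callable::operator() names both overloads at once, so it reaches the
// built-in .* operator as a placeholder-typed (unresolved) expression.
// The requirement below is simply unsatisfied -- but before the fix,
// clang crashed while checking it instead of evaluating it to false.
template <typename Fn>
constexpr bool rhs_is_overload_set() {
  return requires { Fn().*&Fn::operator(); };
}

static_assert(!rhs_is_overload_set<Callable>());
```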
+ +template <typename Fn> +static bool has_explicitly_named_overload() { + return requires { Fn().*&Fn::operator(); }; +} + +int main() { + has_explicitly_named_overload<decltype([](auto) {})>(); +} + +template <typename Fn> +constexpr bool has_explicitly_named_overload_2() { + return requires { Fn().*&Fn::operator(); }; +} + +static_assert(!has_explicitly_named_overload_2<decltype([](auto) {})>()); diff --git a/compiler-rt/lib/builtins/divtc3.c b/compiler-rt/lib/builtins/divtc3.c index e970cef574b21..099de5802daf0 100644 --- a/compiler-rt/lib/builtins/divtc3.c +++ b/compiler-rt/lib/builtins/divtc3.c @@ -13,7 +13,7 @@ #define QUAD_PRECISION #include "fp_lib.h" -#if defined(CRT_HAS_TF_MODE) +#if defined(CRT_HAS_F128) // Returns: the quotient of (a + ib) / (c + id) diff --git a/compiler-rt/lib/builtins/fp_lib.h b/compiler-rt/lib/builtins/fp_lib.h index af406e760497a..c4f0a5b9587f7 100644 --- a/compiler-rt/lib/builtins/fp_lib.h +++ b/compiler-rt/lib/builtins/fp_lib.h @@ -22,6 +22,7 @@ #include "int_lib.h" #include "int_math.h" +#include "int_types.h" #include <limits.h> #include <stdbool.h> #include <stdint.h> @@ -93,13 +94,14 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b); #elif defined QUAD_PRECISION -#if defined(CRT_HAS_TF_MODE) +#if defined(CRT_HAS_F128) && defined(CRT_HAS_128BIT) typedef uint64_t half_rep_t; typedef __uint128_t rep_t; typedef __int128_t srep_t; typedef tf_float fp_t; #define HALF_REP_C UINT64_C #define REP_C (__uint128_t) +#if defined(CRT_HAS_IEEE_TF) // Note: Since there is no explicit way to tell compiler the constant is a // 128-bit integer, we let the constant be cast to 128-bit integer #define significandBits 112 @@ -188,7 +190,10 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { #undef Word_HiMask #undef Word_LoMask #undef Word_FullMask -#endif // defined(CRT_HAS_TF_MODE) +#endif // defined(CRT_HAS_IEEE_TF) +#else +typedef long double fp_t; +#endif // defined(CRT_HAS_F128) && defined(CRT_HAS_128BIT) #else #error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined.
#endif @@ -196,19 +201,6 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { #if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || \ (defined(QUAD_PRECISION) && defined(CRT_HAS_TF_MODE)) #define typeWidth (sizeof(rep_t) * CHAR_BIT) -#define exponentBits (typeWidth - significandBits - 1) -#define maxExponent ((1 << exponentBits) - 1) -#define exponentBias (maxExponent >> 1) - -#define implicitBit (REP_C(1) << significandBits) -#define significandMask (implicitBit - 1U) -#define signBit (REP_C(1) << (significandBits + exponentBits)) -#define absMask (signBit - 1U) -#define exponentMask (absMask ^ significandMask) -#define oneRep ((rep_t)exponentBias << significandBits) -#define infRep exponentMask -#define quietBit (implicitBit >> 1) -#define qnanRep (exponentMask | quietBit) static __inline rep_t toRep(fp_t x) { const union { @@ -226,6 +218,21 @@ static __inline fp_t fromRep(rep_t x) { return rep.f; } +#if !defined(QUAD_PRECISION) || defined(CRT_HAS_IEEE_TF) +#define exponentBits (typeWidth - significandBits - 1) +#define maxExponent ((1 << exponentBits) - 1) +#define exponentBias (maxExponent >> 1) + +#define implicitBit (REP_C(1) << significandBits) +#define significandMask (implicitBit - 1U) +#define signBit (REP_C(1) << (significandBits + exponentBits)) +#define absMask (signBit - 1U) +#define exponentMask (absMask ^ significandMask) +#define oneRep ((rep_t)exponentBias << significandBits) +#define infRep exponentMask +#define quietBit (implicitBit >> 1) +#define qnanRep (exponentMask | quietBit) + static __inline int normalize(rep_t *significand) { const int shift = rep_clz(*significand) - rep_clz(implicitBit); *significand <<= shift; @@ -328,6 +335,8 @@ static __inline fp_t __compiler_rt_scalbnX(fp_t x, int y) { return fromRep(sign | ((rep_t)exp << significandBits) | sig); } +#endif // !defined(QUAD_PRECISION) || defined(CRT_HAS_IEEE_TF) + // Avoid using fmax from libm. static __inline fp_t __compiler_rt_fmaxX(fp_t x, fp_t y) { // If either argument is NaN, return the other argument. If both are NaN, @@ -405,6 +414,8 @@ static __inline tf_float __compiler_rt_fmaxtf(tf_float x, tf_float y) { #define __compiler_rt_logbl crt_logbl #define __compiler_rt_scalbnl crt_scalbnl #define __compiler_rt_fmaxl crt_fmaxl +#define crt_fabstf crt_fabsl +#define crt_copysigntf crt_copysignl #else #error Unsupported TF mode type #endif diff --git a/compiler-rt/lib/builtins/int_types.h b/compiler-rt/lib/builtins/int_types.h index 7624c72806151..ca97391fc2846 100644 --- a/compiler-rt/lib/builtins/int_types.h +++ b/compiler-rt/lib/builtins/int_types.h @@ -189,12 +189,16 @@ typedef long double tf_float; #define CRT_LDBL_IEEE_F128 #endif #define TF_C(x) x##L -#elif __LDBL_MANT_DIG__ == 113 -// Use long double instead of __float128 if it matches the IEEE 128-bit format. +#elif __LDBL_MANT_DIG__ == 113 || \ + (__FLT_RADIX__ == 16 && __LDBL_MANT_DIG__ == 28) +// Use long double instead of __float128 if it matches the IEEE 128-bit format +// or the IBM hexadecimal format. 
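As context for the ``__LDBL_MANT_DIG__`` tests above: each ``long double`` flavor announces itself through predefined macros, which can be checked directly. A hedged sketch of the classification (the per-target values are assumptions mirroring the detection logic, not taken verbatim from ``int_types.h``):

```cpp
// Classify this target's 'long double' the way the detection above does.
#if __LDBL_MANT_DIG__ == 113
// IEEE binary128 (quad precision): radix 2, 113-bit significand.
static_assert(sizeof(long double) == 16, "IEEE quad occupies 16 bytes");
#elif __FLT_RADIX__ == 16 && __LDBL_MANT_DIG__ == 28
// IBM z/OS hexadecimal format: radix 16, 28 hex digits (112 bits).
#elif __LDBL_MANT_DIG__ == 106
// IBM double-double (classic PowerPC): two paired IEEE doubles.
#elif __LDBL_MANT_DIG__ == 64
// x87 80-bit extended precision (typical x86 Linux).
#else
// Otherwise 'long double' is plain IEEE double on this target.
static_assert(__LDBL_MANT_DIG__ == 53, "unexpected long double format");
#endif
```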
#define CRT_LDBL_128BIT #define CRT_HAS_F128 +#if __LDBL_MANT_DIG__ == 113 #define CRT_HAS_IEEE_TF #define CRT_LDBL_IEEE_F128 +#endif typedef long double tf_float; #define TF_C(x) x##L #elif defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__) diff --git a/compiler-rt/lib/builtins/multc3.c b/compiler-rt/lib/builtins/multc3.c index f20e53ccbf233..61a3f45e47279 100644 --- a/compiler-rt/lib/builtins/multc3.c +++ b/compiler-rt/lib/builtins/multc3.c @@ -15,7 +15,7 @@ #include "int_lib.h" #include "int_math.h" -#if defined(CRT_HAS_TF_MODE) +#if defined(CRT_HAS_F128) // Returns: the product of a + ib and c + id diff --git a/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_inline8bit_counter_default_impl.cpp b/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_inline8bit_counter_default_impl.cpp index 1ac04b53491e1..1d1fbf7299e8b 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_inline8bit_counter_default_impl.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_inline8bit_counter_default_impl.cpp @@ -3,7 +3,9 @@ // REQUIRES: has_sancovcc,stable-runtime,linux,x86_64-target-arch -// RUN: %clangxx -O0 %s -fsanitize-coverage=inline-8bit-counters,pc-table -o %t +/// In glibc 2.39+, fprintf has a nonnull attribute. Disable nonnull-attribute, +/// which would increase counters for ubsan. +// RUN: %clangxx -O0 %s -fsanitize-coverage=inline-8bit-counters,pc-table -fno-sanitize=nonnull-attribute -o %t // RUN: rm -f %t-counters %t-pcs // RUN: env %tool_options="cov_8bit_counters_out=%t-counters cov_pcs_out=%t-pcs verbosity=1" %run %t 2>&1 | FileCheck %s diff --git a/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_symbolize.cpp b/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_symbolize.cpp index daa994c811625..b168954a1c92c 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_symbolize.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_symbolize.cpp @@ -7,7 +7,9 @@ // RUN: rm -rf $DIR // RUN: mkdir -p $DIR // RUN: cd $DIR -// RUN: %clangxx -O0 -fsanitize-coverage=trace-pc-guard %s -o %t +/// In glibc 2.39+, fprintf has a nonnull attribute. Disable nonnull-attribute, +/// which would increase counters for ubsan. +// RUN: %clangxx -O0 -fsanitize-coverage=trace-pc-guard -fno-sanitize=nonnull-attribute %s -o %t // RUN: %env_tool_opts=coverage=1 %t 2>&1 | FileCheck %s // RUN: rm -rf $DIR diff --git a/libcxx/include/__availability b/libcxx/include/__availability index c5069a027750e..b8b2da9bb1226 100644 --- a/libcxx/include/__availability +++ b/libcxx/include/__availability @@ -72,11 +72,10 @@ # endif #endif -// Availability markup is disabled when building the library, or when the compiler -// doesn't support the proper attributes. +// Availability markup is disabled when building the library, or when a non-Clang +// compiler is used because only Clang supports the necessary attributes.
-#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || \ - !__has_feature(attribute_availability_with_strict) || !__has_feature(attribute_availability_in_templates) || \ - !__has_extension(pragma_clang_attribute_external_declaration) +#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || !defined(_LIBCPP_COMPILER_CLANG_BASED) # if !defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) # define _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS # endif diff --git a/libcxx/modules/modules.json.in b/libcxx/modules/modules.json.in index ddc377f28f919..759ac92d81f18 100644 --- a/libcxx/modules/modules.json.in +++ b/libcxx/modules/modules.json.in @@ -5,7 +5,7 @@ { "logical-name": "std", "source-path": "@LIBCXX_MODULE_RELATIVE_PATH@/std.cppm", - "is-standard-library": true, + "is-std-library": true, "local-arguments": { "system-include-directories": [ "@LIBCXX_MODULE_RELATIVE_PATH@" diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index 44a088663463c..1b80625304a41 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -306,7 +306,10 @@ if (LIBCXX_ENABLE_STATIC) # then its code shouldn't declare them with hidden visibility. They might # actually be provided by a shared library at link time. if (LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS) - append_flags_if_supported(CXX_STATIC_LIBRARY_FLAGS -fvisibility-global-new-delete-hidden) + append_flags_if_supported(CXX_STATIC_LIBRARY_FLAGS -fvisibility-global-new-delete=force-hidden) + if (NOT CXX_SUPPORTS_FVISIBILITY_GLOBAL_NEW_DELETE_EQ_FORCE_HIDDEN_FLAG) + append_flags_if_supported(CXX_STATIC_LIBRARY_FLAGS -fvisibility-global-new-delete-hidden) + endif() endif() target_compile_options(cxx_static PRIVATE ${CXX_STATIC_LIBRARY_FLAGS}) # _LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS can be defined in __config_site diff --git a/libcxx/test/libcxx/vendor/apple/availability-with-pedantic-errors.compile.pass.cpp b/libcxx/test/libcxx/vendor/apple/availability-with-pedantic-errors.compile.pass.cpp new file mode 100644 index 0000000000000..c55a0a4d6e5d1 --- /dev/null +++ b/libcxx/test/libcxx/vendor/apple/availability-with-pedantic-errors.compile.pass.cpp @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: stdlib=apple-libc++ + +// Test that using -pedantic-errors doesn't turn off availability annotations. +// This used to be the case because we used __has_extension(...) to enable the +// availability annotations, and -pedantic-errors changes the behavior of +// __has_extension(...) in an incompatible way. + +// ADDITIONAL_COMPILE_FLAGS: -pedantic-errors + +#include <__availability> + +#if defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) +# error Availability annotations should be enabled on Apple platforms in the system configuration! +#endif diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt index 4198827203fc8..f4722c3b352d4 100644 --- a/libcxxabi/src/CMakeLists.txt +++ b/libcxxabi/src/CMakeLists.txt @@ -265,7 +265,10 @@ if(LIBCXXABI_HERMETIC_STATIC_LIBRARY) # then its code shouldn't declare them with hidden visibility. They might # actually be provided by a shared library at link time.
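libc++, and below libc++abi and libunwind, now try the newer ``-fvisibility-global-new-delete=force-hidden`` spelling first and fall back to the legacy ``-fvisibility-global-new-delete-hidden`` flag. The effect of either flag is roughly what hand-annotating the replaceable allocation functions would do; an illustrative sketch, not the actual mechanism:

```cpp
#include <cstdlib>
#include <new>

// With the flag, definitions like these inside the static archive get
// hidden visibility without any source annotation, so they are not
// exported from (or imported into) a shared library that links the archive.
__attribute__((visibility("hidden"))) void *operator new(std::size_t n) {
  if (void *p = std::malloc(n))
    return p;
  throw std::bad_alloc();
}

__attribute__((visibility("hidden"))) void operator delete(void *p) noexcept {
  std::free(p);
}
```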
if (LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS) - target_add_compile_flags_if_supported(cxxabi_static_objects PRIVATE -fvisibility-global-new-delete-hidden) + target_add_compile_flags_if_supported(cxxabi_static_objects PRIVATE -fvisibility-global-new-delete=force-hidden) + if (NOT CXX_SUPPORTS_FVISIBILITY_GLOBAL_NEW_DELETE_EQ_FORCE_HIDDEN_FLAG) + target_add_compile_flags_if_supported(cxxabi_static_objects PRIVATE -fvisibility-global-new-delete-hidden) + endif() endif() # _LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS can be defined in libcxx's # __config_site too. Define it in the same way here, to avoid redefinition diff --git a/libunwind/src/CMakeLists.txt b/libunwind/src/CMakeLists.txt index 389143b43f814..facf371d9141c 100644 --- a/libunwind/src/CMakeLists.txt +++ b/libunwind/src/CMakeLists.txt @@ -240,7 +240,10 @@ set_target_properties(unwind_static_objects if(LIBUNWIND_HIDE_SYMBOLS) target_add_compile_flags_if_supported(unwind_static_objects PRIVATE -fvisibility=hidden) - target_add_compile_flags_if_supported(unwind_static_objects PRIVATE -fvisibility-global-new-delete-hidden) + target_add_compile_flags_if_supported(unwind_static_objects PRIVATE -fvisibility-global-new-delete=force-hidden) + if (NOT CXX_SUPPORTS_FVISIBILITY_GLOBAL_NEW_DELETE_EQ_FORCE_HIDDEN_FLAG) + target_add_compile_flags_if_supported(unwind_static_objects PRIVATE -fvisibility-global-new-delete-hidden) + endif() target_compile_definitions(unwind_static_objects PRIVATE _LIBUNWIND_HIDE_SYMBOLS) endif() diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp index 6b516d8c6d5ef..c4388ba9e40d0 100644 --- a/lld/COFF/DLL.cpp +++ b/lld/COFF/DLL.cpp @@ -172,7 +172,7 @@ binImports(COFFLinkerContext &ctx, // A chunk for the delay import descriptor table entry. class DelayDirectoryChunk : public NonSectionChunk { public: - explicit DelayDirectoryChunk(Chunk *n) : dllName(n) {} + explicit DelayDirectoryChunk(Chunk *n) : dllName(n) { setAlignment(4); } size_t getSize() const override { return sizeof(delay_import_directory_table_entry); diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 05fd38fb753fd..8a6f6db68f290 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -11,6 +11,7 @@ #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" +#include "llvm/Support/LEB128.h" using namespace llvm; using namespace llvm::object; @@ -41,6 +42,7 @@ class LoongArch final : public TargetInfo { }; } // end anonymous namespace +namespace { enum Op { SUB_W = 0x00110000, SUB_D = 0x00118000, @@ -65,6 +67,7 @@ enum Reg { R_T2 = 14, R_T3 = 15, }; +} // namespace // Mask out the input's lowest 12 bits for use with `pcalau12i`, in sequences // like `pcalau12i + addi.[wd]` or `pcalau12i + {ld,st}.*` where the `pcalau12i` @@ -153,6 +156,16 @@ static bool isJirl(uint32_t insn) { return (insn & 0xfc000000) == JIRL; } +static void handleUleb128(uint8_t *loc, uint64_t val) { + const uint32_t maxcount = 1 + 64 / 7; + uint32_t count; + uint64_t orig = decodeULEB128(loc, &count); + if (count > maxcount) + errorOrWarn(getErrorLocation(loc) + "extra space for uleb128"); + uint64_t mask = count < maxcount ? (1ULL << 7 * count) - 1 : -1ULL; + encodeULEB128((orig + val) & mask, loc, count); +} + LoongArch::LoongArch() { // The LoongArch ISA itself does not have a limit on page sizes.
According to // the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is @@ -394,11 +407,13 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, case R_LARCH_ADD16: case R_LARCH_ADD32: case R_LARCH_ADD64: + case R_LARCH_ADD_ULEB128: case R_LARCH_SUB6: case R_LARCH_SUB8: case R_LARCH_SUB16: case R_LARCH_SUB32: case R_LARCH_SUB64: + case R_LARCH_SUB_ULEB128: // The LoongArch add/sub relocs behave like the RISCV counterparts; reuse // the RelExpr to avoid code duplication. return R_RISCV_ADD; @@ -633,6 +648,9 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, case R_LARCH_ADD64: write64le(loc, read64le(loc) + val); return; + case R_LARCH_ADD_ULEB128: + handleUleb128(loc, val); + return; case R_LARCH_SUB6: *loc = (*loc & 0xc0) | ((*loc - val) & 0x3f); return; @@ -648,6 +666,9 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, case R_LARCH_SUB64: write64le(loc, read64le(loc) - val); return; + case R_LARCH_SUB_ULEB128: + handleUleb128(loc, -val); + return; case R_LARCH_MARK_LA: case R_LARCH_MARK_PCREL: diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index de52f6a79a40b..019c073bd541b 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -26,6 +26,7 @@ using namespace lld::elf; constexpr uint64_t ppc64TocOffset = 0x8000; constexpr uint64_t dynamicThreadPointerOffset = 0x8000; +namespace { // The instruction encoding of bits 21-30 from the ISA for the Xform and Dform // instructions that can be used as part of the initial exec TLS sequence. enum XFormOpcd { @@ -139,6 +140,7 @@ enum class PPCPrefixedInsn : uint64_t { PSTXV = PREFIX_8LS | 0xd8000000, PSTXVP = PREFIX_8LS | 0xf8000000 }; + static bool checkPPCLegacyInsn(uint32_t encoding) { PPCLegacyInsn insn = static_cast<PPCLegacyInsn>(encoding); if (insn == PPCLegacyInsn::NOINSN) return false; @@ -164,7 +166,6 @@ enum class LegacyToPrefixMask : uint64_t { 0x8000000003e00000, // S/T (6-10) - The [S/T]X bit moves from 28 to 5. }; -namespace { class PPC64 final : public TargetInfo { public: PPC64(); diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 5fcab4d39d43a..4798c86f7d1b6 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -57,6 +57,7 @@ class RISCV final : public TargetInfo { const uint64_t dtpOffset = 0x800; +namespace { enum Op { ADDI = 0x13, AUIPC = 0x17, @@ -78,6 +79,7 @@ enum Reg { X_A0 = 10, X_T3 = 28, }; +} // namespace static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; } static uint32_t lo12(uint32_t val) { return val & 4095; } diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 6df43a34be013..8a08b0fcc90db 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -261,6 +261,9 @@ static void demoteDefined(Defined &sym, DenseMap<SectionBase *, size_t> &map) { Undefined(sym.file, sym.getName(), binding, sym.stOther, sym.type, /*discardedSecIdx=*/map.lookup(sym.section)) .overwrite(sym); + // Eliminate from the symbol table, otherwise we would leave an undefined + // symbol if the symbol is unreferenced in the absence of GC. + sym.isUsedInRegularObj = false; } // If all references to a DSO happen to be weak, the DSO is not added to diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 56ba3463aeadc..6ada711a20a6d 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -163,5 +163,10 @@ WebAssembly Improvements is read from object files within the archive. This matches the behaviour of the ELF linker. +SystemZ +------- + +* Add target support for SystemZ (s390x).
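Returning to the LoongArch ``R_LARCH_{ADD,SUB}_ULEB128`` support: the invariant ``handleUleb128`` maintains is that the patched value is re-encoded into exactly the bytes the assembler reserved, so surrounding section data never shifts. A standalone round-trip sketch (a simplified re-implementation for illustration, not lld's code):

```cpp
#include <cassert>
#include <cstdint>

static uint64_t decode_uleb128(const uint8_t *p, unsigned &count) {
  uint64_t value = 0;
  unsigned shift = 0;
  count = 0;
  uint8_t byte;
  do { // accumulate 7 payload bits per byte; the high bit means "continue"
    byte = p[count++];
    value |= uint64_t(byte & 0x7f) << shift;
    shift += 7;
  } while (byte & 0x80);
  return value;
}

static void encode_uleb128(uint64_t value, uint8_t *p, unsigned count) {
  for (unsigned i = 0; i != count; ++i) { // pad back to the original width
    p[i] = (value & 0x7f) | (i + 1 != count ? 0x80 : 0);
    value >>= 7;
  }
}

int main() {
  // A padded two-byte uleb128 holding 0, as the assembler emits for a
  // not-yet-resolved difference like "w2 - w1" under linker relaxation.
  uint8_t buf[] = {0x80, 0x00};
  unsigned count;
  uint64_t orig = decode_uleb128(buf, count);
  uint64_t mask = (1ull << (7 * count)) - 1; // keep only encodable bits
  encode_uleb128((orig + 0x74) & mask, buf, count);
  assert(count == 2 && buf[0] == 0xf4 && buf[1] == 0x00);
}
```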
+ Fixes ##### diff --git a/lld/test/COFF/delayimports-armnt.yaml b/lld/test/COFF/delayimports-armnt.yaml index 7d9bc38c5c360..ea96d864ef53d 100644 --- a/lld/test/COFF/delayimports-armnt.yaml +++ b/lld/test/COFF/delayimports-armnt.yaml @@ -6,6 +6,7 @@ # RUN: llvm-readobj --coff-imports %t.exe | FileCheck -check-prefix=IMPORT %s # RUN: llvm-readobj --coff-basereloc %t.exe | FileCheck -check-prefix=BASEREL %s # RUN: llvm-objdump --no-print-imm-hex -d %t.exe | FileCheck --check-prefix=DISASM %s +# RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=DIR %s # IMPORT: Format: COFF-ARM # IMPORT-NEXT: Arch: thumb # IMPORT-NEXT: DelayImport { # IMPORT-NEXT: Name: library.dll # IMPORT-NEXT: Attributes: 0x1 -# IMPORT-NEXT: ModuleHandle: 0x3000 -# IMPORT-NEXT: ImportAddressTable: 0x3008 -# IMPORT-NEXT: ImportNameTable: 0x2040 +# IMPORT-NEXT: ModuleHandle: 0x3008 +# IMPORT-NEXT: ImportAddressTable: 0x3010 +# IMPORT-NEXT: ImportNameTable: 0x2044 # IMPORT-NEXT: BoundDelayImportTable: 0x0 # IMPORT-NEXT: UnloadDelayImportTable: 0x0 # IMPORT-NEXT: Import { @@ -43,7 +44,7 @@ # BASEREL-NEXT: } # BASEREL-NEXT: Entry { # BASEREL-NEXT: Type: HIGHLOW -# BASEREL-NEXT: Address: 0x3008 +# BASEREL-NEXT: Address: 0x3010 # BASEREL-NEXT: } # BASEREL-NEXT: Entry { # BASEREL-NEXT: Type: ABSOLUTE @@ -52,20 +53,24 @@ # BASEREL-NEXT: ] # # DISASM: 00401000 <.text>: -# DISASM: 40100c: f243 0c08 movw r12, #12296 +# DISASM: 40100c: f243 0c10 movw r12, #12304 # DISASM-NEXT: f2c0 0c40 movt r12, #64 # DISASM-NEXT: f000 b800 b.w {{.+}} @ imm = #0 # DISASM-NEXT: e92d 480f push.w {r0, r1, r2, r3, r11, lr} # DISASM-NEXT: f20d 0b10 addw r11, sp, #16 # DISASM-NEXT: ed2d 0b10 vpush {d0, d1, d2, d3, d4, d5, d6, d7} # DISASM-NEXT: 4661 mov r1, r12 -# DISASM-NEXT: f242 0000 movw r0, #8192 +# DISASM-NEXT: f242 0004 movw r0, #8196 # DISASM-NEXT: f2c0 0040 movt r0, #64 # DISASM-NEXT: f7ff ffe7 bl 0x401000 <.text> # DISASM-NEXT: 4684 mov r12, r0 # DISASM-NEXT: ecbd 0b10 vpop {d0, d1, d2, d3, d4, d5, d6, d7} # DISASM-NEXT: e8bd 480f pop.w {r0, r1, r2, r3, r11, lr} # DISASM-NEXT: 4760 bx r12 +# +# DIR: DelayImportDescriptorRVA: 0x2004 +# DIR-NEXT: DelayImportDescriptorSize: 0x40 + --- !COFF header: @@ -80,6 +85,14 @@ sections: - VirtualAddress: 0 SymbolName: __imp_function Type: IMAGE_REL_ARM_MOV32T + - Name: .rdata + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ] + Alignment: 1 + SectionData: 01 + - Name: .data + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ] + Alignment: 1 + SectionData: 02 symbols: - Name: .text Value: 0 diff --git a/lld/test/ELF/linkerscript/discard-section.s b/lld/test/ELF/linkerscript/discard-section.s index 24f3b2b73e991..0bbebac59bb34 100644 --- a/lld/test/ELF/linkerscript/discard-section.s +++ b/lld/test/ELF/linkerscript/discard-section.s @@ -9,6 +9,9 @@ # RUN: ld.lld -r -T a.lds a.o b.o -o a.ro 2>&1 | FileCheck %s --check-prefix=WARNING --implicit-check-not=warning: # RUN: llvm-readelf -r -s a.ro | FileCheck %s --check-prefix=RELOC +# RUN: ld.lld -r --gc-sections -T a.lds a.o b.o -o a.gc.ro --no-fatal-warnings +# RUN: llvm-readelf -r -s a.gc.ro | FileCheck %s --check-prefix=RELOC-GC + # LOCAL: error: relocation refers to a discarded section: .aaa # LOCAL-NEXT: >>> defined in a.o # LOCAL-NEXT: >>> referenced by a.o:(.bbb+0x0) @@ -32,16 +35,18 @@ # WARNING: warning: relocation refers to a discarded section: .aaa # WARNING-NEXT: >>> referenced by a.o:(.rela.bbb+0x0) +## GNU ld reports "defined in discarded section"
even in -r mode. +## We set the symbol index to 0. # RELOC: Relocation section '.rela.bbb' at offset {{.*}} contains 1 entries: # RELOC-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend # RELOC-NEXT: 0000000000000000 0000000000000000 R_X86_64_NONE 0 # RELOC-EMPTY: # RELOC-NEXT: Relocation section '.rela.data' at offset {{.*}} contains 4 entries: # RELOC-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend -# RELOC-NEXT: 0000000000000000 0000000500000001 R_X86_64_64 0000000000000000 global + 0 -# RELOC-NEXT: 0000000000000008 0000000700000001 R_X86_64_64 0000000000000000 weak + 0 -# RELOC-NEXT: 0000000000000010 0000000600000001 R_X86_64_64 0000000000000000 weakref1 + 0 -# RELOC-NEXT: 0000000000000018 0000000800000001 R_X86_64_64 0000000000000000 weakref2 + 0 +# RELOC-NEXT: 0000000000000000 0000000000000001 R_X86_64_64 0 +# RELOC-NEXT: 0000000000000008 0000000000000001 R_X86_64_64 0 +# RELOC-NEXT: 0000000000000010 0000000000000001 R_X86_64_64 0 +# RELOC-NEXT: 0000000000000018 0000000000000001 R_X86_64_64 0 # RELOC: Num: Value Size Type Bind Vis Ndx Name # RELOC-NEXT: 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND @@ -49,23 +54,25 @@ # RELOC-NEXT: 2: 0000000000000000 0 SECTION LOCAL DEFAULT 2 .bbb # RELOC-NEXT: 3: 0000000000000000 0 SECTION LOCAL DEFAULT 4 .data # RELOC-NEXT: 4: 0000000000000000 0 NOTYPE GLOBAL DEFAULT 1 _start -# RELOC-NEXT: 5: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND global -# RELOC-NEXT: 6: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND weakref1 -# RELOC-NEXT: 7: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND weak -# RELOC-NEXT: 8: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND weakref2 # RELOC-EMPTY: +# RELOC-GC: There are no relocations in this file. + #--- a.s .globl _start _start: .section .aaa,"a" -.globl global, weakref1 +.globl global, weakref1, unused .weak weak, weakref2 global: weak: weakref1: weakref2: +## Eliminate `unused` just like GC discarded definitions. +## Linux kernel's CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y configuration expects +## that the unreferenced `unused` is not emitted to .symtab. 
+unused: .quad 0 .section .bbb,"aw" diff --git a/lld/test/ELF/loongarch-reloc-leb128.s b/lld/test/ELF/loongarch-reloc-leb128.s new file mode 100644 index 0000000000000..9e6f221e62b63 --- /dev/null +++ b/lld/test/ELF/loongarch-reloc-leb128.s @@ -0,0 +1,102 @@ +# REQUIRES: loongarch +# RUN: rm -rf %t && split-file %s %t && cd %t + +# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax a.s -o a.o +# RUN: llvm-readobj -r -x .gcc_except_table -x .debug_rnglists -x .debug_loclists a.o | FileCheck %s --check-prefix=REL +# RUN: ld.lld -shared --gc-sections a.o -o a.so +# RUN: llvm-readelf -x .gcc_except_table -x .debug_rnglists -x .debug_loclists a.so | FileCheck %s + +# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax a.s -o a32.o +# RUN: llvm-readobj -r -x .gcc_except_table -x .debug_rnglists -x .debug_loclists a32.o | FileCheck %s --check-prefix=REL +# RUN: ld.lld -shared --gc-sections a32.o -o a32.so +# RUN: llvm-readelf -x .gcc_except_table -x .debug_rnglists -x .debug_loclists a32.so | FileCheck %s + +# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax extraspace.s -o extraspace32.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax extraspace.s -o extraspace64.o +# RUN: not ld.lld -shared extraspace32.o 2>&1 | FileCheck %s --check-prefix=ERROR +# RUN: not ld.lld -shared extraspace64.o 2>&1 | FileCheck %s --check-prefix=ERROR +# ERROR: error: extraspace{{.*}}.o:(.rodata+0x0): extra space for uleb128 + +#--- a.s +.cfi_startproc +.cfi_lsda 0x1b,.LLSDA0 +.cfi_endproc + +.section .text.w,"axR" +break 0; break 0; break 0; w1: + .p2align 4 # 4 bytes after relaxation +w2: break 0 + +.section .text.x,"ax" +break 0; break 0; break 0; x1: + .p2align 4 # 4 bytes after relaxation +x2: break 0 + +.section .gcc_except_table,"a" +.LLSDA0: +.uleb128 w2-w1+116 # initial value: 0x0080 +.uleb128 w1-w2+141 # initial value: 0x0080 +.uleb128 w2-w1+16372 # initial value: 0x008080 +.uleb128 w1-w2+16397 # initial value: 0x008080 +.uleb128 w2-w1+2097140 # initial value: 0x00808080 +.uleb128 w1-w2+2097165 # initial value: 0x00808080 + +.section .debug_rnglists +.uleb128 w2-w1+116 # initial value: 0x0080 +.uleb128 w1-w2+141 # initial value: 0x0080 +.uleb128 w2-w1+16372 # initial value: 0x008080 +.uleb128 w1-w2+16397 # initial value: 0x008080 +.uleb128 w2-w1+2097140 # initial value: 0x00808080 +.uleb128 w1-w2+2097165 # initial value: 0x00808080 + +.section .debug_loclists +.uleb128 x2-x1 # references discarded symbols + +# REL: Section ({{.*}}) .rela.debug_rnglists { +# REL-NEXT: 0x0 R_LARCH_ADD_ULEB128 w2 0x74 +# REL-NEXT: 0x0 R_LARCH_SUB_ULEB128 w1 0x0 +# REL-NEXT: 0x2 R_LARCH_ADD_ULEB128 w1 0x8D +# REL-NEXT: 0x2 R_LARCH_SUB_ULEB128 w2 0x0 +# REL-NEXT: 0x4 R_LARCH_ADD_ULEB128 w2 0x3FF4 +# REL-NEXT: 0x4 R_LARCH_SUB_ULEB128 w1 0x0 +# REL-NEXT: 0x7 R_LARCH_ADD_ULEB128 w1 0x400D +# REL-NEXT: 0x7 R_LARCH_SUB_ULEB128 w2 0x0 +# REL-NEXT: 0xA R_LARCH_ADD_ULEB128 w2 0x1FFFF4 +# REL-NEXT: 0xA R_LARCH_SUB_ULEB128 w1 0x0 +# REL-NEXT: 0xE R_LARCH_ADD_ULEB128 w1 0x20000D +# REL-NEXT: 0xE R_LARCH_SUB_ULEB128 w2 0x0 +# REL-NEXT: } +# REL: Section ({{.*}}) .rela.debug_loclists { +# REL-NEXT: 0x0 R_LARCH_ADD_ULEB128 x2 0x0 +# REL-NEXT: 0x0 R_LARCH_SUB_ULEB128 x1 0x0 +# REL-NEXT: } + +# REL: Hex dump of section '.gcc_except_table': +# REL-NEXT: 0x00000000 80008000 80800080 80008080 80008080 . +# REL-NEXT: 0x00000010 8000 . +# REL: Hex dump of section '.debug_rnglists': +# REL-NEXT: 0x00000000 80008000 80800080 80008080 80008080 . +# REL-NEXT: 0x00000010 8000 . 
+# REL: Hex dump of section '.debug_loclists': +# REL-NEXT: 0x00000000 00 . + +# CHECK: Hex dump of section '.gcc_except_table': +# CHECK-NEXT: 0x[[#%x,]] f8008901 f8ff0089 8001f8ff ff008980 . +# CHECK-NEXT: 0x[[#%x,]] 8001 . +# CHECK: Hex dump of section '.debug_rnglists': +# CHECK-NEXT: 0x00000000 f8008901 f8ff0089 8001f8ff ff008980 . +# CHECK-NEXT: 0x00000010 8001 . +# CHECK: Hex dump of section '.debug_loclists': +# CHECK-NEXT: 0x00000000 00 . + +#--- extraspace.s +.text +w1: + la.pcrel $t0, w1 +w2: + +.rodata +.reloc ., R_LARCH_ADD_ULEB128, w2 +.reloc ., R_LARCH_SUB_ULEB128, w1 +.fill 10, 1, 0x80 +.byte 0 diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 44f2850b92d52..c5fa66390bba8 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -22,10 +22,10 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 1) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 0) + set(LLVM_VERSION_PATCH 2) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) - set(LLVM_VERSION_SUFFIX rc) + set(LLVM_VERSION_SUFFIX) endif() if (NOT PACKAGE_VERSION) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 5b3210138f2f8..ecfcd2c983ce5 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -215,6 +215,17 @@ Changes to the RISC-V Backend * ``-mcpu=sifive-p670`` was added. * Support for the Zicond extension is no longer experimental. +Changes to the SystemZ Backend +------------------------------ + +* Properly support 16 byte atomic int/fp types and ops. +* Support i128 as legal type in VRs. +* Add an i128 cost model. +* Support building individual functions with backchain using the + __attribute__((target("backchain"))) syntax. +* Add exception handling for XPLINK. +* Add support for llvm-objcopy. + Changes to the WebAssembly Backend ---------------------------------- @@ -339,36 +350,41 @@ Changes to the Debug Info Changes to the LLVM tools --------------------------------- -* llvm-symbolizer now treats invalid input as an address for which source +* ``llvm-symbolizer`` now treats invalid input as an address for which source information is not found. -* Fixed big-endian support in llvm-symbolizer's DWARF location parser. -* llvm-readelf now supports ``--extra-sym-info`` (``-X``) to display extra +* Fixed big-endian support in ``llvm-symbolizer``'s DWARF location parser. +* ``llvm-readelf`` now supports ``--extra-sym-info`` (``-X``) to display extra information (section name) when showing symbols. +* ``llvm-readobj``/``llvm-readelf`` now supports ``--decompress``/``-z`` with + string and hex dump for ELF object files. -* ``llvm-nm`` now supports the ``--line-numbers`` (``-l``) option to use - debugging information to print symbols' filenames and line numbers. +* ``llvm-symbolizer`` and ``llvm-addr2line`` now support addresses specified as symbol names. -* llvm-symbolizer and llvm-addr2line now support addresses specified as symbol names. - -* llvm-objcopy now supports ``--gap-fill`` and ``--pad-to`` options, for +* ``llvm-objcopy`` now supports ``--gap-fill`` and ``--pad-to`` options, for ELF input and binary output files only. +* ``llvm-objcopy`` now supports ``-O elf64-s390`` for SystemZ. -* Supported parsing XCOFF auxiliary symbols in obj2yaml. +* Supported parsing XCOFF auxiliary symbols in ``obj2yaml``. * ``llvm-ranlib`` now supports ``-X`` on AIX to specify the type of object file ranlib should examine. +* ``llvm-cxxfilt`` now supports ``--no-params``/``-p`` to skip function + parameters. 
+ * ``llvm-nm`` now supports ``--export-symbol`` to ignore the import symbol file. +* ``llvm-nm`` now supports the ``--line-numbers`` (``-l``) option to use + debugging information to print symbols' filenames and line numbers. -* llvm-rc and llvm-windres now accept file path references in ``.rc`` files +* ``llvm-rc`` and ``llvm-windres`` now accept file path references in ``.rc`` files concatenated from multiple string literals. -* The llvm-windres option ``--preprocessor`` now resolves its argument - in the PATH environment variable as expected, and options passed with +* The ``llvm-windres`` option ``--preprocessor`` now resolves its argument + in the ``PATH`` environment variable as expected, and options passed with ``--preprocessor-arg`` are placed before the input file as they should be. -* The llvm-windres option ``--preprocessor`` has been updated with the +* The ``llvm-windres`` option ``--preprocessor`` has been updated with the breaking behaviour change from GNU windres from binutils 2.36, where the whole argument is considered as one path, not considered as a sequence of tool name and parameters. diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 7a92e62b53c53..c6eb66cc9660c 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -406,6 +406,11 @@ bool maskIsAllZeroOrUndef(Value *Mask); /// lanes can be assumed active. bool maskIsAllOneOrUndef(Value *Mask); +/// Given a mask vector of i1, return true if any of the elements of this +/// predicate mask are known to be true or undef. That is, return true if at +/// least one lane can be assumed active. +bool maskContainsAllOneOrUndef(Value *Mask); + /// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) /// for each lane which may be active. APInt possiblyDemandedEltsInMask(Value *Mask); diff --git a/llvm/include/llvm/Target/TargetSchedule.td b/llvm/include/llvm/Target/TargetSchedule.td index 032de72851782..40c2cce8c6eff 100644 --- a/llvm/include/llvm/Target/TargetSchedule.td +++ b/llvm/include/llvm/Target/TargetSchedule.td @@ -620,7 +620,7 @@ class SecondFusionPredicateWithMCInstPredicate<MCInstPredicate pred> : FusionPredicateWithMCInstPredicate<second_fusion_target, pred>; // The pred will be applied on both firstMI and secondMI. class BothFusionPredicateWithMCInstPredicate<MCInstPredicate pred> - : FusionPredicateWithMCInstPredicate<second_fusion_target, pred>; + : FusionPredicateWithMCInstPredicate<both_fusion_target, pred>; // Tie firstOpIdx and secondOpIdx.
The operand of `FirstMI` at position // `firstOpIdx` should be the same as the operand of `SecondMI` at position diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 412115eb649c2..9f9451e4e814a 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -5986,6 +5986,8 @@ void llvm::getUnderlyingObjects(const Value *V, if (!LI || !LI->isLoopHeader(PN->getParent()) || isSameUnderlyingObjectInLoop(PN, LI)) append_range(Worklist, PN->incoming_values()); + else + Objects.push_back(P); continue; } diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 73facc76a92b2..bf7bc0ba84a03 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -1012,6 +1012,31 @@ bool llvm::maskIsAllOneOrUndef(Value *Mask) { return true; } +bool llvm::maskContainsAllOneOrUndef(Value *Mask) { + assert(isa<VectorType>(Mask->getType()) && + isa<IntegerType>(Mask->getType()->getScalarType()) && + cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == + 1 && + "Mask must be a vector of i1"); + + auto *ConstMask = dyn_cast<Constant>(Mask); + if (!ConstMask) + return false; + if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask)) + return true; + if (isa<ScalableVectorType>(ConstMask->getType())) + return false; + for (unsigned + I = 0, + E = cast<FixedVectorType>(ConstMask->getType())->getNumElements(); + I != E; ++I) { + if (auto *MaskElt = ConstMask->getAggregateElement(I)) + if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt)) + return true; + } + return false; +} + /// TODO: This is a lot like known bits, but for /// vectors. Is there something we can common this with? APInt llvm::possiblyDemandedEltsInMask(Value *Mask) { diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 8e508dbdb1c69..026d252ec5bcd 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -44,6 +44,7 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolMachO.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" @@ -1950,7 +1951,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, Lex(); } - if (MAI.hasSubsectionsViaSymbols() && CFIStartProcLoc && Sym->isExternal()) + if (MAI.hasSubsectionsViaSymbols() && CFIStartProcLoc && + Sym->isExternal() && !cast<MCSymbolMachO>(Sym)->isAltEntry()) return Error(StartTokLoc, "non-private labels cannot appear between " ".cfi_startproc / .cfi_endproc pairs") && Error(*CFIStartProcLoc, "previous .cfi_startproc was here"); diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp index 11248bb7aef31..03d641d04413e 100644 --- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp @@ -43,6 +43,8 @@ static cl::opt<bool> GenerateThunks("arm64ec-generate-thunks", cl::Hidden, namespace { +enum class ThunkType { GuestExit, Entry, Exit }; + class AArch64Arm64ECCallLowering : public ModulePass { public: static char ID; @@ -69,14 +71,14 @@ class AArch64Arm64ECCallLowering : public ModulePass { Type *I64Ty; Type *VoidTy; - void getThunkType(FunctionType *FT, AttributeList AttrList, bool EntryThunk, + void getThunkType(FunctionType *FT, AttributeList AttrList, ThunkType TT, raw_ostream &Out, FunctionType *&Arm64Ty, FunctionType *&X64Ty); void getThunkRetType(FunctionType *FT, AttributeList AttrList, raw_ostream &Out, Type *&Arm64RetTy, Type *&X64RetTy,
SmallVectorImpl<Type *> &Arm64ArgTypes, SmallVectorImpl<Type *> &X64ArgTypes, bool &HasSretPtr); - void getThunkArgTypes(FunctionType *FT, AttributeList AttrList, + void getThunkArgTypes(FunctionType *FT, AttributeList AttrList, ThunkType TT, raw_ostream &Out, SmallVectorImpl<Type *> &Arm64ArgTypes, SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr); @@ -89,10 +91,11 @@ class AArch64Arm64ECCallLowering : public ModulePass { void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT, AttributeList AttrList, - bool EntryThunk, raw_ostream &Out, + ThunkType TT, raw_ostream &Out, FunctionType *&Arm64Ty, FunctionType *&X64Ty) { - Out << (EntryThunk ? "$ientry_thunk$cdecl$" : "$iexit_thunk$cdecl$"); + Out << (TT == ThunkType::Entry ? "$ientry_thunk$cdecl$" + : "$iexit_thunk$cdecl$"); Type *Arm64RetTy; Type *X64RetTy; @@ -102,8 +105,8 @@ void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT, // The first argument to a thunk is the called function, stored in x9. // For exit thunks, we pass the called function down to the emulator; - // for entry thunks, we just call the Arm64 function directly. - if (!EntryThunk) + // for entry/guest exit thunks, we just call the Arm64 function directly. + if (TT == ThunkType::Exit) Arm64ArgTypes.push_back(PtrTy); X64ArgTypes.push_back(PtrTy); @@ -111,14 +114,16 @@ void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT, getThunkRetType(FT, AttrList, Out, Arm64RetTy, X64RetTy, Arm64ArgTypes, X64ArgTypes, HasSretPtr); - getThunkArgTypes(FT, AttrList, Out, Arm64ArgTypes, X64ArgTypes, HasSretPtr); + getThunkArgTypes(FT, AttrList, TT, Out, Arm64ArgTypes, X64ArgTypes, + HasSretPtr); Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes, false); + X64Ty = FunctionType::get(X64RetTy, X64ArgTypes, false); } void AArch64Arm64ECCallLowering::getThunkArgTypes( - FunctionType *FT, AttributeList AttrList, raw_ostream &Out, + FunctionType *FT, AttributeList AttrList, ThunkType TT, raw_ostream &Out, SmallVectorImpl<Type *> &Arm64ArgTypes, SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr) { @@ -156,9 +161,11 @@ void AArch64Arm64ECCallLowering::getThunkArgTypes( X64ArgTypes.push_back(PtrTy); // x5 Arm64ArgTypes.push_back(I64Ty); - // FIXME: x5 isn't actually passed/used by the x64 side; revisit once we - // have proper isel for varargs - X64ArgTypes.push_back(I64Ty); + if (TT != ThunkType::Entry) { + // FIXME: x5 isn't actually used by the x64 side; revisit once we + // have proper isel for varargs + X64ArgTypes.push_back(I64Ty); + } return; } @@ -339,8 +346,7 @@ Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT, SmallString<256> ExitThunkName; llvm::raw_svector_ostream ExitThunkStream(ExitThunkName); FunctionType *Arm64Ty, *X64Ty; - getThunkType(FT, Attrs, /*EntryThunk*/ false, ExitThunkStream, Arm64Ty, - X64Ty); + getThunkType(FT, Attrs, ThunkType::Exit, ExitThunkStream, Arm64Ty, X64Ty); if (Function *F = M->getFunction(ExitThunkName)) return F; @@ -443,7 +449,7 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) { SmallString<256> EntryThunkName; llvm::raw_svector_ostream EntryThunkStream(EntryThunkName); FunctionType *Arm64Ty, *X64Ty; - getThunkType(F->getFunctionType(), F->getAttributes(), /*EntryThunk*/ true, + getThunkType(F->getFunctionType(), F->getAttributes(), ThunkType::Entry, EntryThunkStream, Arm64Ty, X64Ty); if (Function *F = M->getFunction(EntryThunkName)) return F; @@ -465,10 +471,11 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) { bool TransformDirectToSRet = X64RetType->isVoidTy() && !RetTy->isVoidTy(); unsigned
ThunkArgOffset = TransformDirectToSRet ? 2 : 1; + unsigned PassthroughArgSize = F->isVarArg() ? 5 : Thunk->arg_size(); // Translate arguments to call. SmallVector<Value *> Args; - for (unsigned i = ThunkArgOffset, e = Thunk->arg_size(); i != e; ++i) { + for (unsigned i = ThunkArgOffset, e = PassthroughArgSize; i != e; ++i) { Value *Arg = Thunk->getArg(i); Type *ArgTy = Arm64Ty->getParamType(i - ThunkArgOffset); if (ArgTy->isArrayTy() || ArgTy->isStructTy() || @@ -485,6 +492,22 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) { Args.push_back(Arg); } + if (F->isVarArg()) { + // The 5th argument to variadic entry thunks is used to model the x64 sp + // which is passed to the thunk in x4, this can be passed to the callee as + // the variadic argument start address after skipping over the 32 byte + // shadow store. + + // The EC thunk CC will assign any argument marked as InReg to x4. + Thunk->addParamAttr(5, Attribute::InReg); + Value *Arg = Thunk->getArg(5); + Arg = IRB.CreatePtrAdd(Arg, IRB.getInt64(0x20)); + Args.push_back(Arg); + + // Pass in a zero variadic argument size (in x5). + Args.push_back(IRB.getInt64(0)); + } + // Call the function passed to the thunk. Value *Callee = Thunk->getArg(0); Callee = IRB.CreateBitCast(Callee, PtrTy); @@ -518,7 +541,7 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) { Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) { llvm::raw_null_ostream NullThunkName; FunctionType *Arm64Ty, *X64Ty; - getThunkType(F->getFunctionType(), F->getAttributes(), /*EntryThunk*/ true, + getThunkType(F->getFunctionType(), F->getAttributes(), ThunkType::GuestExit, NullThunkName, Arm64Ty, X64Ty); auto MangledName = getArm64ECMangledFunctionName(F->getName().str()); assert(MangledName && "Can't guest exit to function that's already native"); diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td index 78ea4a5180f70..8e67f0f5c8815 100644 --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -213,6 +213,9 @@ def CC_AArch64_Arm64EC_VarArg : CallingConv<[ // address is passed in X9. let Entry = 1 in def CC_AArch64_Arm64EC_Thunk : CallingConv<[ + // ARM64EC-specific: the InReg attribute can be used to access the x64 sp passed into entry thunks in x4 from the IR. + CCIfInReg<CCIfType<[iPTR], CCAssignToReg<[X4]>>>, + // Byval aggregates are passed by pointer CCIfByVal<CCPassIndirect<i64>>, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0287856560e91..196aa50cf4060 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8007,11 +8007,19 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, } if (IsVarArg && Subtarget->isWindowsArm64EC()) { + SDValue ParamPtr = StackPtr; + if (IsTailCall) { + // Create a dummy object at the top of the stack that can be used to get + // the SP after the epilogue + int FI = MF.getFrameInfo().CreateFixedObject(1, FPDiff, true); + ParamPtr = DAG.getFrameIndex(FI, PtrVT); + } + // For vararg calls, the Arm64EC ABI requires values in x4 and x5 // describing the argument list. x4 contains the address of the // first stack parameter. x5 contains the size in bytes of all parameters // passed on the stack.
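For context on the thunk and calling-convention hunks above: the x64 side of a variadic call reserves a 32-byte shadow store below the stack-passed varargs, which is why the entry thunk advances the pointer it receives in x4 by 0x20 before handing it on. A minimal standalone sketch of that address computation (firstVarArg is a hypothetical helper for illustration, not part of the patch):

    #include <cstdint>

    // Sketch: given the x64 stack pointer that an Arm64EC variadic entry
    // thunk receives in x4, the first stack-passed variadic argument sits
    // past the 32-byte shadow store the x64 caller reserves for the four
    // register parameters.
    static inline const void *firstVarArg(const void *X64SP) {
      return static_cast<const uint8_t *>(X64SP) + 0x20; // skip shadow store
    }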
- RegsToPass.emplace_back(AArch64::X4, StackPtr); + RegsToPass.emplace_back(AArch64::X4, ParamPtr); RegsToPass.emplace_back(AArch64::X5, DAG.getConstant(NumBytes, DL, MVT::i64)); } diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 76c1a14fe0156..907aae13d6de0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -2343,7 +2343,9 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode( LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy), - DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), + DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1) + : (MaskIdx0 + MaskLen0 - 1), + DL, GRLenVT), DAG.getConstant(MaskIdx0, DL, GRLenVT)); } @@ -4940,3 +4942,8 @@ bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const { return !isa(Y); } + +ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const { + // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension. + return ISD::SIGN_EXTEND; +} diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 72182623b2c3d..9e9ac0b826929 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -206,6 +206,8 @@ class LoongArchTargetLowering : public TargetLowering { return ISD::SIGN_EXTEND; } + ISD::NodeType getExtendForAtomicCmpSwapArg() const override; + Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override; bool mayBeEmittedAsTailCall(const CallInst *CI) const override; diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 27d7f0f261d10..adfcea7361583 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -1255,7 +1255,9 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, emitRRI(Mips::SD, GPReg, Mips::SP, RegOrOffset, SMLoc(), &STI); } - if (getABI().IsN32()) { +#if 0 + // We haven't support -mabicalls -mno-shared yet. 
+ if (-mno-shared) { MCSymbol *GPSym = MCA.getContext().getOrCreateSymbol("__gnu_local_gp"); const MipsMCExpr *HiExpr = MipsMCExpr::create( MipsMCExpr::MEK_HI, MCSymbolRefExpr::create(GPSym, MCA.getContext()), @@ -1273,6 +1275,7 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, return; } +#endif const MipsMCExpr *HiExpr = MipsMCExpr::createGpOff( MipsMCExpr::MEK_HI, MCSymbolRefExpr::create(&Sym, MCA.getContext()), @@ -1288,8 +1291,11 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, emitRRX(Mips::ADDiu, GPReg, GPReg, MCOperand::createExpr(LoExpr), SMLoc(), &STI); - // daddu $gp, $gp, $funcreg - emitRRR(Mips::DADDu, GPReg, GPReg, RegNo, SMLoc(), &STI); + // (d)addu $gp, $gp, $funcreg + if (getABI().IsN32()) + emitRRR(Mips::ADDu, GPReg, GPReg, RegNo, SMLoc(), &STI); + else + emitRRR(Mips::DADDu, GPReg, GPReg, RegNo, SMLoc(), &STI); } void MipsTargetELFStreamer::emitDirectiveCpreturn(unsigned SaveLocation, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 866d5cf340e68..66dab70d455ff 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -37,6 +37,9 @@ static cl::opt<unsigned> SLPMaxVF( InstructionCost RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT, TTI::TargetCostKind CostKind) { + // Check if the type is valid for all CostKind + if (!VT.isVector()) + return InstructionCost::getInvalid(); size_t NumInstr = OpCodes.size(); if (CostKind == TTI::TCK_CodeSize) return NumInstr; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a071c5a3ca032..9e64726fb6fff 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -47878,6 +47878,7 @@ static SDValue combineAndShuffleNot(SDNode *N, SelectionDAG &DAG, SDValue X, Y; SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (SDValue Not = GetNot(N0)) { X = Not; @@ -47891,9 +47892,11 @@ static SDValue combineAndShuffleNot(SDNode *N, SelectionDAG &DAG, X = DAG.getBitcast(VT, X); Y = DAG.getBitcast(VT, Y); SDLoc DL(N); + // We do not split for SSE at all, but we need to split vectors for AVX1 and // AVX2. - if (!Subtarget.useAVX512Regs() && VT.is512BitVector()) { + if (!Subtarget.useAVX512Regs() && VT.is512BitVector() && + TLI.isTypeLegal(VT.getHalfNumVectorElementsVT(*DAG.getContext()))) { SDValue LoX, HiX; std::tie(LoX, HiX) = splitVector(X, DAG, DL); SDValue LoY, HiY; @@ -47903,7 +47906,11 @@ static SDValue combineAndShuffleNot(SDNode *N, SelectionDAG &DAG, SDValue HiV = DAG.getNode(X86ISD::ANDNP, DL, SplitVT, {HiX, HiY}); return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, {LoV, HiV}); } - return DAG.getNode(X86ISD::ANDNP, DL, VT, {X, Y}); + + if (TLI.isTypeLegal(VT)) + return DAG.getNode(X86ISD::ANDNP, DL, VT, {X, Y}); + + return SDValue(); } // Try to widen AND, OR and XOR nodes to VT in order to remove casts around diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index cd40b1d3b0933..be774a89eccbb 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -6080,6 +6080,10 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller, for (const Instruction &I : instructions(Callee)) { if (const auto *CB = dyn_cast<CallBase>(&I)) { // Having more target features is fine for inline ASM.
+ if (CB->isInlineAsm()) + continue; + SmallVector<Type *, 8> Types; for (Value *Arg : CB->args()) Types.push_back(Arg->getType()); diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index 8058282c42250..062a3d341007c 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -652,10 +652,6 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR, // check to see if the pointer is guaranteed to not be modified from entry of // the function to each of the load instructions. - // Because there could be several/many load instructions, remember which - // blocks we know to be transparent to the load. - df_iterator_default_set<BasicBlock *, 16> TranspBlocks; - for (LoadInst *Load : Loads) { // Check to see if the load is invalidated from the start of the block to // the load itself. @@ -669,7 +665,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR, // To do this, we perform a depth first search on the inverse CFG from the // loading block. for (BasicBlock *P : predecessors(BB)) { - for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks)) + for (BasicBlock *TranspBB : inverse_depth_first(P)) if (AAR.canBasicBlockModify(*TranspBB, Loc)) return false; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index a647be2d26c76..bc43edb5e6206 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -412,11 +412,14 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) { if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) { // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) { - Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue(); - StoreInst *S = - new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false, Alignment); - S->copyMetadata(II); - return S; + if (maskContainsAllOneOrUndef(ConstMask)) { + Align Alignment = + cast<ConstantInt>(II.getArgOperand(2))->getAlignValue(); + StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false, + Alignment); + S->copyMetadata(II); + return S; + } } // scatter(vector, splat(ptr), splat(true)) -> store extract(vector, // lastlane), ptr diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 58f0763bb0c0c..c5d3f60176a82 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -2156,14 +2156,14 @@ static bool collectInsertionElements(Value *V, unsigned Shift, Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); for (unsigned i = 0; i != NumElts; ++i) { - unsigned ShiftI = Shift + i * ElementSize; + unsigned ShiftI = i * ElementSize; Constant *Piece = ConstantFoldBinaryInstruction( Instruction::LShr, C, ConstantInt::get(C->getType(), ShiftI)); if (!Piece) return false; Piece = ConstantExpr::getTrunc(Piece, ElementIntTy); - if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy, + if (!collectInsertionElements(Piece, ShiftI + Shift, Elements, VecEltTy, isBigEndian)) return false; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 8c0fd66225513..9973a80a7db94 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6491,6 +6491,13 @@ InstCombiner::getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred, if (!SafeReplacementConstant) SafeReplacementConstant = CI; } + } else if (isa<VectorType>(C->getType())) { + // Handle scalable splat + Value *SplatC = C->getSplatValue(); + auto *CI = dyn_cast_or_null<ConstantInt>(SplatC); + // Bail out if the constant can't be safely incremented/decremented. + if (!CI || !ConstantIsOk(CI)) + return std::nullopt; } else { // ConstantExpr? return std::nullopt; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 21bfc91148bfe..9f220ec003ec3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1284,7 +1284,11 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel, isGuaranteedNotToBeUndefOrPoison(CmpRHS, SQ.AC, &Sel, &DT)) { if (Value *V = simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, SQ, /* AllowRefinement */ true)) - return replaceOperand(Sel, Swapped ? 2 : 1, V); + // Require either the replacement or the simplification result to be a + // constant to avoid infinite loops. + // FIXME: Make this check more precise. + if (isa<Constant>(CmpRHS) || isa<Constant>(V)) + return replaceOperand(Sel, Swapped ? 2 : 1, V); // Even if TrueVal does not simplify, we can directly replace a use of // CmpLHS with CmpRHS, as long as the instruction is not used anywhere @@ -1302,7 +1306,8 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel, isGuaranteedNotToBeUndefOrPoison(CmpLHS, SQ.AC, &Sel, &DT)) if (Value *V = simplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, SQ, /* AllowRefinement */ true)) - return replaceOperand(Sel, Swapped ? 2 : 1, V); + if (isa<Constant>(CmpLHS) || isa<Constant>(V)) + return replaceOperand(Sel, Swapped ? 2 : 1, V); auto *FalseInst = dyn_cast<Instruction>(FalseVal); if (!FalseInst) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 11a91bfbe5baf..380d658365536 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -857,6 +857,9 @@ struct DSEState { // no longer be captured. bool ShouldIterateEndOfFunctionDSE; + /// Dead instructions to be removed at the end of DSE. + SmallVector<Instruction *, 32> ToRemove; + // Class contains self-reference, make sure it's not copied/moved. DSEState(const DSEState &) = delete; DSEState &operator=(const DSEState &) = delete; @@ -1692,7 +1695,8 @@ struct DSEState { return {MaybeDeadAccess}; } - // Delete dead memory defs + /// Delete dead memory defs and recursively add their operands to ToRemove if + /// they became dead. void deleteDeadInstruction(Instruction *SI) { MemorySSAUpdater Updater(&MSSA); SmallVector<Instruction *, 32> NowDeadInsts; @@ -1708,8 +1712,11 @@ struct DSEState { salvageKnowledge(DeadInst); // Remove the Instruction from MSSA.
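Aside on the ToRemove queue introduced above: the reason for deferring erasure (spelled out in the next hunk) is that BatchAA caches results keyed by instruction pointers, and eagerly freed memory can be handed out again for a newly created instruction. A minimal standalone sketch of the hazard, with a plain std::map standing in for the pointer-keyed cache (illustrative only, not LLVM API):

    #include <cstdio>
    #include <map>

    struct Instr { int id; };

    int main() {
      std::map<const Instr *, int> cache; // stand-in for a pointer-keyed AA cache
      Instr *dead = new Instr{1};
      cache[dead] = 42;            // result cached against the old instruction
      delete dead;                 // eager erasure frees the address...
      Instr *fresh = new Instr{2}; // ...which the allocator may hand out again,
      if (cache.count(fresh))      // so a lookup for a brand-new instruction
        std::printf("stale cache hit\n"); // can return a stale entry
      delete fresh;
      return 0;
    }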
- if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) { - if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) { + MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst); + bool IsMemDef = MA && isa<MemoryDef>(MA); + if (MA) { + if (IsMemDef) { + auto *MD = cast<MemoryDef>(MA); SkipStores.insert(MD); if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) { if (SI->getValueOperand()->getType()->isPointerTy()) { @@ -1730,13 +1737,21 @@ struct DSEState { // Remove its operands for (Use &O : DeadInst->operands()) if (Instruction *OpI = dyn_cast<Instruction>(O)) { - O = nullptr; + O.set(PoisonValue::get(O->getType())); if (isInstructionTriviallyDead(OpI, &TLI)) NowDeadInsts.push_back(OpI); } EI.removeInstruction(DeadInst); - DeadInst->eraseFromParent(); + // Remove memory defs directly if they don't produce results, but only + // queue other dead instructions for later removal. They may have been + // used as memory locations that have been cached by BatchAA. Removing + // them here may lead to newly created instructions to be allocated at the + // same address, yielding stale cache entries. + if (IsMemDef && DeadInst->getType()->isVoidTy()) + DeadInst->eraseFromParent(); + else + ToRemove.push_back(DeadInst); } } @@ -1892,15 +1907,15 @@ struct DSEState { Malloc->getArgOperand(0), IRB, TLI); if (!Calloc) return false; + MemorySSAUpdater Updater(&MSSA); auto *NewAccess = Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), nullptr, MallocDef); auto *NewAccessMD = cast<MemoryDef>(NewAccess); Updater.insertDef(NewAccessMD, /*RenameUses=*/true); - Updater.removeMemoryAccess(Malloc); Malloc->replaceAllUsesWith(Calloc); - Malloc->eraseFromParent(); + deleteDeadInstruction(Malloc); return true; } @@ -2233,6 +2248,12 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, MadeChange |= State.eliminateRedundantStoresOfExistingValues(); MadeChange |= State.eliminateDeadWritesAtEndOfFunction(); + + while (!State.ToRemove.empty()) { + Instruction *DeadInst = State.ToRemove.pop_back_val(); + DeadInst->eraseFromParent(); + } + return MadeChange; } } // end anonymous namespace diff --git a/llvm/test/Analysis/CostModel/RISCV/vector-cost-without-v.ll b/llvm/test/Analysis/CostModel/RISCV/vector-cost-without-v.ll new file mode 100644 index 0000000000000..cd99065f0285c --- /dev/null +++ b/llvm/test/Analysis/CostModel/RISCV/vector-cost-without-v.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -mtriple=riscv64 -mattr=+f,+d --passes=loop-unroll-full -S | FileCheck %s + +; Check it doesn't crash when the vector extension is not enabled.
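The guard this new test exercises (the `!VT.isVector()` early-out in RISCVTargetTransformInfo.cpp above) can be pictured with a standalone analogue, using std::optional as a stand-in for InstructionCost::getInvalid(); the names here are illustrative, not LLVM API:

    #include <cstdio>
    #include <optional>

    // Illustrative analogue: refuse to price per-opcode vector costs for a
    // non-vector type; an empty optional plays the role of an invalid cost,
    // letting callers fall back instead of crashing.
    std::optional<unsigned> vectorOpCost(bool typeIsVector, unsigned numOpcodes) {
      if (!typeIsVector)
        return std::nullopt; // analogue of InstructionCost::getInvalid()
      return numOpcodes;     // e.g. code-size cost: one unit per opcode
    }

    int main() {
      std::printf("scalar query priced? %d\n",
                  (int)vectorOpCost(false, 3).has_value());
    }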
+define void @foo() { +; CHECK-LABEL: define void @foo( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr null, align 4 +; CHECK-NEXT: [[SPLAT_SPLAT_I_I_I:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x i32> zeroinitializer +; CHECK-NEXT: [[CMP1_I_I_I:%.*]] = fcmp ogt <2 x float> zeroinitializer, zeroinitializer +; CHECK-NEXT: [[SPLAT_SPLAT3_I_I_I:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer +; CHECK-NEXT: [[XOR3_I_I_I_I_I:%.*]] = select <2 x i1> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr null, align 4 +; CHECK-NEXT: [[SPLAT_SPLAT8_I_I_I:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SUB_I_I_I:%.*]] = fsub <2 x float> zeroinitializer, zeroinitializer +; CHECK-NEXT: [[MUL_I_I_I:%.*]] = shl i64 0, 0 +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr null, align 4 +; CHECK-NEXT: [[SPLAT_SPLAT_I_I_I_I:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x i32> zeroinitializer +; CHECK-NEXT: [[XOR3_I_I_I_V_I_I:%.*]] = select <2 x i1> zeroinitializer, <2 x float> zeroinitializer, <2 x float> zeroinitializer +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV1]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV1]], 8 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv1 = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %0 = load float, ptr null, align 4 + %splat.splat.i.i.i = shufflevector <2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x i32> zeroinitializer + %cmp1.i.i.i = fcmp ogt <2 x float> zeroinitializer, zeroinitializer + %splat.splat3.i.i.i = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer + %xor3.i.i.i.i.i = select <2 x i1> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer + %1 = load float, ptr null, align 4 + %splat.splat8.i.i.i = shufflevector <2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x i32> zeroinitializer + %sub.i.i.i = fsub <2 x float> zeroinitializer, zeroinitializer + %mul.i.i.i = shl i64 0, 0 + %2 = load float, ptr null, align 4 + %splat.splat.i.i.i.i = shufflevector <2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x i32> zeroinitializer + %xor3.i.i.i.v.i.i = select <2 x i1> zeroinitializer, <2 x float> zeroinitializer, <2 x float> zeroinitializer + %indvars.iv.next = add i64 %indvars.iv1, 1 + %exitcond = icmp ne i64 %indvars.iv1, 8 + br i1 %exitcond, label %for.body, label %exit + +exit: ; preds = %for.body + ret void +} diff --git a/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll b/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll new file mode 100644 index 0000000000000..106dc8c13a49f --- /dev/null +++ b/llvm/test/Analysis/LoopAccessAnalysis/underlying-object-loop-varying-phi.ll @@ -0,0 +1,180 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 |
FileCheck %s + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +; Test case for https://github.com/llvm/llvm-project/issues/82665. +define void @indirect_ptr_recurrences_read_write(ptr %A, ptr %B) { +; CHECK-LABEL: 'indirect_ptr_recurrences_read_write' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unsafe indirect dependence. +; CHECK-NEXT: Dependences: +; CHECK-NEXT: IndidrectUnsafe: +; CHECK-NEXT: %l = load i32, ptr %ptr.recur, align 4, !tbaa !4 -> +; CHECK-NEXT: store i32 %xor, ptr %ptr.recur, align 4, !tbaa !4 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %ptr.recur = phi ptr [ %A, %entry ], [ %ptr.next, %loop ] + %gep.B = getelementptr inbounds ptr, ptr %B, i64 %iv + %ptr.next = load ptr, ptr %gep.B, align 8, !tbaa !6 + %l = load i32, ptr %ptr.recur, align 4, !tbaa !10 + %xor = xor i32 %l, 1 + store i32 %xor, ptr %ptr.recur, align 4, !tbaa !10 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 5 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define i32 @indirect_ptr_recurrences_read_only_loop(ptr %A, ptr %B) { +; CHECK-LABEL: 'indirect_ptr_recurrences_read_only_loop' +; CHECK-NEXT: loop: +; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %ptr.recur = phi ptr [ %A, %entry ], [ %ptr.next, %loop ] + %red = phi i32 [ 0, %entry ], [ %xor, %loop ] + %gep.B = getelementptr inbounds ptr, ptr %B, i64 %iv + %ptr.next = load ptr, ptr %gep.B, align 8, !tbaa !6 + %l = load i32, ptr %ptr.recur, align 4, !tbaa !10 + %xor = xor i32 %l, 1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 5 + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %xor +} + +define void @indirect_ptr_recurrences_read_write_may_alias_no_tbaa(ptr %A, ptr %B) { +; CHECK-LABEL: 'indirect_ptr_recurrences_read_write_may_alias_no_tbaa' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: cannot identify array bounds +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %ptr.recur = phi ptr [ %A, %entry ], [ %ptr.next, %loop ] + %gep.B = getelementptr inbounds ptr, ptr %B, i64 %iv + %ptr.next = load ptr, ptr %gep.B, align 8, !tbaa !6 + %l = load i32, ptr %ptr.recur, align 4 + %xor = xor i32 %l, 1 + store i32 %xor, ptr %ptr.recur, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 5 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @indirect_ptr_recurrences_read_write_may_alias_different_obj(ptr %A, ptr %B, ptr %C) { +; CHECK-LABEL: 'indirect_ptr_recurrences_read_write_may_alias_different_obj' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: cannot identify array bounds +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %ptr.recur = phi ptr [ %A, %entry ], [ %ptr.next, %loop ] + %gep.B = getelementptr inbounds ptr, ptr %B, i64 %iv + %ptr.next = load ptr, ptr %gep.B, align 8, !tbaa !6 + %l = load i32, ptr %ptr.recur, align 4 + %xor = xor i32 %l, 1 + %gep.C = getelementptr inbounds ptr, ptr %C, i64 %iv + store i32 %xor, ptr %gep.C, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 5 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @indirect_ptr_recurrences_read_write_may_noalias_different_obj(ptr %A, ptr %B, ptr noalias %C) { +; CHECK-LABEL: 'indirect_ptr_recurrences_read_write_may_noalias_different_obj' +; CHECK-NEXT: loop: +; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %ptr.recur = phi ptr [ %A, %entry ], [ %ptr.next, %loop ] + %gep.B = getelementptr inbounds ptr, ptr %B, i64 %iv + %ptr.next = load ptr, ptr %gep.B, align 8, !tbaa !6 + %l = load i32, ptr %ptr.recur, align 4 + %xor = xor i32 %l, 1 + %gep.C = getelementptr inbounds ptr, ptr %C, i64 %iv + store i32 %xor, ptr %gep.C, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 5 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + + +!6 = !{!7, !7, i64 0} +!7 = !{!"any pointer", !8, i64 0} +!8 = !{!"omnipotent char", !9, i64 0} +!9 = !{!"Simple C/C++ TBAA"} +!10 = !{!11, !11, i64 0} +!11 = !{!"int", !8, i64 0} diff --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll index 5c56f51e1ca55..bb9ba05f7a272 100644 --- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll +++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll @@ -147,8 +147,8 @@ define void @has_varargs(...) 
nounwind { ; CHECK-NEXT: add x29, sp, #160 ; CHECK-NEXT: .seh_add_fp 160 ; CHECK-NEXT: .seh_endprologue -; CHECK-NEXT: ldp x8, x5, [x4, #32] -; CHECK-NEXT: mov x4, x8 +; CHECK-NEXT: add x4, x4, #32 +; CHECK-NEXT: mov x5, xzr ; CHECK-NEXT: blr x9 ; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_ret ; CHECK-NEXT: ldr x0, [x8, :lo12:__os_arm64x_dispatch_ret] diff --git a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll index dc16b3a1a0f27..844fc52ddade6 100644 --- a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll +++ b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll @@ -100,5 +100,42 @@ define void @varargs_many_argscalleer() nounwind { ret void } +define void @varargs_caller_tail() nounwind { +; CHECK-LABEL: varargs_caller_tail: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: mov x4, sp +; CHECK-NEXT: add x8, sp, #16 +; CHECK-NEXT: mov x9, #4617315517961601024 // =0x4014000000000000 +; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000 +; CHECK-NEXT: mov w1, #2 // =0x2 +; CHECK-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000 +; CHECK-NEXT: mov w3, #4 // =0x4 +; CHECK-NEXT: mov w5, #16 // =0x10 +; CHECK-NEXT: stp xzr, x30, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: stp x9, x8, [sp] +; CHECK-NEXT: str xzr, [sp, #16] +; CHECK-NEXT: .weak_anti_dep varargs_callee +; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF +; CHECK-NEXT: .weak_anti_dep "#varargs_callee" +; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF +; CHECK-NEXT: bl "#varargs_callee" +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: add x4, sp, #48 +; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000 +; CHECK-NEXT: mov w1, #4 // =0x4 +; CHECK-NEXT: mov w2, #3 // =0x3 +; CHECK-NEXT: mov w3, #2 // =0x2 +; CHECK-NEXT: mov x5, xzr +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: .weak_anti_dep varargs_callee +; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF +; CHECK-NEXT: .weak_anti_dep "#varargs_callee" +; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF +; CHECK-NEXT: b "#varargs_callee" + call void (double, ...) @varargs_callee(double 1.0, i32 2, double 3.0, i32 4, double 5.0, <2 x double> <double 0.0, double 0.0>) + tail call void (double, ...)
@varargs_callee(double 1.0, i32 4, i32 3, i32 2) + ret void +} declare void @llvm.va_start(ptr) diff --git a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll index 2d6db1642247d..812837639196e 100644 --- a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll +++ b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll @@ -1,5 +1,6 @@ ; RUN: llc -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s +; RUN: llc -mtriple=arm64ec-windows-msvc %s -o - | FileCheck %s --check-prefixes=CHECK-EC ; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s ; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s @@ -32,3 +33,10 @@ attributes #1 = { noinline optnone "thunk" } ; CHECK: ldr x9, [x9] ; CHECK: mov v0.16b, v16.16b ; CHECK: br x9 +; CHECK-EC: mov v7.16b, v0.16b +; CHECK-EC: ldr x9, [x0] +; CHECK-EC: ldr x11, [x9] +; CHECK-EC: mov v0.16b, v7.16b +; CHECK-EC: add x4, sp, #64 +; CHECK-EC: add sp, sp, #64 +; CHECK-EC: br x11 diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll index b0f29ee790885..b84c1093eb75f 100644 --- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll @@ -25,15 +25,16 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; LA64-NEXT: andi $a5, $a5, 255 ; LA64-NEXT: sll.w $a5, $a5, $a3 ; LA64-NEXT: and $a6, $a2, $a4 -; LA64-NEXT: or $a6, $a6, $a5 +; LA64-NEXT: or $a5, $a6, $a5 +; LA64-NEXT: addi.w $a6, $a2, 0 ; LA64-NEXT: .LBB0_3: # %atomicrmw.start ; LA64-NEXT: # Parent Loop BB0_1 Depth=1 ; LA64-NEXT: # => This Inner Loop Header: Depth=2 -; LA64-NEXT: ll.w $a5, $a0, 0 -; LA64-NEXT: bne $a5, $a2, .LBB0_5 +; LA64-NEXT: ll.w $a2, $a0, 0 +; LA64-NEXT: bne $a2, $a6, .LBB0_5 ; LA64-NEXT: # %bb.4: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB0_3 Depth=2 -; LA64-NEXT: move $a7, $a6 +; LA64-NEXT: move $a7, $a5 ; LA64-NEXT: sc.w $a7, $a0, 0 ; LA64-NEXT: beqz $a7, .LBB0_3 ; LA64-NEXT: b .LBB0_6 @@ -42,11 +43,9 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB0_6: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB0_1 Depth=1 -; LA64-NEXT: addi.w $a6, $a2, 0 -; LA64-NEXT: move $a2, $a5 -; LA64-NEXT: bne $a5, $a6, .LBB0_1 +; LA64-NEXT: bne $a2, $a6, .LBB0_1 ; LA64-NEXT: # %bb.2: # %atomicrmw.end -; LA64-NEXT: srl.w $a0, $a5, $a3 +; LA64-NEXT: srl.w $a0, $a2, $a3 ; LA64-NEXT: ret %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst ret i8 %result @@ -77,15 +76,16 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0 ; LA64-NEXT: sll.w $a5, $a5, $a3 ; LA64-NEXT: and $a6, $a2, $a4 -; LA64-NEXT: or $a6, $a6, $a5 +; LA64-NEXT: or $a5, $a6, $a5 +; LA64-NEXT: addi.w $a6, $a2, 0 ; LA64-NEXT: .LBB1_3: # %atomicrmw.start ; LA64-NEXT: # Parent Loop BB1_1 Depth=1 ; LA64-NEXT: # => This Inner Loop Header: Depth=2 -; LA64-NEXT: ll.w $a5, $a0, 0 -; LA64-NEXT: bne $a5, $a2, .LBB1_5 +; LA64-NEXT: ll.w $a2, $a0, 0 +; LA64-NEXT: bne $a2, $a6, .LBB1_5 ; LA64-NEXT: # %bb.4: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB1_3 Depth=2 -; LA64-NEXT: move $a7, $a6 +; LA64-NEXT: move $a7, $a5 ; LA64-NEXT: sc.w $a7, $a0, 0 ; LA64-NEXT: beqz $a7, .LBB1_3 ; LA64-NEXT: b .LBB1_6 @@ -94,11 +94,9 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; 
LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB1_6: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB1_1 Depth=1 -; LA64-NEXT: addi.w $a6, $a2, 0 -; LA64-NEXT: move $a2, $a5 -; LA64-NEXT: bne $a5, $a6, .LBB1_1 +; LA64-NEXT: bne $a2, $a6, .LBB1_1 ; LA64-NEXT: # %bb.2: # %atomicrmw.end -; LA64-NEXT: srl.w $a0, $a5, $a3 +; LA64-NEXT: srl.w $a0, $a2, $a3 ; LA64-NEXT: ret %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst ret i16 %result @@ -107,37 +105,36 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { ; LA64-LABEL: atomicrmw_uinc_wrap_i32: ; LA64: # %bb.0: -; LA64-NEXT: ld.w $a3, $a0, 0 -; LA64-NEXT: addi.w $a2, $a1, 0 +; LA64-NEXT: ld.w $a2, $a0, 0 +; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .p2align 4, , 16 ; LA64-NEXT: .LBB2_1: # %atomicrmw.start ; LA64-NEXT: # =>This Loop Header: Depth=1 ; LA64-NEXT: # Child Loop BB2_3 Depth 2 -; LA64-NEXT: addi.w $a4, $a3, 0 -; LA64-NEXT: sltu $a1, $a4, $a2 -; LA64-NEXT: xori $a1, $a1, 1 -; LA64-NEXT: addi.d $a5, $a3, 1 -; LA64-NEXT: masknez $a5, $a5, $a1 +; LA64-NEXT: addi.w $a3, $a2, 0 +; LA64-NEXT: sltu $a4, $a3, $a1 +; LA64-NEXT: xori $a4, $a4, 1 +; LA64-NEXT: addi.d $a2, $a2, 1 +; LA64-NEXT: masknez $a4, $a2, $a4 ; LA64-NEXT: .LBB2_3: # %atomicrmw.start ; LA64-NEXT: # Parent Loop BB2_1 Depth=1 ; LA64-NEXT: # => This Inner Loop Header: Depth=2 -; LA64-NEXT: ll.w $a1, $a0, 0 -; LA64-NEXT: bne $a1, $a3, .LBB2_5 +; LA64-NEXT: ll.w $a2, $a0, 0 +; LA64-NEXT: bne $a2, $a3, .LBB2_5 ; LA64-NEXT: # %bb.4: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB2_3 Depth=2 -; LA64-NEXT: move $a6, $a5 -; LA64-NEXT: sc.w $a6, $a0, 0 -; LA64-NEXT: beqz $a6, .LBB2_3 +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB2_3 ; LA64-NEXT: b .LBB2_6 ; LA64-NEXT: .LBB2_5: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 ; LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB2_6: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB2_1 Depth=1 -; LA64-NEXT: move $a3, $a1 -; LA64-NEXT: bne $a1, $a4, .LBB2_1 +; LA64-NEXT: bne $a2, $a3, .LBB2_1 ; LA64-NEXT: # %bb.2: # %atomicrmw.end -; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: move $a0, $a2 ; LA64-NEXT: ret %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst ret i32 %result @@ -209,15 +206,16 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; LA64-NEXT: andi $a6, $a6, 255 ; LA64-NEXT: sll.w $a6, $a6, $a3 ; LA64-NEXT: and $a7, $a2, $a4 -; LA64-NEXT: or $a7, $a7, $a6 +; LA64-NEXT: or $a6, $a7, $a6 +; LA64-NEXT: addi.w $a7, $a2, 0 ; LA64-NEXT: .LBB4_3: # %atomicrmw.start ; LA64-NEXT: # Parent Loop BB4_1 Depth=1 ; LA64-NEXT: # => This Inner Loop Header: Depth=2 -; LA64-NEXT: ll.w $a6, $a0, 0 -; LA64-NEXT: bne $a6, $a2, .LBB4_5 +; LA64-NEXT: ll.w $a2, $a0, 0 +; LA64-NEXT: bne $a2, $a7, .LBB4_5 ; LA64-NEXT: # %bb.4: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2 -; LA64-NEXT: move $t0, $a7 +; LA64-NEXT: move $t0, $a6 ; LA64-NEXT: sc.w $t0, $a0, 0 ; LA64-NEXT: beqz $t0, .LBB4_3 ; LA64-NEXT: b .LBB4_6 @@ -226,11 +224,9 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB4_6: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1 -; LA64-NEXT: addi.w $a7, $a2, 0 -; LA64-NEXT: move $a2, $a6 -; LA64-NEXT: bne $a6, $a7, .LBB4_1 +; LA64-NEXT: bne $a2, $a7, .LBB4_1 ; LA64-NEXT: # %bb.2: # %atomicrmw.end -; LA64-NEXT: srl.w $a0, $a6, $a3 +; LA64-NEXT: srl.w $a0, $a2, $a3 ; LA64-NEXT: ret %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst ret i8 %result @@ 
-266,15 +262,16 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; LA64-NEXT: bstrpick.d $a6, $a6, 15, 0 ; LA64-NEXT: sll.w $a6, $a6, $a3 ; LA64-NEXT: and $a7, $a2, $a4 -; LA64-NEXT: or $a7, $a7, $a6 +; LA64-NEXT: or $a6, $a7, $a6 +; LA64-NEXT: addi.w $a7, $a2, 0 ; LA64-NEXT: .LBB5_3: # %atomicrmw.start ; LA64-NEXT: # Parent Loop BB5_1 Depth=1 ; LA64-NEXT: # => This Inner Loop Header: Depth=2 -; LA64-NEXT: ll.w $a6, $a0, 0 -; LA64-NEXT: bne $a6, $a2, .LBB5_5 +; LA64-NEXT: ll.w $a2, $a0, 0 +; LA64-NEXT: bne $a2, $a7, .LBB5_5 ; LA64-NEXT: # %bb.4: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2 -; LA64-NEXT: move $t0, $a7 +; LA64-NEXT: move $t0, $a6 ; LA64-NEXT: sc.w $t0, $a0, 0 ; LA64-NEXT: beqz $t0, .LBB5_3 ; LA64-NEXT: b .LBB5_6 @@ -283,11 +280,9 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB5_6: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1 -; LA64-NEXT: addi.w $a7, $a2, 0 -; LA64-NEXT: move $a2, $a6 -; LA64-NEXT: bne $a6, $a7, .LBB5_1 +; LA64-NEXT: bne $a2, $a7, .LBB5_1 ; LA64-NEXT: # %bb.2: # %atomicrmw.end -; LA64-NEXT: srl.w $a0, $a6, $a3 +; LA64-NEXT: srl.w $a0, $a2, $a3 ; LA64-NEXT: ret %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst ret i16 %result @@ -296,22 +291,22 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { ; LA64-LABEL: atomicrmw_udec_wrap_i32: ; LA64: # %bb.0: -; LA64-NEXT: ld.w $a4, $a0, 0 +; LA64-NEXT: ld.w $a2, $a0, 0 ; LA64-NEXT: addi.w $a3, $a1, 0 ; LA64-NEXT: .p2align 4, , 16 ; LA64-NEXT: .LBB6_1: # %atomicrmw.start ; LA64-NEXT: # =>This Loop Header: Depth=1 ; LA64-NEXT: # Child Loop BB6_3 Depth 2 -; LA64-NEXT: addi.w $a5, $a4, 0 -; LA64-NEXT: sltu $a2, $a3, $a5 -; LA64-NEXT: addi.d $a6, $a4, -1 -; LA64-NEXT: masknez $a6, $a6, $a2 -; LA64-NEXT: maskeqz $a2, $a1, $a2 -; LA64-NEXT: or $a2, $a2, $a6 -; LA64-NEXT: sltui $a6, $a5, 1 -; LA64-NEXT: masknez $a2, $a2, $a6 -; LA64-NEXT: maskeqz $a6, $a1, $a6 -; LA64-NEXT: or $a6, $a6, $a2 +; LA64-NEXT: addi.w $a4, $a2, 0 +; LA64-NEXT: sltu $a5, $a3, $a4 +; LA64-NEXT: addi.d $a2, $a2, -1 +; LA64-NEXT: masknez $a2, $a2, $a5 +; LA64-NEXT: maskeqz $a5, $a1, $a5 +; LA64-NEXT: or $a2, $a5, $a2 +; LA64-NEXT: sltui $a5, $a4, 1 +; LA64-NEXT: masknez $a2, $a2, $a5 +; LA64-NEXT: maskeqz $a5, $a1, $a5 +; LA64-NEXT: or $a5, $a5, $a2 ; LA64-NEXT: .LBB6_3: # %atomicrmw.start ; LA64-NEXT: # Parent Loop BB6_1 Depth=1 ; LA64-NEXT: # => This Inner Loop Header: Depth=2 @@ -319,17 +314,16 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { ; LA64-NEXT: bne $a2, $a4, .LBB6_5 ; LA64-NEXT: # %bb.4: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2 -; LA64-NEXT: move $a7, $a6 -; LA64-NEXT: sc.w $a7, $a0, 0 -; LA64-NEXT: beqz $a7, .LBB6_3 +; LA64-NEXT: move $a6, $a5 +; LA64-NEXT: sc.w $a6, $a0, 0 +; LA64-NEXT: beqz $a6, .LBB6_3 ; LA64-NEXT: b .LBB6_6 ; LA64-NEXT: .LBB6_5: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 ; LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB6_6: # %atomicrmw.start ; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1 -; LA64-NEXT: move $a4, $a2 -; LA64-NEXT: bne $a2, $a5, .LBB6_1 +; LA64-NEXT: bne $a2, $a4, .LBB6_1 ; LA64-NEXT: # %bb.2: # %atomicrmw.end ; LA64-NEXT: move $a0, $a2 ; LA64-NEXT: ret diff --git a/llvm/test/CodeGen/LoongArch/bstrins_w.ll b/llvm/test/CodeGen/LoongArch/bstrins_w.ll index dfbe000841cdc..e008caacad2a1 100644 --- a/llvm/test/CodeGen/LoongArch/bstrins_w.ll +++ b/llvm/test/CodeGen/LoongArch/bstrins_w.ll @@ 
-145,6 +145,19 @@ define i32 @pat5(i32 %a) nounwind { ret i32 %or } +;; The high bits of `const` are zero. +define i32 @pat5_high_zeros(i32 %a) nounwind { +; CHECK-LABEL: pat5_high_zeros: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $a1, 1 +; CHECK-NEXT: ori $a1, $a1, 564 +; CHECK-NEXT: bstrins.w $a0, $a1, 31, 16 +; CHECK-NEXT: ret + %and = and i32 %a, 65535 ; 0x0000ffff + %or = or i32 %and, 305397760 ; 0x12340000 + ret i32 %or +} + ;; Pattern 6: a = b | ((c & mask) << shamt) ;; In this testcase b is 0x10000002, but in fact we do not require b being a ;; constant. As long as all positions in b to be overwritten by the incoming diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll index 417c865f6383f..31ecec6ea8051 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll @@ -69,6 +69,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind { ; LA64-LABEL: cmpxchg_i32_acquire_acquire: ; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a3, $a0, 0 ; LA64-NEXT: bne $a3, $a1, .LBB2_3 @@ -172,6 +173,7 @@ define void @cmpxchg_i16_acquire_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwin define void @cmpxchg_i32_acquire_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { ; LA64-LABEL: cmpxchg_i32_acquire_monotonic: ; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a3, $a0, 0 ; LA64-NEXT: bne $a3, $a1, .LBB6_3 @@ -279,9 +281,10 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32: ; LA64: # %bb.0: +; LA64-NEXT: addi.w $a3, $a1, 0 ; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: ll.w $a3, $a0, 0 -; LA64-NEXT: bne $a3, $a1, .LBB10_3 +; LA64-NEXT: ll.w $a1, $a0, 0 +; LA64-NEXT: bne $a1, $a3, .LBB10_3 ; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 ; LA64-NEXT: move $a4, $a2 ; LA64-NEXT: sc.w $a4, $a0, 0 @@ -290,7 +293,7 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou ; LA64-NEXT: .LBB10_3: ; LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB10_4: -; LA64-NEXT: move $a0, $a3 +; LA64-NEXT: move $a0, $a1 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire %res = extractvalue { i32, i1 } %tmp, 0 @@ -396,6 +399,7 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1: ; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a3, $a0, 0 ; LA64-NEXT: bne $a3, $a1, .LBB14_3 @@ -407,8 +411,7 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw ; LA64-NEXT: .LBB14_3: ; LA64-NEXT: dbar 20 ; LA64-NEXT: .LBB14_4: -; LA64-NEXT: addi.w $a0, $a1, 0 -; LA64-NEXT: xor $a0, $a3, $a0 +; LA64-NEXT: xor $a0, $a3, $a1 ; LA64-NEXT: sltui $a0, $a0, 1 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire @@ -506,6 +509,7 @@ define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 
%val) nounw define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind { ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: ; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a3, $a0, 0 ; LA64-NEXT: bne $a3, $a1, .LBB18_3 @@ -613,9 +617,10 @@ define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: ; LA64: # %bb.0: +; LA64-NEXT: addi.w $a3, $a1, 0 ; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: ll.w $a3, $a0, 0 -; LA64-NEXT: bne $a3, $a1, .LBB22_3 +; LA64-NEXT: ll.w $a1, $a0, 0 +; LA64-NEXT: bne $a1, $a3, .LBB22_3 ; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 ; LA64-NEXT: move $a4, $a2 ; LA64-NEXT: sc.w $a4, $a0, 0 @@ -624,7 +629,7 @@ define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) ; LA64-NEXT: .LBB22_3: ; LA64-NEXT: dbar 1792 ; LA64-NEXT: .LBB22_4: -; LA64-NEXT: move $a0, $a3 +; LA64-NEXT: move $a0, $a1 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic %res = extractvalue { i32, i1 } %tmp, 0 @@ -730,6 +735,7 @@ define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) n define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { ; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: ; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a3, $a0, 0 ; LA64-NEXT: bne $a3, $a1, .LBB26_3 @@ -741,8 +747,7 @@ define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) n ; LA64-NEXT: .LBB26_3: ; LA64-NEXT: dbar 1792 ; LA64-NEXT: .LBB26_4: -; LA64-NEXT: addi.w $a0, $a1, 0 -; LA64-NEXT: xor $a0, $a3, $a0 +; LA64-NEXT: xor $a0, $a3, $a1 ; LA64-NEXT: sltui $a0, $a0, 1 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll index 589360823b148..4d8160d708034 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll @@ -16,6 +16,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB0_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB0_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -33,8 +34,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { ; LA64F-NEXT: .LBB0_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB0_1 +; LA64F-NEXT: bne $a3, $a2, .LBB0_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -51,6 +51,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB0_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB0_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -68,8 +69,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { ; LA64D-NEXT: .LBB0_6: # %atomicrmw.start ; 
LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB0_1 +; LA64D-NEXT: bne $a3, $a2, .LBB0_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fadd ptr %p, float 1.0 acquire, align 4 @@ -90,6 +90,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB1_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB1_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -107,8 +108,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { ; LA64F-NEXT: .LBB1_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB1_1 +; LA64F-NEXT: bne $a3, $a2, .LBB1_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -125,6 +125,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB1_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB1_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -142,8 +143,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { ; LA64D-NEXT: .LBB1_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB1_1 +; LA64D-NEXT: bne $a3, $a2, .LBB1_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fsub ptr %p, float 1.0 acquire, align 4 @@ -165,6 +165,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB2_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB2_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -182,8 +183,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { ; LA64F-NEXT: .LBB2_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB2_1 +; LA64F-NEXT: bne $a3, $a2, .LBB2_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -201,6 +201,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB2_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB2_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -218,8 +219,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { ; LA64D-NEXT: .LBB2_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB2_1 +; LA64D-NEXT: bne $a3, $a2, .LBB2_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fmin ptr %p, float 1.0 acquire, align 4 @@ -241,6 +241,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB3_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB3_1 Depth=1 ; 
LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -258,8 +259,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { ; LA64F-NEXT: .LBB3_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB3_1 +; LA64F-NEXT: bne $a3, $a2, .LBB3_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -277,6 +277,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB3_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB3_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -294,8 +295,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { ; LA64D-NEXT: .LBB3_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB3_1 +; LA64D-NEXT: bne $a3, $a2, .LBB3_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fmax ptr %p, float 1.0 acquire, align 4 @@ -694,6 +694,7 @@ define float @float_fadd_release(ptr %p) nounwind { ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB8_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB8_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -711,8 +712,7 @@ define float @float_fadd_release(ptr %p) nounwind { ; LA64F-NEXT: .LBB8_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB8_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB8_1 +; LA64F-NEXT: bne $a3, $a2, .LBB8_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -729,6 +729,7 @@ define float @float_fadd_release(ptr %p) nounwind { ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB8_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB8_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -746,8 +747,7 @@ define float @float_fadd_release(ptr %p) nounwind { ; LA64D-NEXT: .LBB8_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB8_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB8_1 +; LA64D-NEXT: bne $a3, $a2, .LBB8_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fadd ptr %p, float 1.0 release, align 4 @@ -768,6 +768,7 @@ define float @float_fsub_release(ptr %p) nounwind { ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB9_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB9_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -785,8 +786,7 @@ define float @float_fsub_release(ptr %p) nounwind { ; LA64F-NEXT: .LBB9_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB9_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB9_1 +; LA64F-NEXT: bne $a3, $a2, .LBB9_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -803,6 +803,7 @@ define float @float_fsub_release(ptr %p) nounwind { ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; 
LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB9_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB9_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -820,8 +821,7 @@ define float @float_fsub_release(ptr %p) nounwind { ; LA64D-NEXT: .LBB9_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB9_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB9_1 +; LA64D-NEXT: bne $a3, $a2, .LBB9_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fsub ptr %p, float 1.0 release, align 4 @@ -843,6 +843,7 @@ define float @float_fmin_release(ptr %p) nounwind { ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB10_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB10_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -860,8 +861,7 @@ define float @float_fmin_release(ptr %p) nounwind { ; LA64F-NEXT: .LBB10_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB10_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB10_1 +; LA64F-NEXT: bne $a3, $a2, .LBB10_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -879,6 +879,7 @@ define float @float_fmin_release(ptr %p) nounwind { ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB10_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB10_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -896,8 +897,7 @@ define float @float_fmin_release(ptr %p) nounwind { ; LA64D-NEXT: .LBB10_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB10_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB10_1 +; LA64D-NEXT: bne $a3, $a2, .LBB10_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fmin ptr %p, float 1.0 release, align 4 @@ -919,6 +919,7 @@ define float @float_fmax_release(ptr %p) nounwind { ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB11_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB11_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -936,8 +937,7 @@ define float @float_fmax_release(ptr %p) nounwind { ; LA64F-NEXT: .LBB11_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB11_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB11_1 +; LA64F-NEXT: bne $a3, $a2, .LBB11_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -955,6 +955,7 @@ define float @float_fmax_release(ptr %p) nounwind { ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB11_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB11_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -972,8 +973,7 @@ define float @float_fmax_release(ptr %p) nounwind { ; LA64D-NEXT: .LBB11_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB11_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB11_1 +; LA64D-NEXT: bne $a3, $a2, .LBB11_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fmax ptr %p, float 1.0 release, 
align 4 @@ -1372,6 +1372,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB16_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB16_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -1389,8 +1390,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { ; LA64F-NEXT: .LBB16_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB16_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB16_1 +; LA64F-NEXT: bne $a3, $a2, .LBB16_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -1407,6 +1407,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB16_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB16_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -1424,8 +1425,7 @@ define float @float_fadd_acq_rel(ptr %p) nounwind { ; LA64D-NEXT: .LBB16_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB16_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB16_1 +; LA64D-NEXT: bne $a3, $a2, .LBB16_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fadd ptr %p, float 1.0 acq_rel, align 4 @@ -1446,6 +1446,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB17_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB17_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -1463,8 +1464,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { ; LA64F-NEXT: .LBB17_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB17_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB17_1 +; LA64F-NEXT: bne $a3, $a2, .LBB17_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -1481,6 +1481,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB17_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB17_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -1498,8 +1499,7 @@ define float @float_fsub_acq_rel(ptr %p) nounwind { ; LA64D-NEXT: .LBB17_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB17_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB17_1 +; LA64D-NEXT: bne $a3, $a2, .LBB17_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fsub ptr %p, float 1.0 acq_rel, align 4 @@ -1521,6 +1521,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB18_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB18_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -1538,8 +1539,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { ; LA64F-NEXT: .LBB18_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB18_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 
-; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB18_1 +; LA64F-NEXT: bne $a3, $a2, .LBB18_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -1557,6 +1557,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB18_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB18_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -1574,8 +1575,7 @@ define float @float_fmin_acq_rel(ptr %p) nounwind { ; LA64D-NEXT: .LBB18_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB18_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB18_1 +; LA64D-NEXT: bne $a3, $a2, .LBB18_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fmin ptr %p, float 1.0 acq_rel, align 4 @@ -1597,6 +1597,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB19_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB19_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -1614,8 +1615,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { ; LA64F-NEXT: .LBB19_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB19_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB19_1 +; LA64F-NEXT: bne $a3, $a2, .LBB19_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -1633,6 +1633,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB19_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB19_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -1650,8 +1651,7 @@ define float @float_fmax_acq_rel(ptr %p) nounwind { ; LA64D-NEXT: .LBB19_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB19_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB19_1 +; LA64D-NEXT: bne $a3, $a2, .LBB19_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fmax ptr %p, float 1.0 acq_rel, align 4 @@ -2074,6 +2074,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB24_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB24_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -2091,8 +2092,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { ; LA64F-NEXT: .LBB24_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB24_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB24_1 +; LA64F-NEXT: bne $a3, $a2, .LBB24_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -2109,6 +2109,7 @@ define float @float_fadd_seq_cst(ptr %p) nounwind { ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB24_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB24_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -2126,8 +2127,7 @@ define float 
@float_fadd_seq_cst(ptr %p) nounwind { ; LA64D-NEXT: .LBB24_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB24_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB24_1 +; LA64D-NEXT: bne $a3, $a2, .LBB24_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fadd ptr %p, float 1.0 seq_cst, align 4 @@ -2148,6 +2148,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB25_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB25_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -2165,8 +2166,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { ; LA64F-NEXT: .LBB25_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB25_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB25_1 +; LA64F-NEXT: bne $a3, $a2, .LBB25_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -2183,6 +2183,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB25_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB25_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -2200,8 +2201,7 @@ define float @float_fsub_seq_cst(ptr %p) nounwind { ; LA64D-NEXT: .LBB25_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB25_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB25_1 +; LA64D-NEXT: bne $a3, $a2, .LBB25_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fsub ptr %p, float 1.0 seq_cst, align 4 @@ -2223,6 +2223,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB26_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB26_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -2240,8 +2241,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { ; LA64F-NEXT: .LBB26_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB26_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB26_1 +; LA64F-NEXT: bne $a3, $a2, .LBB26_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -2259,6 +2259,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB26_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB26_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -2276,8 +2277,7 @@ define float @float_fmin_seq_cst(ptr %p) nounwind { ; LA64D-NEXT: .LBB26_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB26_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB26_1 +; LA64D-NEXT: bne $a3, $a2, .LBB26_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fmin ptr %p, float 1.0 seq_cst, align 4 @@ -2299,6 +2299,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s 
$a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB27_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB27_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -2316,8 +2317,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { ; LA64F-NEXT: .LBB27_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB27_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB27_1 +; LA64F-NEXT: bne $a3, $a2, .LBB27_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -2335,6 +2335,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB27_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB27_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -2352,8 +2353,7 @@ define float @float_fmax_seq_cst(ptr %p) nounwind { ; LA64D-NEXT: .LBB27_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB27_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB27_1 +; LA64D-NEXT: bne $a3, $a2, .LBB27_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fmax ptr %p, float 1.0 seq_cst, align 4 @@ -2752,6 +2752,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB32_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB32_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -2769,8 +2770,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { ; LA64F-NEXT: .LBB32_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB32_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB32_1 +; LA64F-NEXT: bne $a3, $a2, .LBB32_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -2787,6 +2787,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB32_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB32_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -2804,8 +2805,7 @@ define float @float_fadd_monotonic(ptr %p) nounwind { ; LA64D-NEXT: .LBB32_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB32_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB32_1 +; LA64D-NEXT: bne $a3, $a2, .LBB32_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fadd ptr %p, float 1.0 monotonic, align 4 @@ -2826,6 +2826,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB33_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB33_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -2843,8 +2844,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { ; LA64F-NEXT: .LBB33_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB33_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB33_1 +; LA64F-NEXT: bne $a3, $a2, .LBB33_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: 
ret ; @@ -2861,6 +2861,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB33_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB33_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -2878,8 +2879,7 @@ define float @float_fsub_monotonic(ptr %p) nounwind { ; LA64D-NEXT: .LBB33_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB33_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB33_1 +; LA64D-NEXT: bne $a3, $a2, .LBB33_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fsub ptr %p, float 1.0 monotonic, align 4 @@ -2901,6 +2901,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { ; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB34_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB34_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -2918,8 +2919,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { ; LA64F-NEXT: .LBB34_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB34_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB34_1 +; LA64F-NEXT: bne $a3, $a2, .LBB34_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -2937,6 +2937,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { ; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB34_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB34_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -2954,8 +2955,7 @@ define float @float_fmin_monotonic(ptr %p) nounwind { ; LA64D-NEXT: .LBB34_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB34_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB34_1 +; LA64D-NEXT: bne $a3, $a2, .LBB34_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fmin ptr %p, float 1.0 monotonic, align 4 @@ -2977,6 +2977,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { ; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB35_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB35_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -2994,8 +2995,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { ; LA64F-NEXT: .LBB35_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB35_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB35_1 +; LA64F-NEXT: bne $a3, $a2, .LBB35_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -3013,6 +3013,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { ; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB35_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB35_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -3030,8 +3031,7 @@ define float @float_fmax_monotonic(ptr %p) nounwind { ; LA64D-NEXT: .LBB35_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB35_1 Depth=1 ; LA64D-NEXT: 
movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB35_1 +; LA64D-NEXT: bne $a3, $a2, .LBB35_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fmax ptr %p, float 1.0 monotonic, align 4 diff --git a/llvm/test/CodeGen/X86/combine-and.ll b/llvm/test/CodeGen/X86/combine-and.ll index d223b75419ac4..294fcd6a9563e 100644 --- a/llvm/test/CodeGen/X86/combine-and.ll +++ b/llvm/test/CodeGen/X86/combine-and.ll @@ -1171,6 +1171,25 @@ define <4 x i32> @neg_scalar_broadcast_two_uses(i32 %a0, <4 x i32> %a1, ptr %a2) ret <4 x i32> %4 } +; PR84660 - check for illegal types +define <2 x i128> @neg_scalar_broadcast_illegaltype(i128 %arg) { +; CHECK-LABEL: neg_scalar_broadcast_illegaltype: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: notl %esi +; CHECK-NEXT: andl $1, %esi +; CHECK-NEXT: movq %rsi, 16(%rdi) +; CHECK-NEXT: movq %rsi, (%rdi) +; CHECK-NEXT: movq $0, 24(%rdi) +; CHECK-NEXT: movq $0, 8(%rdi) +; CHECK-NEXT: retq + %i = xor i128 %arg, 1 + %i1 = insertelement <2 x i128> zeroinitializer, i128 %i, i64 0 + %i2 = shufflevector <2 x i128> %i1, <2 x i128> zeroinitializer, <2 x i32> zeroinitializer + %i3 = and <2 x i128> <i128 1, i128 1>, %i2 + ret <2 x i128> %i3 +} + define <2 x i64> @andnp_xx(<2 x i64> %v0) nounwind { ; SSE-LABEL: andnp_xx: ; SSE: # %bb.0: diff --git a/llvm/test/MC/AArch64/cfi-bad-nesting-darwin.s b/llvm/test/MC/AArch64/cfi-bad-nesting-darwin.s index 235b7d4480992..3a5af86defc59 100644 --- a/llvm/test/MC/AArch64/cfi-bad-nesting-darwin.s +++ b/llvm/test/MC/AArch64/cfi-bad-nesting-darwin.s @@ -8,6 +8,10 @@ .p2align 2 _locomotive: .cfi_startproc + ; An N_ALT_ENTRY symbol can be defined in the middle of a subsection, so + ; these are opted out of the .cfi_{start,end}proc nesting check. + .alt_entry _engineer +_engineer: ret ; It is invalid to have a non-private label between .cfi_startproc and @@ -17,7 +21,7 @@ _locomotive: .p2align 2 _caboose: ; DARWIN: [[#@LINE-1]]:1: error: non-private labels cannot appear between .cfi_startproc / .cfi_endproc pairs -; DARWIN: [[#@LINE-10]]:2: error: previous .cfi_startproc was here +; DARWIN: [[#@LINE-14]]:2: error: previous .cfi_startproc was here ret .cfi_endproc diff --git a/llvm/test/MC/Mips/cpsetup.s b/llvm/test/MC/Mips/cpsetup.s index 8e587aea3e7e6..4a027c6e796ae 100644 --- a/llvm/test/MC/Mips/cpsetup.s +++ b/llvm/test/MC/Mips/cpsetup.s @@ -4,8 +4,6 @@ # RUN: llvm-mc -triple mips-unknown-linux -target-abi o32 %s | \ # RUN: FileCheck -check-prefixes=ASM,ASM-O32 %s -# FIXME: Now we check .cpsetup expansion for `-mno-shared` case only. -# We also need to implement/check the `-mshared` case.
# RUN: llvm-mc -triple mips64-unknown-linux -target-abi n32 -filetype=obj -o - %s | \ # RUN: llvm-objdump --no-print-imm-hex -d -r -z - | \ # RUN: FileCheck -check-prefixes=ALL,NXX,N32 %s @@ -35,11 +33,16 @@ t1: # NXX-NEXT: sd $gp, 8($sp) # NXX-NEXT: lui $gp, 0 -# N32-NEXT: R_MIPS_HI16 __gnu_local_gp # N64-NEXT: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16 __cerror +# N32-NEXT: R_MIPS_GPREL16 __cerror +# N32-NEXT: R_MIPS_SUB +# N32-NEXT: R_MIPS_HI16 # NXX-NEXT: addiu $gp, $gp, 0 -# N32-NEXT: R_MIPS_LO16 __gnu_local_gp # N64-NEXT: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16 __cerror +# N32-NEXT: R_MIPS_GPREL16 __cerror +# N32-NEXT: R_MIPS_SUB +# N32-NEXT: R_MIPS_LO16 +# N32-NEXT: addu $gp, $gp, $25 # N64-NEXT: daddu $gp, $gp, $25 # ASM-NEXT: .cpsetup $25, 8, __cerror @@ -64,11 +67,16 @@ t2: # NXX-NEXT: move $2, $gp # NXX-NEXT: lui $gp, 0 -# N32-NEXT: R_MIPS_HI16 __gnu_local_gp # N64-NEXT: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16 __cerror +# N32-NEXT: R_MIPS_GPREL16 __cerror +# N32-NEXT: R_MIPS_SUB +# N32-NEXT: R_MIPS_HI16 # NXX-NEXT: addiu $gp, $gp, 0 -# N32-NEXT: R_MIPS_LO16 __gnu_local_gp # N64-NEXT: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16 __cerror +# N32-NEXT: R_MIPS_GPREL16 __cerror +# N32-NEXT: R_MIPS_SUB +# N32-NEXT: R_MIPS_LO16 +# N32-NEXT: addu $gp, $gp, $25 # N64-NEXT: daddu $gp, $gp, $25 # ASM-NEXT: .cpsetup $25, $2, __cerror @@ -101,11 +109,16 @@ t3: # NXX-NEXT: move $2, $gp # NXX-NEXT: lui $gp, 0 -# N32-NEXT: {{^ *0+}}38: R_MIPS_HI16 __gnu_local_gp # N64-NEXT: {{^ *0+}}40: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16 .text +# N32-NEXT: {{^ *0+}}40: R_MIPS_GPREL16 .text +# N32-NEXT: R_MIPS_SUB +# N32-NEXT: R_MIPS_HI16 # NXX-NEXT: addiu $gp, $gp, 0 -# N32-NEXT: {{^ *0+}}3c: R_MIPS_LO16 __gnu_local_gp # N64-NEXT: {{^ *0+}}44: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16 .text +# N32-NEXT: {{^ *0+}}44: R_MIPS_GPREL16 .text +# N32-NEXT: R_MIPS_SUB +# N32-NEXT: R_MIPS_LO16 +# N32-NEXT: addu $gp, $gp, $25 # N64-NEXT: daddu $gp, $gp, $25 # NXX-NEXT: nop # NXX-NEXT: sub $3, $3, $2 @@ -158,11 +171,16 @@ t5: # NXX-NEXT: sd $gp, 8($sp) # NXX-NEXT: lui $gp, 0 -# N32-NEXT: R_MIPS_HI16 __gnu_local_gp # N64-NEXT: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16 __cerror +# N32-NEXT: R_MIPS_GPREL16 __cerror +# N32-NEXT: R_MIPS_SUB +# N32-NEXT: R_MIPS_HI16 # NXX-NEXT: addiu $gp, $gp, 0 -# N32-NEXT: R_MIPS_LO16 __gnu_local_gp # N64-NEXT: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16 __cerror +# N32-NEXT: R_MIPS_GPREL16 __cerror +# N32-NEXT: R_MIPS_SUB +# N32-NEXT: R_MIPS_LO16 +# N32-NEXT: addu $gp, $gp, $25 # N64-NEXT: daddu $gp, $gp, $25 # ASM-NEXT: .cpsetup $25, 8, __cerror @@ -184,11 +202,16 @@ IMM_8 = 8 # NXX-NEXT: sd $gp, 8($sp) # NXX-NEXT: lui $gp, 0 -# N32-NEXT: R_MIPS_HI16 __gnu_local_gp # N64-NEXT: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16 __cerror +# N32-NEXT: R_MIPS_GPREL16 __cerror +# N32-NEXT: R_MIPS_SUB +# N32-NEXT: R_MIPS_HI16 # NXX-NEXT: addiu $gp, $gp, 0 -# N32-NEXT: R_MIPS_LO16 __gnu_local_gp # N64-NEXT: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16 __cerror +# N32-NEXT: R_MIPS_GPREL16 __cerror +# N32-NEXT: R_MIPS_SUB +# N32-NEXT: R_MIPS_LO16 +# N32-NEXT: addu $gp, $gp, $25 # N64-NEXT: daddu $gp, $gp, $25 # ASM-NEXT: .cpsetup $25, 8, __cerror diff --git a/llvm/test/TableGen/MacroFusion.td b/llvm/test/TableGen/MacroFusion.td index 4aa6c8d9acb27..ce76e7f0f7fa6 100644 --- a/llvm/test/TableGen/MacroFusion.td +++ b/llvm/test/TableGen/MacroFusion.td @@ -34,6 +34,11 @@ let Namespace = "Test" in { def Inst0 : TestInst<0>; def Inst1 : TestInst<1>; +def BothFusionPredicate: BothFusionPredicateWithMCInstPredicate>; +def TestBothFusionPredicate: 
Fusion<"test-both-fusion-predicate", "HasBothFusionPredicate", + "Test BothFusionPredicate", + [BothFusionPredicate]>; + def TestFusion: SimpleFusion<"test-fusion", "HasTestFusion", "Test Fusion", CheckOpcode<[Inst0]>, CheckAll<[ @@ -45,6 +50,7 @@ def TestFusion: SimpleFusion<"test-fusion", "HasTestFusion", "Test Fusion", // CHECK-PREDICATOR-NEXT: #undef GET_Test_MACRO_FUSION_PRED_DECL // CHECK-PREDICATOR-EMPTY: // CHECK-PREDICATOR-NEXT: namespace llvm { +// CHECK-PREDICATOR-NEXT: bool isTestBothFusionPredicate(const TargetInstrInfo &, const TargetSubtargetInfo &, const MachineInstr *, const MachineInstr &); // CHECK-PREDICATOR-NEXT: bool isTestFusion(const TargetInstrInfo &, const TargetSubtargetInfo &, const MachineInstr *, const MachineInstr &); // CHECK-PREDICATOR-NEXT: } // end namespace llvm // CHECK-PREDICATOR-EMPTY: @@ -54,6 +60,24 @@ def TestFusion: SimpleFusion<"test-fusion", "HasTestFusion", "Test Fusion", // CHECK-PREDICATOR-NEXT: #undef GET_Test_MACRO_FUSION_PRED_IMPL // CHECK-PREDICATOR-EMPTY: // CHECK-PREDICATOR-NEXT: namespace llvm { +// CHECK-PREDICATOR-NEXT: bool isTestBothFusionPredicate( +// CHECK-PREDICATOR-NEXT: const TargetInstrInfo &TII, +// CHECK-PREDICATOR-NEXT: const TargetSubtargetInfo &STI, +// CHECK-PREDICATOR-NEXT: const MachineInstr *FirstMI, +// CHECK-PREDICATOR-NEXT: const MachineInstr &SecondMI) { +// CHECK-PREDICATOR-NEXT: auto &MRI = SecondMI.getMF()->getRegInfo(); +// CHECK-PREDICATOR-NEXT: { +// CHECK-PREDICATOR-NEXT: const MachineInstr *MI = FirstMI; +// CHECK-PREDICATOR-NEXT: if (MI->getOperand(0).getReg() != Test::X0) +// CHECK-PREDICATOR-NEXT: return false; +// CHECK-PREDICATOR-NEXT: } +// CHECK-PREDICATOR-NEXT: { +// CHECK-PREDICATOR-NEXT: const MachineInstr *MI = &SecondMI; +// CHECK-PREDICATOR-NEXT: if (MI->getOperand(0).getReg() != Test::X0) +// CHECK-PREDICATOR-NEXT: return false; +// CHECK-PREDICATOR-NEXT: } +// CHECK-PREDICATOR-NEXT: return true; +// CHECK-PREDICATOR-NEXT: } // CHECK-PREDICATOR-NEXT: bool isTestFusion( // CHECK-PREDICATOR-NEXT: const TargetInstrInfo &TII, // CHECK-PREDICATOR-NEXT: const TargetSubtargetInfo &STI, @@ -106,6 +130,7 @@ def TestFusion: SimpleFusion<"test-fusion", "HasTestFusion", "Test Fusion", // CHECK-SUBTARGET: std::vector TestGenSubtargetInfo::getMacroFusions() const { // CHECK-SUBTARGET-NEXT: std::vector Fusions; +// CHECK-SUBTARGET-NEXT: if (hasFeature(Test::TestBothFusionPredicate)) Fusions.push_back(llvm::isTestBothFusionPredicate); // CHECK-SUBTARGET-NEXT: if (hasFeature(Test::TestFusion)) Fusions.push_back(llvm::isTestFusion); // CHECK-SUBTARGET-NEXT: return Fusions; // CHECK-SUBTARGET-NEXT: } diff --git a/llvm/test/Transforms/ArgumentPromotion/aliasing-and-non-aliasing-loads-with-clobber.ll b/llvm/test/Transforms/ArgumentPromotion/aliasing-and-non-aliasing-loads-with-clobber.ll new file mode 100644 index 0000000000000..1e1669b29b0db --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/aliasing-and-non-aliasing-loads-with-clobber.ll @@ -0,0 +1,100 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -p argpromotion -S %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" + +@f = dso_local global { i16, i64 } { i16 1, i64 0 }, align 8 + +; Test case for https://github.com/llvm/llvm-project/issues/84807. + +; Make sure the loads from @callee are not moved to @caller, as the store +; in %then may alias the load from %q.
+ +define i32 @caller1(i1 %c) { +; CHECK-LABEL: define i32 @caller1( +; CHECK-SAME: i1 [[C:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @callee1(ptr noundef nonnull @f, i1 [[C]]) +; CHECK-NEXT: ret i32 0 +; +entry: + call void @callee1(ptr noundef nonnull @f, i1 %c) + ret i32 0 +} + +define internal void @callee1(ptr nocapture noundef readonly %q, i1 %c) { +; CHECK-LABEL: define internal void @callee1( +; CHECK-SAME: ptr nocapture noundef readonly [[Q:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[EXIT:%.*]] +; CHECK: then: +; CHECK-NEXT: store i16 123, ptr @f, align 8 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[Q_0_VAL:%.*]] = load i16, ptr [[Q]], align 8 +; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 8 +; CHECK-NEXT: [[Q_8_VAL:%.*]] = load i64, ptr [[GEP_8]], align 8 +; CHECK-NEXT: call void @use(i16 [[Q_0_VAL]], i64 [[Q_8_VAL]]) +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %then, label %exit + +then: + store i16 123, ptr @f, align 8 + br label %exit + +exit: + %l.0 = load i16, ptr %q, align 8 + %gep.8 = getelementptr inbounds i8, ptr %q, i64 8 + %l.1 = load i64, ptr %gep.8, align 8 + call void @use(i16 %l.0, i64 %l.1) + ret void + + uselistorder ptr %q, { 1, 0 } +} + +; Same as @caller1/@callee1, but with default uselist order. +define i32 @caller2(i1 %c) { +; CHECK-LABEL: define i32 @caller2( +; CHECK-SAME: i1 [[C:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @callee2(ptr noundef nonnull @f, i1 [[C]]) +; CHECK-NEXT: ret i32 0 +; +entry: + call void @callee2(ptr noundef nonnull @f, i1 %c) + ret i32 0 +} + +define internal void @callee2(ptr nocapture noundef readonly %q, i1 %c) { +; CHECK-LABEL: define internal void @callee2( +; CHECK-SAME: ptr nocapture noundef readonly [[Q:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[EXIT:%.*]] +; CHECK: then: +; CHECK-NEXT: store i16 123, ptr @f, align 8 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[Q_0_VAL:%.*]] = load i16, ptr [[Q]], align 8 +; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 8 +; CHECK-NEXT: [[Q_8_VAL:%.*]] = load i64, ptr [[GEP_8]], align 8 +; CHECK-NEXT: call void @use(i16 [[Q_0_VAL]], i64 [[Q_8_VAL]]) +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %then, label %exit + +then: + store i16 123, ptr @f, align 8 + br label %exit + +exit: + %l.0 = load i16, ptr %q, align 8 + %gep.8 = getelementptr inbounds i8, ptr %q, i64 8 + %l.1 = load i64, ptr %gep.8, align 8 + call void @use(i16 %l.0, i64 %l.1) + ret void +} + +declare void @use(i16, i64) diff --git a/llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll b/llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll new file mode 100644 index 0000000000000..ee9bd6912e2ae --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll @@ -0,0 +1,189 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -passes=dse < %s | FileCheck %s +; +; DSE kills `store i32 44, ptr %struct.byte.4, align 4` but should not kill +; `call void @llvm.memset.p0.i64(...)` because it has a clobbering read: +; `%ret = load ptr, ptr %struct.byte.8` + + +%struct.type = type { ptr, ptr } + +define ptr @foo(ptr noundef %ptr) { +; CHECK-LABEL: define ptr @foo( +; CHECK-SAME: ptr noundef [[PTR:%.*]]) { +; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca
[STRUCT_TYPE:%.*]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false) +; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4 +; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]] +; CHECK-NEXT: ret ptr [[RET]] +; + %struct.alloca = alloca %struct.type, align 8 + call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind + %struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8 + ; Set %struct.alloca[8, 16) to 42. + call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false) + ; Set %struct.alloca[8, 12) to 43. + store i32 43, ptr %struct.byte.8, align 4 + ; Set %struct.alloca[4, 8) to 44. + %struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4 + store i32 44, ptr %struct.byte.4, align 4 + ; Return %struct.alloca[8, 16). + %ret = load ptr, ptr %struct.byte.8 + call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind + ret ptr %ret +} + +; Set of tests based on @foo, but where the memset's operands cannot be erased +; due to other uses. Instead, they contain a number of removable MemoryDefs +; with non-void result types. + +define ptr @foo_with_removable_malloc() { +; CHECK-LABEL: define ptr @foo_with_removable_malloc() { +; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]] +; CHECK-NEXT: [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4 +; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false) +; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4 +; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8 +; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_4]]) +; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_8]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]] +; CHECK-NEXT: ret ptr [[RET]] +; + %struct.alloca = alloca %struct.type, align 8 + call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind + %struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4 + %struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8 + + ; Set of removable memory defs + %m2 = tail call ptr @malloc(i64 4) + %m1 = tail call ptr @malloc(i64 4) + store i32 0, ptr %struct.byte.8 + store i32 0, ptr %struct.byte.8 + store i32 123, ptr %m1 + store i32 123, ptr %m2 + + ; Set %struct.alloca[8, 16) to 42. + call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false) + ; Set %struct.alloca[8, 12) to 43. + store i32 43, ptr %struct.byte.8, align 4 + ; Set %struct.alloca[4, 8) to 44. + store i32 44, ptr %struct.byte.4, align 4 + ; Return %struct.alloca[8, 16).
+ %ret = load ptr, ptr %struct.byte.8 + call void @readnone(ptr %struct.byte.4); + call void @readnone(ptr %struct.byte.8); + call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind + ret ptr %ret +} + +define ptr @foo_with_removable_malloc_free() { +; CHECK-LABEL: define ptr @foo_with_removable_malloc_free() { +; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8 +; CHECK-NEXT: [[M1:%.*]] = tail call ptr @malloc(i64 4) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]] +; CHECK-NEXT: [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4 +; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8 +; CHECK-NEXT: [[M2:%.*]] = tail call ptr @malloc(i64 4) +; CHECK-NEXT: call void @free(ptr [[M1]]) +; CHECK-NEXT: call void @free(ptr [[M2]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false) +; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4 +; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8 +; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_4]]) +; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_8]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]] +; CHECK-NEXT: ret ptr [[RET]] +; + %struct.alloca = alloca %struct.type, align 8 + %m1 = tail call ptr @malloc(i64 4) + call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind + %struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4 + %struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8 + + store i32 0, ptr %struct.byte.4 + store i32 0, ptr %struct.byte.8 + %m2 = tail call ptr @malloc(i64 4) + store i32 123, ptr %m1 + call void @free(ptr %m1); + store i32 123, ptr %m2 + call void @free(ptr %m2); + + ; Set %struct.alloca[8, 16) to 42. + call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false) + ; Set %struct.alloca[8, 12) to 43. + store i32 43, ptr %struct.byte.8, align 4 + ; Set %struct.alloca[4, 8) to 44. + store i32 44, ptr %struct.byte.4, align 4 + ; Return %struct.alloca[8, 16). 
+ %ret = load ptr, ptr %struct.byte.8 + call void @readnone(ptr %struct.byte.4); + call void @readnone(ptr %struct.byte.8); + call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind + ret ptr %ret +} + +define ptr @foo_with_malloc_to_calloc() { +; CHECK-LABEL: define ptr @foo_with_malloc_to_calloc() { +; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]] +; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8 +; CHECK-NEXT: [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4 +; CHECK-NEXT: [[CALLOC1:%.*]] = call ptr @calloc(i64 1, i64 4) +; CHECK-NEXT: [[CALLOC:%.*]] = call ptr @calloc(i64 1, i64 4) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false) +; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4 +; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8 +; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_4]]) +; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_8]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]] +; CHECK-NEXT: call void @use(ptr [[CALLOC1]]) +; CHECK-NEXT: call void @use(ptr [[CALLOC]]) +; CHECK-NEXT: ret ptr [[RET]] +; + %struct.alloca = alloca %struct.type, align 8 + call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind + %struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8 + %struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4 + + ; Set of removable memory defs + %m1 = tail call ptr @malloc(i64 4) + %m2 = tail call ptr @malloc(i64 4) + store i32 0, ptr %struct.byte.4 + store i32 0, ptr %struct.byte.8 + call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %m2, i8 0, i64 4, i1 false) + call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %m1, i8 0, i64 4, i1 false) + + ; Set %struct.alloca[8, 16) to 42. + call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false) + ; Set %struct.alloca[8, 12) to 43. + store i32 43, ptr %struct.byte.8, align 4 + ; Set %struct.alloca[4, 8) to 44. + store i32 44, ptr %struct.byte.4, align 4 + ; Return %struct.alloca[8, 16).
+ %ret = load ptr, ptr %struct.byte.8 + call void @readnone(ptr %struct.byte.4); + call void @readnone(ptr %struct.byte.8); + call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind + call void @use(ptr %m1) + call void @use(ptr %m2) + ret ptr %ret +} + +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) + +declare noalias ptr @malloc(i64) willreturn allockind("alloc,uninitialized") "alloc-family"="malloc" +declare void @readnone(ptr) readnone nounwind +declare void @free(ptr nocapture) allockind("free") "alloc-family"="malloc" + +declare void @use(ptr) diff --git a/llvm/test/Transforms/DeadStoreElimination/malloc-earliest-escape-info-invalidation.ll b/llvm/test/Transforms/DeadStoreElimination/malloc-earliest-escape-info-invalidation.ll new file mode 100644 index 0000000000000..60a010cd49ced --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/malloc-earliest-escape-info-invalidation.ll @@ -0,0 +1,302 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -p dse -S %s | FileCheck %s + +target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" + +define void @widget(ptr %a) { +; CHECK-LABEL: define void @widget( +; CHECK-SAME: ptr [[A:%.*]]) { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[CALL1:%.*]] = tail call noalias ptr @malloc(i64 0) +; CHECK-NEXT: store ptr [[CALL1]], ptr [[A]], align 8 +; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[A]], align 8 +; CHECK-NEXT: [[LOAD2:%.*]] = load i32, ptr [[LOAD]], align 8 +; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[CALL1]], i64 0 +; CHECK-NEXT: [[GETELEMENTPTR3:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 1 +; CHECK-NEXT: [[GETELEMENTPTR4:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 8 +; CHECK-NEXT: store i16 0, ptr [[GETELEMENTPTR4]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR5:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 12 +; CHECK-NEXT: store i32 0, ptr [[CALL1]], align 4 +; CHECK-NEXT: [[LOAD6:%.*]] = load i32, ptr inttoptr (i64 4 to ptr), align 4 +; CHECK-NEXT: br label [[BB48:%.*]] +; CHECK: bb7: +; CHECK-NEXT: br label [[BB9:%.*]] +; CHECK: bb8: +; CHECK-NEXT: br label [[BB53:%.*]] +; CHECK: bb9: +; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[CALL1]], [[BB7:%.*]] ], [ [[A]], [[BB43:%.*]] ] +; CHECK-NEXT: [[GETELEMENTPTR10:%.*]] = getelementptr i8, ptr [[PHI]], i64 0 +; CHECK-NEXT: [[GETELEMENTPTR11:%.*]] = getelementptr i8, ptr [[PHI]], i64 0 +; CHECK-NEXT: [[GETELEMENTPTR12:%.*]] = getelementptr i8, ptr [[PHI]], i64 0 +; CHECK-NEXT: [[GETELEMENTPTR13:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR12]], i64 1 +; CHECK-NEXT: store i8 0, ptr [[CALL1]], align 1 +; CHECK-NEXT: br label [[BB29:%.*]] +; CHECK: bb14: +; CHECK-NEXT: [[GETELEMENTPTR15:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR10]], i64 8 +; CHECK-NEXT: [[LOAD16:%.*]] = load i16, ptr [[CALL1]], align 4 +; CHECK-NEXT: br i1 false, label [[BB22:%.*]], label [[BB17:%.*]] +; CHECK: bb17: +; CHECK-NEXT: [[GETELEMENTPTR18:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR11]], i64 8 +; CHECK-NEXT: [[LOAD19:%.*]] = load i16, ptr [[CALL1]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR20:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR12]], i64 8 +; CHECK-NEXT: store i16 0, ptr [[CALL1]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR21:%.*]] = getelementptr i8, ptr [[PHI]], i64 0 +; CHECK-NEXT: br label [[BB25:%.*]] +; CHECK: bb22: 
+; CHECK-NEXT: [[GETELEMENTPTR23:%.*]] = getelementptr i8, ptr [[PHI]], i64 0 +; CHECK-NEXT: [[GETELEMENTPTR24:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR23]], i64 12 +; CHECK-NEXT: br label [[BB25]] +; CHECK: bb25: +; CHECK-NEXT: [[PHI26:%.*]] = phi ptr [ [[A]], [[BB17]] ], [ [[CALL1]], [[BB22]] ] +; CHECK-NEXT: [[PHI27:%.*]] = phi ptr [ [[CALL1]], [[BB17]] ], [ [[CALL1]], [[BB22]] ] +; CHECK-NEXT: [[PHI28:%.*]] = phi ptr [ [[CALL1]], [[BB17]] ], [ [[CALL1]], [[BB22]] ] +; CHECK-NEXT: store i32 0, ptr [[CALL1]], align 4 +; CHECK-NEXT: br label [[BB29]] +; CHECK: bb29: +; CHECK-NEXT: [[PHI30:%.*]] = phi ptr [ [[CALL1]], [[BB9]] ], [ [[CALL1]], [[BB25]] ] +; CHECK-NEXT: [[PHI31:%.*]] = phi ptr [ [[CALL1]], [[BB9]] ], [ [[CALL1]], [[BB25]] ] +; CHECK-NEXT: [[LOAD32:%.*]] = load i8, ptr [[CALL1]], align 4 +; CHECK-NEXT: [[LOAD33:%.*]] = load i8, ptr [[CALL1]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR34:%.*]] = getelementptr i8, ptr [[PHI31]], i64 12 +; CHECK-NEXT: [[GETELEMENTPTR35:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR12]], i64 12 +; CHECK-NEXT: br label [[BB86:%.*]] +; CHECK: bb36: +; CHECK-NEXT: [[GETELEMENTPTR37:%.*]] = getelementptr i8, ptr [[PHI30]], i64 12 +; CHECK-NEXT: br label [[BB38:%.*]] +; CHECK: bb38: +; CHECK-NEXT: [[GETELEMENTPTR39:%.*]] = getelementptr [0 x i32], ptr [[GETELEMENTPTR34]], i64 0, i64 0 +; CHECK-NEXT: [[LOAD40:%.*]] = load i32, ptr [[CALL1]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR41:%.*]] = getelementptr [0 x i32], ptr [[GETELEMENTPTR37]], i64 0, i64 0 +; CHECK-NEXT: [[LOAD42:%.*]] = load i32, ptr [[CALL1]], align 4 +; CHECK-NEXT: br label [[BB38]] +; CHECK: bb43: +; CHECK-NEXT: [[GETELEMENTPTR44:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR12]], i64 8 +; CHECK-NEXT: [[LOAD45:%.*]] = load i16, ptr [[CALL1]], align 4 +; CHECK-NEXT: store i16 0, ptr [[CALL1]], align 4 +; CHECK-NEXT: store i8 0, ptr [[CALL1]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR46:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR12]], i64 12 +; CHECK-NEXT: store i32 0, ptr [[CALL1]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR47:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR12]], i64 16 +; CHECK-NEXT: store i32 0, ptr [[CALL1]], align 4 +; CHECK-NEXT: br label [[BB9]] +; CHECK: bb48: +; CHECK-NEXT: [[GETELEMENTPTR49:%.*]] = getelementptr i8, ptr [[CALL1]], i64 0 +; CHECK-NEXT: [[GETELEMENTPTR50:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR49]], i64 1 +; CHECK-NEXT: [[GETELEMENTPTR51:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR49]], i64 8 +; CHECK-NEXT: [[GETELEMENTPTR52:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR49]], i64 12 +; CHECK-NEXT: store i32 0, ptr [[CALL1]], align 4 +; CHECK-NEXT: br label [[BB48]] +; CHECK: bb53: +; CHECK-NEXT: [[PHI54:%.*]] = phi ptr [ [[CALL1]], [[BB8:%.*]] ], [ [[A]], [[BB71:%.*]] ] +; CHECK-NEXT: [[GETELEMENTPTR55:%.*]] = getelementptr i8, ptr [[PHI54]], i64 0 +; CHECK-NEXT: [[GETELEMENTPTR56:%.*]] = getelementptr i8, ptr [[PHI54]], i64 0 +; CHECK-NEXT: [[GETELEMENTPTR57:%.*]] = getelementptr i8, ptr [[PHI54]], i64 0 +; CHECK-NEXT: [[GETELEMENTPTR58:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR57]], i64 1 +; CHECK-NEXT: br label [[BB71]] +; CHECK: bb59: +; CHECK-NEXT: [[GETELEMENTPTR60:%.*]] = getelementptr i8, ptr [[PHI54]], i64 0 +; CHECK-NEXT: [[GETELEMENTPTR61:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR60]], i64 12 +; CHECK-NEXT: br label [[BB67:%.*]] +; CHECK: bb62: +; CHECK-NEXT: [[GETELEMENTPTR63:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR56]], i64 8 +; CHECK-NEXT: [[LOAD64:%.*]] = load i16, ptr [[CALL1]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR65:%.*]] = 
getelementptr i8, ptr [[GETELEMENTPTR57]], i64 8 +; CHECK-NEXT: store i16 0, ptr [[CALL1]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR66:%.*]] = getelementptr i8, ptr [[PHI54]], i64 0 +; CHECK-NEXT: br label [[BB67]] +; CHECK: bb67: +; CHECK-NEXT: [[PHI68:%.*]] = phi ptr [ [[A]], [[BB62:%.*]] ], [ [[CALL1]], [[BB59:%.*]] ] +; CHECK-NEXT: [[PHI69:%.*]] = phi ptr [ [[CALL1]], [[BB62]] ], [ [[CALL1]], [[BB59]] ] +; CHECK-NEXT: [[PHI70:%.*]] = phi ptr [ [[CALL1]], [[BB62]] ], [ [[CALL1]], [[BB59]] ] +; CHECK-NEXT: store i32 0, ptr [[CALL1]], align 4 +; CHECK-NEXT: br label [[BB71]] +; CHECK: bb71: +; CHECK-NEXT: [[PHI72:%.*]] = phi ptr [ [[CALL1]], [[BB53]] ], [ [[CALL1]], [[BB67]] ] +; CHECK-NEXT: [[PHI73:%.*]] = phi ptr [ [[CALL1]], [[BB53]] ], [ [[CALL1]], [[BB67]] ] +; CHECK-NEXT: [[LOAD74:%.*]] = load i8, ptr [[CALL1]], align 4 +; CHECK-NEXT: [[LOAD75:%.*]] = load i8, ptr [[CALL1]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR76:%.*]] = getelementptr i8, ptr [[PHI72]], i64 12 +; CHECK-NEXT: [[GETELEMENTPTR77:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR57]], i64 12 +; CHECK-NEXT: [[GETELEMENTPTR78:%.*]] = getelementptr [0 x i32], ptr [[GETELEMENTPTR76]], i64 0, i64 0 +; CHECK-NEXT: [[LOAD79:%.*]] = load i32, ptr [[CALL1]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR80:%.*]] = getelementptr [0 x i32], ptr [[GETELEMENTPTR77]], i64 0, i64 0 +; CHECK-NEXT: store i32 0, ptr [[CALL1]], align 4 +; CHECK-NEXT: [[LOAD81:%.*]] = load i8, ptr [[CALL1]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR82:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR57]], i64 8 +; CHECK-NEXT: [[LOAD83:%.*]] = load i16, ptr [[CALL1]], align 4 +; CHECK-NEXT: store i16 0, ptr [[CALL1]], align 4 +; CHECK-NEXT: store i8 0, ptr [[CALL1]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR84:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR57]], i64 12 +; CHECK-NEXT: store i32 0, ptr [[CALL1]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR85:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR57]], i64 16 +; CHECK-NEXT: store i32 0, ptr [[CALL1]], align 4 +; CHECK-NEXT: br label [[BB53]] +; CHECK: bb86: +; CHECK-NEXT: [[GETELEMENTPTR87:%.*]] = getelementptr [0 x i32], ptr [[GETELEMENTPTR34]], i64 0, i64 0 +; CHECK-NEXT: [[LOAD88:%.*]] = load i32, ptr [[CALL1]], align 4 +; CHECK-NEXT: [[GETELEMENTPTR89:%.*]] = getelementptr [0 x i32], ptr [[GETELEMENTPTR35]], i64 0, i64 0 +; CHECK-NEXT: store i32 0, ptr [[CALL1]], align 4 +; CHECK-NEXT: br label [[BB86]] +; +bb: + %call = tail call ptr @malloc(i64 1) + tail call void @llvm.memset.p0.i64(ptr %call, i8 0, i64 1, i1 false) + %call1 = tail call noalias ptr @malloc(i64 0) + store ptr %call1, ptr %a, align 8 + %load = load ptr, ptr %a, align 8 + %load2 = load i32, ptr %load, align 8 + %getelementptr = getelementptr i8, ptr %call1, i64 0 + %getelementptr3 = getelementptr i8, ptr %getelementptr, i64 1 + store i8 0, ptr %call1, align 1 + %getelementptr4 = getelementptr i8, ptr %getelementptr, i64 8 + store i16 0, ptr %getelementptr4, align 4 + %getelementptr5 = getelementptr i8, ptr %getelementptr, i64 12 + store i32 0, ptr %call1, align 4 + %load6 = load i32, ptr inttoptr (i64 4 to ptr), align 4 + br label %bb48 + +bb7: ; No predecessors! + br label %bb9 + +bb8: ; No predecessors! 
+ br label %bb53 + +bb9: ; preds = %bb43, %bb7 + %phi = phi ptr [ %call1, %bb7 ], [ %a, %bb43 ] + %getelementptr10 = getelementptr i8, ptr %phi, i64 0 + %getelementptr11 = getelementptr i8, ptr %phi, i64 0 + %getelementptr12 = getelementptr i8, ptr %phi, i64 0 + %getelementptr13 = getelementptr i8, ptr %getelementptr12, i64 1 + store i8 0, ptr %call1, align 1 + br label %bb29 + +bb14: ; No predecessors! + %getelementptr15 = getelementptr i8, ptr %getelementptr10, i64 8 + %load16 = load i16, ptr %call1, align 4 + br i1 false, label %bb22, label %bb17 + +bb17: ; preds = %bb14 + %getelementptr18 = getelementptr i8, ptr %getelementptr11, i64 8 + %load19 = load i16, ptr %call1, align 4 + %getelementptr20 = getelementptr i8, ptr %getelementptr12, i64 8 + store i16 0, ptr %call1, align 4 + %getelementptr21 = getelementptr i8, ptr %phi, i64 0 + br label %bb25 + +bb22: ; preds = %bb14 + %getelementptr23 = getelementptr i8, ptr %phi, i64 0 + %getelementptr24 = getelementptr i8, ptr %getelementptr23, i64 12 + br label %bb25 + +bb25: ; preds = %bb22, %bb17 + %phi26 = phi ptr [ %a, %bb17 ], [ %call1, %bb22 ] + %phi27 = phi ptr [ %call1, %bb17 ], [ %call1, %bb22 ] + %phi28 = phi ptr [ %call1, %bb17 ], [ %call1, %bb22 ] + store i32 0, ptr %call1, align 4 + br label %bb29 + +bb29: ; preds = %bb25, %bb9 + %phi30 = phi ptr [ %call1, %bb9 ], [ %call1, %bb25 ] + %phi31 = phi ptr [ %call1, %bb9 ], [ %call1, %bb25 ] + %load32 = load i8, ptr %call1, align 4 + %load33 = load i8, ptr %call1, align 4 + %getelementptr34 = getelementptr i8, ptr %phi31, i64 12 + %getelementptr35 = getelementptr i8, ptr %getelementptr12, i64 12 + br label %bb86 + +bb36: ; No predecessors! + %getelementptr37 = getelementptr i8, ptr %phi30, i64 12 + br label %bb38 + +bb38: ; preds = %bb38, %bb36 + %getelementptr39 = getelementptr [0 x i32], ptr %getelementptr34, i64 0, i64 0 + %load40 = load i32, ptr %call1, align 4 + %getelementptr41 = getelementptr [0 x i32], ptr %getelementptr37, i64 0, i64 0 + %load42 = load i32, ptr %call1, align 4 + br label %bb38 + +bb43: ; No predecessors! + %getelementptr44 = getelementptr i8, ptr %getelementptr12, i64 8 + %load45 = load i16, ptr %call1, align 4 + store i16 0, ptr %call1, align 4 + store i8 0, ptr %call1, align 4 + %getelementptr46 = getelementptr i8, ptr %getelementptr12, i64 12 + store i32 0, ptr %call1, align 4 + %getelementptr47 = getelementptr i8, ptr %getelementptr12, i64 16 + store i32 0, ptr %call1, align 4 + br label %bb9 + +bb48: ; preds = %bb48, %bb + %getelementptr49 = getelementptr i8, ptr %call1, i64 0 + %getelementptr50 = getelementptr i8, ptr %getelementptr49, i64 1 + store i8 0, ptr %call1, align 1 + %getelementptr51 = getelementptr i8, ptr %getelementptr49, i64 8 + store i16 0, ptr %call1, align 4 + %getelementptr52 = getelementptr i8, ptr %getelementptr49, i64 12 + store i32 0, ptr %call1, align 4 + br label %bb48 + +bb53: ; preds = %bb71, %bb8 + %phi54 = phi ptr [ %call1, %bb8 ], [ %a, %bb71 ] + %getelementptr55 = getelementptr i8, ptr %phi54, i64 0 + %getelementptr56 = getelementptr i8, ptr %phi54, i64 0 + %getelementptr57 = getelementptr i8, ptr %phi54, i64 0 + %getelementptr58 = getelementptr i8, ptr %getelementptr57, i64 1 + br label %bb71 + +bb59: ; No predecessors! + %getelementptr60 = getelementptr i8, ptr %phi54, i64 0 + %getelementptr61 = getelementptr i8, ptr %getelementptr60, i64 12 + br label %bb67 + +bb62: ; No predecessors! 
+ %getelementptr63 = getelementptr i8, ptr %getelementptr56, i64 8 + %load64 = load i16, ptr %call1, align 4 + %getelementptr65 = getelementptr i8, ptr %getelementptr57, i64 8 + store i16 0, ptr %call1, align 4 + %getelementptr66 = getelementptr i8, ptr %phi54, i64 0 + br label %bb67 + +bb67: ; preds = %bb62, %bb59 + %phi68 = phi ptr [ %a, %bb62 ], [ %call1, %bb59 ] + %phi69 = phi ptr [ %call1, %bb62 ], [ %call1, %bb59 ] + %phi70 = phi ptr [ %call1, %bb62 ], [ %call1, %bb59 ] + store i32 0, ptr %call1, align 4 + br label %bb71 + +bb71: ; preds = %bb67, %bb53 + %phi72 = phi ptr [ %call1, %bb53 ], [ %call1, %bb67 ] + %phi73 = phi ptr [ %call1, %bb53 ], [ %call1, %bb67 ] + %load74 = load i8, ptr %call1, align 4 + %load75 = load i8, ptr %call1, align 4 + %getelementptr76 = getelementptr i8, ptr %phi72, i64 12 + %getelementptr77 = getelementptr i8, ptr %getelementptr57, i64 12 + %getelementptr78 = getelementptr [0 x i32], ptr %getelementptr76, i64 0, i64 0 + %load79 = load i32, ptr %call1, align 4 + %getelementptr80 = getelementptr [0 x i32], ptr %getelementptr77, i64 0, i64 0 + store i32 0, ptr %call1, align 4 + %load81 = load i8, ptr %call1, align 4 + %getelementptr82 = getelementptr i8, ptr %getelementptr57, i64 8 + %load83 = load i16, ptr %call1, align 4 + store i16 0, ptr %call1, align 4 + store i8 0, ptr %call1, align 4 + %getelementptr84 = getelementptr i8, ptr %getelementptr57, i64 12 + store i32 0, ptr %call1, align 4 + %getelementptr85 = getelementptr i8, ptr %getelementptr57, i64 16 + store i32 0, ptr %call1, align 4 + br label %bb53 + +bb86: ; preds = %bb86, %bb29 + %getelementptr87 = getelementptr [0 x i32], ptr %getelementptr34, i64 0, i64 0 + %load88 = load i32, ptr %call1, align 4 + %getelementptr89 = getelementptr [0 x i32], ptr %getelementptr35, i64 0, i64 0 + store i32 0, ptr %call1, align 4 + br label %bb86 +} + +declare ptr @malloc(i64) + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #0 + +attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: write) } diff --git a/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll b/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll index 3a30980fe31bd..6f582cab2f145 100644 --- a/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll +++ b/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll @@ -93,3 +93,29 @@ define internal void @caller_not_avx4() { } declare i64 @caller_unknown_simple(i64) + +; This call should get inlined, because the callee only contains +; inline ASM, not real calls. 
+define <8 x i64> @caller_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) #0 { +; CHECK-LABEL: define {{[^@]+}}@caller_inline_asm +; CHECK-SAME: (ptr [[P0:%.*]], i64 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[SRC_I:%.*]] = load <8 x i64>, ptr [[P0]], align 64 +; CHECK-NEXT: [[A_I:%.*]] = load <8 x i64>, ptr [[P1]], align 64 +; CHECK-NEXT: [[B_I:%.*]] = load <8 x i64>, ptr [[P2]], align 64 +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> asm "vpaddb\09$($3, $2, $0 {$1}", "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}"(i64 [[K]], <8 x i64> [[A_I]], <8 x i64> [[B_I]], <8 x i64> [[SRC_I]]) +; CHECK-NEXT: ret <8 x i64> [[TMP1]] +; + %call = call <8 x i64> @callee_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) + ret <8 x i64> %call +} + +define internal <8 x i64> @callee_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) #1 { + %src = load <8 x i64>, ptr %p0, align 64 + %a = load <8 x i64>, ptr %p1, align 64 + %b = load <8 x i64>, ptr %p2, align 64 + %3 = tail call <8 x i64> asm "vpaddb\09$($3, $2, $0 {$1}", "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}"(i64 %k, <8 x i64> %a, <8 x i64> %b, <8 x i64> %src) #2 + ret <8 x i64> %3 +} + +attributes #0 = { "min-legal-vector-width"="512" "target-features"="+avx,+avx2,+avx512bw,+avx512dq,+avx512f,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" } +attributes #1 = { "min-legal-vector-width"="512" "target-features"="+avx,+avx2,+avx512bw,+avx512f,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" } diff --git a/llvm/test/Transforms/InstCombine/bitcast.ll b/llvm/test/Transforms/InstCombine/bitcast.ll index 58bd81297b0dd..5ace1039c3782 100644 --- a/llvm/test/Transforms/InstCombine/bitcast.ll +++ b/llvm/test/Transforms/InstCombine/bitcast.ll @@ -686,6 +686,21 @@ define ptr @bitcast_from_single_element_pointer_vector_to_pointer(<1 x ptr> %ptr ret ptr %ptr } +; Make sure that we calculate the correct shift.
+define <4 x i32> @bitcast_shl(i32 %arg) { +; CHECK-LABEL: @bitcast_shl( +; CHECK-NEXT: [[I5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 65, i32 poison>, i32 [[ARG:%.*]], i64 3 +; CHECK-NEXT: ret <4 x i32> [[I5]] +; + %i = zext i32 %arg to i64 + %i1 = shl i64 %i, 32 + %i2 = or i64 %i1, 65 + %i3 = zext i64 %i2 to i128 + %i4 = shl i128 %i3, 64 + %i5 = bitcast i128 %i4 to <4 x i32> + ret <4 x i32> %i5 +} + declare void @f1() declare void @f2() define ptr @select_bitcast_unsized_pointer(i1 %c) { diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll index 2704905f7a358..c87c1199f727e 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll @@ -292,7 +292,11 @@ entry: define void @scatter_nxv4i16_uniform_vals_uniform_ptrs_all_active_mask(ptr %dst, i16 %val) { ; CHECK-LABEL: @scatter_nxv4i16_uniform_vals_uniform_ptrs_all_active_mask( ; CHECK-NEXT: entry: -; CHECK-NEXT: store i16 [[VAL:%.*]], ptr [[DST:%.*]], align 2 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_VALUE:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[VAL:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATVALUE:%.*]] = shufflevector <vscale x 4 x i16> [[BROADCAST_VALUE]], <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> [[BROADCAST_SPLATVALUE]], <vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> zeroinitializer, i1 true, i32 0), <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> zeroinitializer)) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/InstCombine/pr83931.ll b/llvm/test/Transforms/InstCombine/pr83931.ll new file mode 100644 index 0000000000000..d36ac8d91abd3 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/pr83931.ll @@ -0,0 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s + +define <vscale x 2 x i1> @dont_crash(<vscale x 2 x i64> %x) { +; CHECK-LABEL: define <vscale x 2 x i1> @dont_crash( +; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RET:%.*]] = icmp sgt <vscale x 2 x i64> [[X]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 -309383, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: ret <vscale x 2 x i1> [[RET]] +; +entry: + %div = sdiv <vscale x 2 x i64> %x, splat (i64 309383) + %ret = icmp sge <vscale x 2 x i64> %div, zeroinitializer + ret <vscale x 2 x i1> %ret +} diff --git a/llvm/test/Transforms/InstCombine/pr83947.ll b/llvm/test/Transforms/InstCombine/pr83947.ll new file mode 100644 index 0000000000000..c1d601ff63718 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/pr83947.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s + +@c = global i32 0, align 4 +@b = global i32 0, align 4 + +define void @masked_scatter1() { +; CHECK-LABEL: define void @masked_scatter1() { +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> shufflevector (<vscale x 4 x ptr> insertelement (<vscale x 4 x ptr> poison, ptr @c, i64 0), <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer), i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 icmp eq (ptr getelementptr inbounds (i32, ptr @b, i64 1), ptr @c), i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)) +; CHECK-NEXT: ret void +; + call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> splat (ptr @c), i32 4, <vscale x 4 x i1> splat (i1 icmp eq (ptr getelementptr (i32, ptr @b, i64 1), ptr @c))) + ret void +} + +define void @masked_scatter2() { +; CHECK-LABEL: define void
@masked_scatter2() { +; CHECK-NEXT: store i32 0, ptr @c, align 4 +; CHECK-NEXT: ret void +; + call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> zeroinitializer, <2 x ptr> splat (ptr @c), i32 4, <2 x i1> splat (i1 true)) + ret void +} + +define void @masked_scatter3() { +; CHECK-LABEL: define void @masked_scatter3() { +; CHECK-NEXT: store i32 0, ptr @c, align 4 +; CHECK-NEXT: ret void +; + call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> zeroinitializer, <2 x ptr> splat (ptr @c), i32 4, <2 x i1> undef) + ret void +} + +define void @masked_scatter4() { +; CHECK-LABEL: define void @masked_scatter4() { +; CHECK-NEXT: ret void +; + call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> zeroinitializer, <2 x ptr> splat (ptr @c), i32 4, <2 x i1> splat (i1 false)) + ret void +} + +define void @masked_scatter5() { +; CHECK-LABEL: define void @masked_scatter5() { +; CHECK-NEXT: store i32 0, ptr @c, align 4 +; CHECK-NEXT: ret void +; + call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> zeroinitializer, <2 x ptr> splat (ptr @c), i32 4, <2 x i1> <i1 true, i1 false>) + ret void +} + +define void @masked_scatter6() { +; CHECK-LABEL: define void @masked_scatter6() { +; CHECK-NEXT: store i32 0, ptr @c, align 4 +; CHECK-NEXT: ret void +; + call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> zeroinitializer, <2 x ptr> splat (ptr @c), i32 4, <2 x i1> <i1 false, i1 true>) + ret void +} + +define void @masked_scatter7() { +; CHECK-LABEL: define void @masked_scatter7() { +; CHECK-NEXT: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> zeroinitializer, <2 x ptr> <ptr @c, ptr @c>, i32 4, <2 x i1> <i1 icmp eq (ptr getelementptr inbounds (i32, ptr @b, i64 1), ptr @c), i1 icmp eq (ptr getelementptr inbounds (i32, ptr @b, i64 1), ptr @c)>) +; CHECK-NEXT: ret void +; + call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> zeroinitializer, <2 x ptr> splat (ptr @c), i32 4, <2 x i1> splat (i1 icmp eq (ptr getelementptr (i32, ptr @b, i64 1), ptr @c))) + ret void +} diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index c5f1b77c6d740..888e7d28f78af 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -2849,12 +2849,14 @@ define i8 @select_replacement_sub(i8 %x, i8 %y, i8 %z) { ret i8 %sel } +; FIXME: This is safe to fold.
define i8 @select_replacement_shift_noundef(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @select_replacement_shift_noundef( ; CHECK-NEXT: [[SHR:%.*]] = lshr exact i8 [[X:%.*]], 1 ; CHECK-NEXT: call void @use_i8(i8 noundef [[SHR]]) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[SHR]], [[Y:%.*]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[X]], i8 [[Z:%.*]] +; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[Y]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[SHL]], i8 [[Z:%.*]] ; CHECK-NEXT: ret i8 [[SEL]] ; %shr = lshr exact i8 %x, 1 @@ -2904,6 +2906,40 @@ define i32 @select_replacement_loop2(i32 %arg, i32 %arg2) { ret i32 %sel } +define i8 @select_replacement_loop3(i32 noundef %x) { +; CHECK-LABEL: @select_replacement_loop3( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: [[REV:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[TRUNC]]) +; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[REV]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[EXT]], [[X]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i8 [[TRUNC]], i8 0 +; CHECK-NEXT: ret i8 [[SEL]] +; + %trunc = trunc i32 %x to i8 + %rev = call i8 @llvm.bitreverse.i8(i8 %trunc) + %ext = zext i8 %rev to i32 + %cmp = icmp eq i32 %ext, %x + %sel = select i1 %cmp, i8 %trunc, i8 0 + ret i8 %sel +} + +define i16 @select_replacement_loop4(i16 noundef %p_12) { +; CHECK-LABEL: @select_replacement_loop4( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i16 [[P_12:%.*]], 2 +; CHECK-NEXT: [[AND1:%.*]] = and i16 [[P_12]], 1 +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[CMP1]], i16 [[AND1]], i16 0 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i16 [[AND2]], [[P_12]] +; CHECK-NEXT: [[AND3:%.*]] = select i1 [[CMP2]], i16 [[AND1]], i16 0 +; CHECK-NEXT: ret i16 [[AND3]] +; + %cmp1 = icmp ult i16 %p_12, 2 + %and1 = and i16 %p_12, 1 + %and2 = select i1 %cmp1, i16 %and1, i16 0 + %cmp2 = icmp eq i16 %and2, %p_12 + %and3 = select i1 %cmp2, i16 %and1, i16 0 + ret i16 %and3 +} + define ptr @select_replacement_gep_inbounds(ptr %base, i64 %offset) { ; CHECK-LABEL: @select_replacement_gep_inbounds( ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]] @@ -3423,7 +3459,7 @@ define <vscale x 2 x i1> @scalable_sign_bits(<vscale x 2 x i8> %x) { define <vscale x 2 x i1> @scalable_non_zero(<vscale x 2 x i32> %x) { ; CHECK-LABEL: @scalable_non_zero( ; CHECK-NEXT: [[A:%.*]] = or <vscale x 2 x i32> [[X:%.*]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) -; CHECK-NEXT: [[CMP:%.*]] = icmp ule <vscale x 2 x i32> [[A]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 56, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <vscale x 2 x i32> [[A]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 57, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) ; CHECK-NEXT: ret <vscale x 2 x i1> [[CMP]] ; %a = or <vscale x 2 x i32> %x, shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i32 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer) diff --git a/llvm/test/Transforms/InstCombine/vscale_cmp.ll b/llvm/test/Transforms/InstCombine/vscale_cmp.ll index a7f8368c5d62c..b2bfc93da089f 100644 --- a/llvm/test/Transforms/InstCombine/vscale_cmp.ll +++ b/llvm/test/Transforms/InstCombine/vscale_cmp.ll @@ -3,7 +3,7 @@ define <vscale x 2 x i1> @sge(<vscale x 2 x i8> %x) { ; CHECK-LABEL: @sge( -; CHECK-NEXT: [[CMP:%.*]] = icmp sge <vscale x 2 x i8> [[X:%.*]], zeroinitializer +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <vscale x 2 x i8> [[X:%.*]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 -1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer) ; CHECK-NEXT: ret <vscale x 2 x i1> [[CMP]] ; %cmp = icmp sge <vscale x 2 x i8> %x, zeroinitializer diff --git a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp index 78dcd4471ae74..7f494e532b1f4 100644 --- a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp +++
b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp @@ -152,8 +152,7 @@ void MacroFusionPredicatorEmitter::emitFirstPredicate(Record *Predicate, << "if (FirstDest.isVirtual() && !MRI.hasOneNonDBGUse(FirstDest))\n"; OS.indent(4) << " return false;\n"; OS.indent(2) << "}\n"; - } else if (Predicate->isSubClassOf( - "FirstFusionPredicateWithMCInstPredicate")) { + } else if (Predicate->isSubClassOf("FusionPredicateWithMCInstPredicate")) { OS.indent(2) << "{\n"; OS.indent(4) << "const MachineInstr *MI = FirstMI;\n"; OS.indent(4) << "if ("; @@ -173,7 +172,7 @@ void MacroFusionPredicatorEmitter::emitFirstPredicate(Record *Predicate, void MacroFusionPredicatorEmitter::emitSecondPredicate(Record *Predicate, PredicateExpander &PE, raw_ostream &OS) { - if (Predicate->isSubClassOf("SecondFusionPredicateWithMCInstPredicate")) { + if (Predicate->isSubClassOf("FusionPredicateWithMCInstPredicate")) { OS.indent(2) << "{\n"; OS.indent(4) << "const MachineInstr *MI = &SecondMI;\n"; OS.indent(4) << "if ("; @@ -185,7 +184,7 @@ void MacroFusionPredicatorEmitter::emitSecondPredicate(Record *Predicate, OS.indent(2) << "}\n"; } else { PrintFatalError(Predicate->getLoc(), - "Unsupported predicate for first instruction: " + + "Unsupported predicate for second instruction: " + Predicate->getType()->getAsString()); } } @@ -196,9 +195,8 @@ void MacroFusionPredicatorEmitter::emitBothPredicate(Record *Predicate, if (Predicate->isSubClassOf("FusionPredicateWithCode")) OS << Predicate->getValueAsString("Predicate"); else if (Predicate->isSubClassOf("BothFusionPredicateWithMCInstPredicate")) { - Record *MCPred = Predicate->getValueAsDef("Predicate"); - emitFirstPredicate(MCPred, PE, OS); - emitSecondPredicate(MCPred, PE, OS); + emitFirstPredicate(Predicate, PE, OS); + emitSecondPredicate(Predicate, PE, OS); } else if (Predicate->isSubClassOf("TieReg")) { int FirstOpIdx = Predicate->getValueAsInt("FirstOpIdx"); int SecondOpIdx = Predicate->getValueAsInt("SecondOpIdx"); diff --git a/llvm/utils/release/test-release.sh b/llvm/utils/release/test-release.sh index 0af16387ce1d8..4314b565e11b0 100755 --- a/llvm/utils/release/test-release.sh +++ b/llvm/utils/release/test-release.sh @@ -532,9 +532,9 @@ function build_llvmCore() { BuildTarget="clang" InstallTarget="install-clang install-clang-resource-headers" # compiler-rt builtins is needed on AIX to have a functional Phase 1 clang. - if [ "$System" = "AIX" -o "$Phase" != "1" ]; then + if [ "$System" = "AIX" ]; then BuildTarget="$BuildTarget runtimes" - InstallTarget="$InstallTarget install-runtimes" + InstallTarget="$InstallTarget install-builtins" fi fi if [ "$Phase" -eq "3" ]; then diff --git a/openmp/docs/ReleaseNotes.rst b/openmp/docs/ReleaseNotes.rst index 3eeaf5c900d80..a5b39f61b0b64 100644 --- a/openmp/docs/ReleaseNotes.rst +++ b/openmp/docs/ReleaseNotes.rst @@ -19,3 +19,5 @@ from the `LLVM releases web site `_. Non-comprehensive list of changes in this release ================================================= + +* SystemZ support added. diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 259c57b5afbca..d51ec886cfe55 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -1403,9 +1403,19 @@ extern void __kmp_query_cpuid(kmp_cpuinfo_t *p); // subleaf is only needed for cache and topology discovery and can be set to // zero in most cases static inline void __kmp_x86_cpuid(int leaf, int subleaf, struct kmp_cpuid *p) { +#if KMP_ARCH_X86 && (defined(__pic__) || defined(__PIC__)) + // on i386 arch, the ebx reg. 
is used by PIC, thus we need to preserve it from + // being trashed beforehand + __asm__ __volatile__("mov %%ebx, %%edi\n" + "cpuid\n" + "xchg %%edi, %%ebx\n" + : "=a"(p->eax), "=D"(p->ebx), "=c"(p->ecx), "=d"(p->edx) + : "a"(leaf), "c"(subleaf)); +#else __asm__ __volatile__("cpuid" : "=a"(p->eax), "=b"(p->ebx), "=c"(p->ecx), "=d"(p->edx) : "a"(leaf), "c"(subleaf)); +#endif } // Load p into FPU control word static inline void __kmp_load_x87_fpu_control_word(const kmp_int16 *p) { @@ -2506,7 +2516,7 @@ typedef struct kmp_depend_info { union { kmp_uint8 flag; // flag as an unsigned char struct { // flag as a set of 8 bits -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) /* Same fields as in the #else branch, but in reverse order */ unsigned all : 1; unsigned unused : 3; @@ -2671,7 +2681,7 @@ typedef struct kmp_task_stack { #endif // BUILD_TIED_TASK_STACK typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) /* Same fields as in the #else branch, but in reverse order */ #if OMPX_TASKGRAPH unsigned reserved31 : 6; diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h index e2a0cda01a971..6202f3d617cc5 100644 --- a/openmp/runtime/src/kmp_lock.h +++ b/openmp/runtime/src/kmp_lock.h @@ -120,7 +120,8 @@ extern void __kmp_validate_locks(void); struct kmp_base_tas_lock { // KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __LP64__ +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) && \ + __LP64__ // Flip the ordering of the high and low 32-bit members to be consistent // with the memory layout of the address in 64-bit big-endian.
kmp_int32 depth_locked; // depth locked, for nested locks only diff --git a/openmp/runtime/test/tasking/bug_nested_proxy_task.c b/openmp/runtime/test/tasking/bug_nested_proxy_task.c index 24fe1f3fe7607..9e0b412efce60 100644 --- a/openmp/runtime/test/tasking/bug_nested_proxy_task.c +++ b/openmp/runtime/test/tasking/bug_nested_proxy_task.c @@ -50,7 +50,7 @@ typedef struct kmp_depend_info { union { kmp_uint8 flag; // flag as an unsigned char struct { // flag as a set of 8 bits -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) unsigned all : 1; unsigned unused : 3; unsigned set : 1; diff --git a/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c b/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c index 688860c035728..1e86d574f4f6a 100644 --- a/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c +++ b/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c @@ -47,7 +47,7 @@ typedef struct kmp_depend_info { union { kmp_uint8 flag; // flag as an unsigned char struct { // flag as a set of 8 bits -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) unsigned all : 1; unsigned unused : 3; unsigned set : 1; diff --git a/openmp/runtime/test/tasking/hidden_helper_task/common.h b/openmp/runtime/test/tasking/hidden_helper_task/common.h index ba57656cbac41..68e2b584c8773 100644 --- a/openmp/runtime/test/tasking/hidden_helper_task/common.h +++ b/openmp/runtime/test/tasking/hidden_helper_task/common.h @@ -17,7 +17,7 @@ typedef struct kmp_depend_info { union { unsigned char flag; struct { -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) unsigned all : 1; unsigned unused : 3; unsigned set : 1;
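A note on the __BYTE_ORDER__ guards in the OpenMP hunks above: in a preprocessor #if expression, any identifier that is not defined as a macro evaluates to 0. A compiler that predefines neither __BYTE_ORDER__ nor __ORDER_BIG_ENDIAN__ therefore sees the unguarded test as 0 == 0, which is true, and silently selects the big-endian bitfield layout even on a little-endian target. The sketch below shows the pitfall and the guarded form; it is standalone C for illustration only, not part of the patch, and the UNGUARDED_LAYOUT/GUARDED_LAYOUT names are invented for the example:

  #include <stdio.h>

  /* Unguarded form: if the compiler predefines neither macro (MSVC, for
     example), both identifiers expand to 0, the comparison becomes 0 == 0,
     and the big-endian branch is taken regardless of the actual target. */
  #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  #define UNGUARDED_LAYOUT "big-endian"
  #else
  #define UNGUARDED_LAYOUT "little-endian"
  #endif

  /* Guarded form, as used in the patch: the comparison is only trusted
     when __BYTE_ORDER__ is actually defined by the compiler. */
  #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
  #define GUARDED_LAYOUT "big-endian"
  #else
  #define GUARDED_LAYOUT "little-endian"
  #endif

  int main(void) {
    printf("unguarded: %s, guarded: %s\n", UNGUARDED_LAYOUT, GUARDED_LAYOUT);
    return 0;
  }

On GCC and Clang the two forms agree, since both compilers always predefine __BYTE_ORDER__; the defined() check only changes the outcome on toolchains that do not, which is exactly the case the kmp.h, kmp_lock.h, and test-header changes are defending against.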