From c2dbaeb91a45aeb6d26f22efef318b5f5a0eb629 Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@hieta.se>
Date: Tue, 23 Jul 2024 11:06:16 +0200
Subject: [PATCH 001/427] Bump version to 19.1.0git

---
 cmake/Modules/LLVMVersion.cmake          | 2 +-
 libcxx/include/__config                  | 2 +-
 llvm/utils/gn/secondary/llvm/version.gni | 2 +-
 llvm/utils/lit/lit/__init__.py           | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake
index 5e28283fbc1c6..aea9b880180ab 100644
--- a/cmake/Modules/LLVMVersion.cmake
+++ b/cmake/Modules/LLVMVersion.cmake
@@ -4,7 +4,7 @@ if(NOT DEFINED LLVM_VERSION_MAJOR)
   set(LLVM_VERSION_MAJOR 19)
 endif()
 if(NOT DEFINED LLVM_VERSION_MINOR)
-  set(LLVM_VERSION_MINOR 0)
+  set(LLVM_VERSION_MINOR 1)
 endif()
 if(NOT DEFINED LLVM_VERSION_PATCH)
   set(LLVM_VERSION_PATCH 0)
diff --git a/libcxx/include/__config b/libcxx/include/__config
index 108f700823cbf..661af5be3c225 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -27,7 +27,7 @@
 // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM.
 // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is
 // defined to XXYYZZ.
-#  define _LIBCPP_VERSION 190000
+#  define _LIBCPP_VERSION 190100
 
 #  define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
 #  define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni
index 7c02ed396db5f..3f44a4645acf6 100644
--- a/llvm/utils/gn/secondary/llvm/version.gni
+++ b/llvm/utils/gn/secondary/llvm/version.gni
@@ -1,4 +1,4 @@
 llvm_version_major = 19
-llvm_version_minor = 0
+llvm_version_minor = 1
 llvm_version_patch = 0
 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch"
diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py
index a5a1ff66bf417..03edfc3360972 100644
--- a/llvm/utils/lit/lit/__init__.py
+++ b/llvm/utils/lit/lit/__init__.py
@@ -2,7 +2,7 @@
 
 __author__ = "Daniel Dunbar"
 __email__ = "daniel@minormatter.com"
-__versioninfo__ = (19, 0, 0)
+__versioninfo__ = (19, 1, 0)
 __version__ = ".".join(str(v) for v in __versioninfo__) + "dev"
 
 __all__ = []

From fc9f6b0e0d8d8f876883883227da3cbd9ab2eb53 Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@hieta.se>
Date: Tue, 23 Jul 2024 13:03:27 +0200
Subject: [PATCH 002/427] [Infra] Fix version-check workflow (#100090)

---
 .github/workflows/version-check.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/version-check.yml b/.github/workflows/version-check.yml
index 4ce6119a407f5..894e07d323ca9 100644
--- a/.github/workflows/version-check.yml
+++ b/.github/workflows/version-check.yml
@@ -27,5 +27,5 @@ jobs:
 
       - name: Version Check
         run: |
-          version=$(grep -o 'LLVM_VERSION_\(MAJOR\|MINOR\|PATCH\) [0-9]\+' llvm/CMakeLists.txt  | cut -d ' ' -f 2 | tr "\n" "." | sed 's/.$//g')
+          version=$(grep -o 'LLVM_VERSION_\(MAJOR\|MINOR\|PATCH\) [0-9]\+' cmake/Modules/LLVMVersion.cmake  | cut -d ' ' -f 2 | tr "\n" "." | sed 's/.$//g')
           .github/workflows/version-check.py "$version"

From 183e8ecc97a996c24e920e7e9668bc65a0d19439 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Tue, 23 Jul 2024 11:15:26 +0100
Subject: [PATCH 003/427] [LV] Disable VPlan-based cost model for 19.x release.

As discussed in  https://github.com/llvm/llvm-project/pull/92555 flip
the default for the option added in
https://github.com/llvm/llvm-project/pull/99536 to true.

This restores the original behavior for the release branch to give the
VPlan-based cost model more time to mature on main.
---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp                 | 2 +-
 .../test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll | 2 --
 .../Inputs/x86-loopvectorize-costmodel.ll.expected              | 1 -
 3 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6d28b8fabe42e..68363abdb817a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -206,7 +206,7 @@ static cl::opt<unsigned> VectorizeMemoryCheckThreshold(
     cl::desc("The maximum allowed number of runtime memory checks"));
 
 static cl::opt<bool> UseLegacyCostModel(
-    "vectorize-use-legacy-cost-model", cl::init(false), cl::Hidden,
+    "vectorize-use-legacy-cost-model", cl::init(true), cl::Hidden,
     cl::desc("Use the legacy cost model instead of the VPlan-based cost model. "
              "This option will be removed in the future."));
 
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index fc310f4163082..1a78eaf644723 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -135,7 +135,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Interleaving is not beneficial.
 ; CHECK-NEXT:  LV: Found a vectorizable loop (vscale x 4) in <stdin>
 ; CHECK-NEXT:  LEV: Epilogue vectorization is not profitable for this loop
-; CHECK-NEXT:  VF picked by VPlan cost model: vscale x 4
 ; CHECK-NEXT:  Executing best plan with VF=vscale x 4, UF=1
 ; CHECK-NEXT:  VPlan 'Final VPlan for VF={vscale x 4},UF>=1' {
 ; CHECK-NEXT:  Live-in vp<%0> = VF * UF
@@ -339,7 +338,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Interleaving is not beneficial.
 ; CHECK-NEXT:  LV: Found a vectorizable loop (vscale x 4) in <stdin>
 ; CHECK-NEXT:  LEV: Epilogue vectorization is not profitable for this loop
-; CHECK-NEXT:  VF picked by VPlan cost model: vscale x 4
 ; CHECK-NEXT:  Executing best plan with VF=vscale x 4, UF=1
 ; CHECK-NEXT:  VPlan 'Final VPlan for VF={vscale x 4},UF>=1' {
 ; CHECK-NEXT:  Live-in vp<%0> = VF * UF
diff --git a/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-loopvectorize-costmodel.ll.expected b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-loopvectorize-costmodel.ll.expected
index 5aa270e76f4c8..e862bf87d265c 100644
--- a/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-loopvectorize-costmodel.ll.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-loopvectorize-costmodel.ll.expected
@@ -17,7 +17,6 @@ define void @test() {
 ; CHECK:  LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
 ; CHECK:  LV: Found an estimated cost of 22 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
 ; CHECK:  LV: Found an estimated cost of 92 for VF 64 For instruction: %v0 = load float, ptr %in0, align 4
-; CHECK:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
 ;
 entry:
   br label %for.body

From c404c7ce7e917a42b8c7d677606c1e3dd476fbc7 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 23 Jul 2024 12:00:53 +0200
Subject: [PATCH 004/427] Revert " [LICM] Fold associative binary ops to
 promote code hoisting  (#81608)"

This reverts commit f2ccf80136a01ca69f766becafb329db6c54c0c8.

The flag propagation code is incorrect.

(cherry picked from commit b48819dbcdb48fc737dc22304ac343e4fdbae9ff)
---
 llvm/lib/Transforms/Scalar/LICM.cpp           |  62 ----
 llvm/test/CodeGen/PowerPC/common-chain.ll     | 315 +++++++++---------
 llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll   |  16 +-
 llvm/test/Transforms/LICM/hoist-binop.ll      |  99 ------
 llvm/test/Transforms/LICM/sink-foldable.ll    |   4 +-
 .../LICM/update-scev-after-hoist.ll           |   2 +-
 6 files changed, 163 insertions(+), 335 deletions(-)
 delete mode 100644 llvm/test/Transforms/LICM/hoist-binop.ll

diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index fe264503dee9e..91ef2b4b7c183 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -113,8 +113,6 @@ STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions "
 STATISTIC(NumIntAssociationsHoisted,
           "Number of invariant int expressions "
           "reassociated and hoisted out of the loop");
-STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions "
-                                    "reassociated and hoisted out of the loop");
 
 /// Memory promotion is enabled by default.
 static cl::opt<bool>
@@ -2781,60 +2779,6 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L,
   return true;
 }
 
-/// Reassociate general associative binary expressions of the form
-///
-/// 1. "(LV op C1) op C2" ==> "LV op (C1 op C2)"
-///
-/// where op is an associative binary op, LV is a loop variant, and C1 and C2
-/// are loop invariants that we want to hoist.
-///
-/// TODO: This can be extended to more cases such as
-/// 2. "C1 op (C2 op LV)" ==> "(C1 op C2) op LV"
-/// 3. "(C1 op LV) op C2" ==> "LV op (C1 op C2)" if op is commutative
-/// 4. "C1 op (LV op C2)" ==> "(C1 op C2) op LV" if op is commutative
-static bool hoistBOAssociation(Instruction &I, Loop &L,
-                               ICFLoopSafetyInfo &SafetyInfo,
-                               MemorySSAUpdater &MSSAU, AssumptionCache *AC,
-                               DominatorTree *DT) {
-  BinaryOperator *BO = dyn_cast<BinaryOperator>(&I);
-  if (!BO || !BO->isAssociative())
-    return false;
-
-  Instruction::BinaryOps Opcode = BO->getOpcode();
-  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(BO->getOperand(0));
-
-  // Transform: "(LV op C1) op C2" ==> "LV op (C1 op C2)"
-  if (Op0 && Op0->getOpcode() == Opcode) {
-    Value *LV = Op0->getOperand(0);
-    Value *C1 = Op0->getOperand(1);
-    Value *C2 = BO->getOperand(1);
-
-    if (L.isLoopInvariant(LV) || !L.isLoopInvariant(C1) ||
-        !L.isLoopInvariant(C2))
-      return false;
-
-    auto *Preheader = L.getLoopPreheader();
-    assert(Preheader && "Loop is not in simplify form?");
-    IRBuilder<> Builder(Preheader->getTerminator());
-    Value *Inv = Builder.CreateBinOp(Opcode, C1, C2, "invariant.op");
-
-    auto *NewBO =
-        BinaryOperator::Create(Opcode, LV, Inv, BO->getName() + ".reass", BO);
-    NewBO->copyIRFlags(BO);
-    BO->replaceAllUsesWith(NewBO);
-    eraseInstruction(*BO, SafetyInfo, MSSAU);
-
-    // Note: (LV op C1) might not be erased if it has more uses than the one we
-    //       just replaced.
-    if (Op0->use_empty())
-      eraseInstruction(*Op0, SafetyInfo, MSSAU);
-
-    return true;
-  }
-
-  return false;
-}
-
 static bool hoistArithmetics(Instruction &I, Loop &L,
                              ICFLoopSafetyInfo &SafetyInfo,
                              MemorySSAUpdater &MSSAU, AssumptionCache *AC,
@@ -2872,12 +2816,6 @@ static bool hoistArithmetics(Instruction &I, Loop &L,
     return true;
   }
 
-  if (hoistBOAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
-    ++NumHoisted;
-    ++NumBOAssociationsHoisted;
-    return true;
-  }
-
   return false;
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll
index ccf0e4520f468..5f8c21e30f8fd 100644
--- a/llvm/test/CodeGen/PowerPC/common-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/common-chain.ll
@@ -642,8 +642,8 @@ define i64 @two_chain_two_bases_succ(ptr %p, i64 %offset, i64 %base1, i64 %base2
 ; CHECK-NEXT:    cmpdi r7, 0
 ; CHECK-NEXT:    ble cr0, .LBB6_4
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    add r5, r5, r4
 ; CHECK-NEXT:    add r6, r6, r4
+; CHECK-NEXT:    add r5, r5, r4
 ; CHECK-NEXT:    mtctr r7
 ; CHECK-NEXT:    sldi r4, r4, 1
 ; CHECK-NEXT:    add r5, r3, r5
@@ -743,219 +743,214 @@ define signext i32 @spill_reduce_succ(ptr %input1, ptr %input2, ptr %output, i64
 ; CHECK-NEXT:    std r9, -184(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r8, -176(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r7, -168(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r4, -160(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r3, -160(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    ble cr0, .LBB7_7
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    sldi r4, r6, 2
-; CHECK-NEXT:    li r6, 1
-; CHECK-NEXT:    mr r0, r10
-; CHECK-NEXT:    std r10, -192(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    cmpdi r4, 1
-; CHECK-NEXT:    iselgt r4, r4, r6
-; CHECK-NEXT:    addi r7, r4, -1
-; CHECK-NEXT:    clrldi r6, r4, 63
-; CHECK-NEXT:    cmpldi r7, 3
+; CHECK-NEXT:    sldi r6, r6, 2
+; CHECK-NEXT:    li r7, 1
+; CHECK-NEXT:    mr r30, r10
+; CHECK-NEXT:    cmpdi r6, 1
+; CHECK-NEXT:    iselgt r7, r6, r7
+; CHECK-NEXT:    addi r8, r7, -1
+; CHECK-NEXT:    clrldi r6, r7, 63
+; CHECK-NEXT:    cmpldi r8, 3
 ; CHECK-NEXT:    blt cr0, .LBB7_4
 ; CHECK-NEXT:  # %bb.2: # %for.body.preheader.new
-; CHECK-NEXT:    ld r0, -192(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r30, -184(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r8, -176(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    rldicl r7, r4, 62, 2
-; CHECK-NEXT:    ld r9, -168(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    add r11, r0, r30
-; CHECK-NEXT:    add r4, r0, r0
-; CHECK-NEXT:    mulli r23, r0, 24
-; CHECK-NEXT:    add r14, r0, r8
-; CHECK-NEXT:    sldi r12, r0, 5
-; CHECK-NEXT:    add r31, r0, r9
-; CHECK-NEXT:    sldi r9, r9, 3
-; CHECK-NEXT:    sldi r18, r0, 4
-; CHECK-NEXT:    sldi r8, r8, 3
-; CHECK-NEXT:    add r10, r4, r4
-; CHECK-NEXT:    sldi r4, r30, 3
-; CHECK-NEXT:    sldi r11, r11, 3
-; CHECK-NEXT:    add r26, r12, r9
-; CHECK-NEXT:    add r16, r18, r9
-; CHECK-NEXT:    add r29, r12, r8
-; CHECK-NEXT:    add r19, r18, r8
-; CHECK-NEXT:    add r30, r12, r4
-; CHECK-NEXT:    mr r20, r4
-; CHECK-NEXT:    std r4, -200(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    ld r4, -160(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    add r15, r5, r11
-; CHECK-NEXT:    sldi r11, r14, 3
-; CHECK-NEXT:    add r29, r5, r29
-; CHECK-NEXT:    add r28, r3, r26
-; CHECK-NEXT:    add r19, r5, r19
-; CHECK-NEXT:    add r21, r23, r9
-; CHECK-NEXT:    add r24, r23, r8
-; CHECK-NEXT:    add r14, r5, r11
-; CHECK-NEXT:    sldi r11, r31, 3
-; CHECK-NEXT:    add r25, r23, r20
-; CHECK-NEXT:    add r20, r18, r20
-; CHECK-NEXT:    add r30, r5, r30
-; CHECK-NEXT:    add r18, r3, r16
-; CHECK-NEXT:    add r24, r5, r24
-; CHECK-NEXT:    add r23, r3, r21
-; CHECK-NEXT:    add r27, r4, r26
-; CHECK-NEXT:    add r22, r4, r21
-; CHECK-NEXT:    add r17, r4, r16
-; CHECK-NEXT:    add r2, r4, r11
-; CHECK-NEXT:    rldicl r4, r7, 2, 1
-; CHECK-NEXT:    sub r7, r8, r9
-; CHECK-NEXT:    ld r8, -200(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r14, -168(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mulli r24, r30, 24
+; CHECK-NEXT:    ld r16, -184(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r15, -176(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r3, -160(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    rldicl r0, r7, 62, 2
+; CHECK-NEXT:    sldi r11, r30, 5
+; CHECK-NEXT:    sldi r19, r30, 4
+; CHECK-NEXT:    sldi r7, r14, 3
+; CHECK-NEXT:    add r14, r30, r14
+; CHECK-NEXT:    sldi r10, r16, 3
+; CHECK-NEXT:    sldi r12, r15, 3
+; CHECK-NEXT:    add r16, r30, r16
+; CHECK-NEXT:    add r15, r30, r15
+; CHECK-NEXT:    add r27, r11, r7
+; CHECK-NEXT:    add r22, r24, r7
+; CHECK-NEXT:    add r17, r19, r7
+; CHECK-NEXT:    sldi r2, r14, 3
+; CHECK-NEXT:    add r26, r24, r10
+; CHECK-NEXT:    add r25, r24, r12
+; CHECK-NEXT:    add r21, r19, r10
+; CHECK-NEXT:    add r20, r19, r12
+; CHECK-NEXT:    add r8, r11, r10
+; CHECK-NEXT:    sldi r16, r16, 3
+; CHECK-NEXT:    add r29, r5, r27
+; CHECK-NEXT:    add r28, r4, r27
+; CHECK-NEXT:    add r27, r3, r27
+; CHECK-NEXT:    add r24, r5, r22
+; CHECK-NEXT:    add r23, r4, r22
+; CHECK-NEXT:    add r22, r3, r22
+; CHECK-NEXT:    add r19, r5, r17
+; CHECK-NEXT:    add r18, r4, r17
+; CHECK-NEXT:    add r17, r3, r17
+; CHECK-NEXT:    add r14, r5, r2
+; CHECK-NEXT:    add r31, r4, r2
+; CHECK-NEXT:    add r2, r3, r2
+; CHECK-NEXT:    add r9, r5, r8
+; CHECK-NEXT:    add r8, r11, r12
 ; CHECK-NEXT:    add r26, r5, r26
 ; CHECK-NEXT:    add r25, r5, r25
 ; CHECK-NEXT:    add r21, r5, r21
 ; CHECK-NEXT:    add r20, r5, r20
 ; CHECK-NEXT:    add r16, r5, r16
-; CHECK-NEXT:    add r31, r5, r11
-; CHECK-NEXT:    add r11, r3, r11
-; CHECK-NEXT:    addi r4, r4, -4
-; CHECK-NEXT:    rldicl r4, r4, 62, 2
-; CHECK-NEXT:    sub r8, r8, r9
-; CHECK-NEXT:    li r9, 0
-; CHECK-NEXT:    addi r4, r4, 1
-; CHECK-NEXT:    mtctr r4
+; CHECK-NEXT:    add r8, r5, r8
+; CHECK-NEXT:    rldicl r3, r0, 2, 1
+; CHECK-NEXT:    addi r3, r3, -4
+; CHECK-NEXT:    sub r0, r12, r7
+; CHECK-NEXT:    sub r12, r10, r7
+; CHECK-NEXT:    li r7, 0
+; CHECK-NEXT:    mr r10, r30
+; CHECK-NEXT:    sldi r15, r15, 3
+; CHECK-NEXT:    add r15, r5, r15
+; CHECK-NEXT:    rldicl r3, r3, 62, 2
+; CHECK-NEXT:    addi r3, r3, 1
+; CHECK-NEXT:    mtctr r3
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB7_3: # %for.body
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lfd f0, 0(r11)
-; CHECK-NEXT:    lfd f1, 0(r2)
-; CHECK-NEXT:    add r0, r0, r10
-; CHECK-NEXT:    xsmuldp f0, f0, f1
+; CHECK-NEXT:    lfd f0, 0(r2)
 ; CHECK-NEXT:    lfd f1, 0(r31)
+; CHECK-NEXT:    add r3, r10, r30
+; CHECK-NEXT:    add r3, r3, r30
+; CHECK-NEXT:    xsmuldp f0, f0, f1
+; CHECK-NEXT:    lfd f1, 0(r14)
+; CHECK-NEXT:    add r3, r3, r30
+; CHECK-NEXT:    add r10, r3, r30
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfd f0, 0(r31)
-; CHECK-NEXT:    add r31, r31, r12
-; CHECK-NEXT:    lfdx f0, r11, r7
-; CHECK-NEXT:    lfdx f1, r2, r7
+; CHECK-NEXT:    stfd f0, 0(r14)
+; CHECK-NEXT:    add r14, r14, r11
+; CHECK-NEXT:    lfdx f0, r2, r0
+; CHECK-NEXT:    lfdx f1, r31, r0
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r14, r9
+; CHECK-NEXT:    lfdx f1, r15, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r14, r9
-; CHECK-NEXT:    lfdx f0, r11, r8
-; CHECK-NEXT:    lfdx f1, r2, r8
-; CHECK-NEXT:    add r11, r11, r12
-; CHECK-NEXT:    add r2, r2, r12
+; CHECK-NEXT:    stfdx f0, r15, r7
+; CHECK-NEXT:    lfdx f0, r2, r12
+; CHECK-NEXT:    lfdx f1, r31, r12
+; CHECK-NEXT:    add r2, r2, r11
+; CHECK-NEXT:    add r31, r31, r11
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r15, r9
+; CHECK-NEXT:    lfdx f1, r16, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r15, r9
-; CHECK-NEXT:    lfd f0, 0(r18)
-; CHECK-NEXT:    lfd f1, 0(r17)
+; CHECK-NEXT:    stfdx f0, r16, r7
+; CHECK-NEXT:    lfd f0, 0(r17)
+; CHECK-NEXT:    lfd f1, 0(r18)
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r16, r9
+; CHECK-NEXT:    lfdx f1, r19, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r16, r9
-; CHECK-NEXT:    lfdx f0, r18, r7
-; CHECK-NEXT:    lfdx f1, r17, r7
+; CHECK-NEXT:    stfdx f0, r19, r7
+; CHECK-NEXT:    lfdx f0, r17, r0
+; CHECK-NEXT:    lfdx f1, r18, r0
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r19, r9
+; CHECK-NEXT:    lfdx f1, r20, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r19, r9
-; CHECK-NEXT:    lfdx f0, r18, r8
-; CHECK-NEXT:    lfdx f1, r17, r8
-; CHECK-NEXT:    add r18, r18, r12
-; CHECK-NEXT:    add r17, r17, r12
+; CHECK-NEXT:    stfdx f0, r20, r7
+; CHECK-NEXT:    lfdx f0, r17, r12
+; CHECK-NEXT:    lfdx f1, r18, r12
+; CHECK-NEXT:    add r17, r17, r11
+; CHECK-NEXT:    add r18, r18, r11
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r20, r9
+; CHECK-NEXT:    lfdx f1, r21, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r20, r9
-; CHECK-NEXT:    lfd f0, 0(r23)
-; CHECK-NEXT:    lfd f1, 0(r22)
+; CHECK-NEXT:    stfdx f0, r21, r7
+; CHECK-NEXT:    lfd f0, 0(r22)
+; CHECK-NEXT:    lfd f1, 0(r23)
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r21, r9
+; CHECK-NEXT:    lfdx f1, r24, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r21, r9
-; CHECK-NEXT:    lfdx f0, r23, r7
-; CHECK-NEXT:    lfdx f1, r22, r7
+; CHECK-NEXT:    stfdx f0, r24, r7
+; CHECK-NEXT:    lfdx f0, r22, r0
+; CHECK-NEXT:    lfdx f1, r23, r0
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r24, r9
+; CHECK-NEXT:    lfdx f1, r25, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r24, r9
-; CHECK-NEXT:    lfdx f0, r23, r8
-; CHECK-NEXT:    lfdx f1, r22, r8
-; CHECK-NEXT:    add r23, r23, r12
-; CHECK-NEXT:    add r22, r22, r12
+; CHECK-NEXT:    stfdx f0, r25, r7
+; CHECK-NEXT:    lfdx f0, r22, r12
+; CHECK-NEXT:    lfdx f1, r23, r12
+; CHECK-NEXT:    add r22, r22, r11
+; CHECK-NEXT:    add r23, r23, r11
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r25, r9
+; CHECK-NEXT:    lfdx f1, r26, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r25, r9
-; CHECK-NEXT:    lfd f0, 0(r28)
-; CHECK-NEXT:    lfd f1, 0(r27)
+; CHECK-NEXT:    stfdx f0, r26, r7
+; CHECK-NEXT:    lfd f0, 0(r27)
+; CHECK-NEXT:    lfd f1, 0(r28)
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r26, r9
+; CHECK-NEXT:    lfdx f1, r29, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r26, r9
-; CHECK-NEXT:    lfdx f0, r28, r7
-; CHECK-NEXT:    lfdx f1, r27, r7
+; CHECK-NEXT:    stfdx f0, r29, r7
+; CHECK-NEXT:    lfdx f0, r27, r0
+; CHECK-NEXT:    lfdx f1, r28, r0
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r29, r9
+; CHECK-NEXT:    lfdx f1, r8, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r29, r9
-; CHECK-NEXT:    lfdx f0, r28, r8
-; CHECK-NEXT:    lfdx f1, r27, r8
-; CHECK-NEXT:    add r28, r28, r12
-; CHECK-NEXT:    add r27, r27, r12
+; CHECK-NEXT:    stfdx f0, r8, r7
+; CHECK-NEXT:    lfdx f0, r27, r12
+; CHECK-NEXT:    lfdx f1, r28, r12
+; CHECK-NEXT:    add r27, r27, r11
+; CHECK-NEXT:    add r28, r28, r11
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r30, r9
+; CHECK-NEXT:    lfdx f1, r9, r7
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r30, r9
-; CHECK-NEXT:    add r9, r9, r12
+; CHECK-NEXT:    stfdx f0, r9, r7
+; CHECK-NEXT:    add r7, r7, r11
 ; CHECK-NEXT:    bdnz .LBB7_3
 ; CHECK-NEXT:  .LBB7_4: # %for.cond.cleanup.loopexit.unr-lcssa
-; CHECK-NEXT:    ld r7, -192(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    cmpldi r6, 0
 ; CHECK-NEXT:    beq cr0, .LBB7_7
 ; CHECK-NEXT:  # %bb.5: # %for.body.epil.preheader
-; CHECK-NEXT:    ld r4, -184(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r29, -160(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    mr r30, r3
-; CHECK-NEXT:    sldi r7, r7, 3
-; CHECK-NEXT:    add r4, r0, r4
-; CHECK-NEXT:    sldi r4, r4, 3
-; CHECK-NEXT:    add r3, r5, r4
-; CHECK-NEXT:    add r8, r29, r4
-; CHECK-NEXT:    add r9, r30, r4
-; CHECK-NEXT:    ld r4, -176(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    add r4, r0, r4
-; CHECK-NEXT:    sldi r4, r4, 3
-; CHECK-NEXT:    add r10, r5, r4
-; CHECK-NEXT:    add r11, r29, r4
-; CHECK-NEXT:    add r12, r30, r4
-; CHECK-NEXT:    ld r4, -168(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    add r4, r0, r4
-; CHECK-NEXT:    sldi r0, r4, 3
-; CHECK-NEXT:    add r5, r5, r0
-; CHECK-NEXT:    add r4, r29, r0
-; CHECK-NEXT:    add r30, r30, r0
-; CHECK-NEXT:    li r0, 0
+; CHECK-NEXT:    ld r3, -184(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r0, -160(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    sldi r8, r30, 3
+; CHECK-NEXT:    add r3, r10, r3
+; CHECK-NEXT:    sldi r3, r3, 3
+; CHECK-NEXT:    add r7, r5, r3
+; CHECK-NEXT:    add r9, r4, r3
+; CHECK-NEXT:    add r11, r0, r3
+; CHECK-NEXT:    ld r3, -176(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    add r3, r10, r3
+; CHECK-NEXT:    sldi r3, r3, 3
+; CHECK-NEXT:    add r12, r5, r3
+; CHECK-NEXT:    add r30, r4, r3
+; CHECK-NEXT:    add r29, r0, r3
+; CHECK-NEXT:    ld r3, -168(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    add r3, r10, r3
+; CHECK-NEXT:    li r10, 0
+; CHECK-NEXT:    sldi r3, r3, 3
+; CHECK-NEXT:    add r5, r5, r3
+; CHECK-NEXT:    add r4, r4, r3
+; CHECK-NEXT:    add r3, r0, r3
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB7_6: # %for.body.epil
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lfdx f0, r30, r0
-; CHECK-NEXT:    lfdx f1, r4, r0
+; CHECK-NEXT:    lfdx f0, r3, r10
+; CHECK-NEXT:    lfdx f1, r4, r10
 ; CHECK-NEXT:    addi r6, r6, -1
 ; CHECK-NEXT:    cmpldi r6, 0
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
 ; CHECK-NEXT:    lfd f1, 0(r5)
 ; CHECK-NEXT:    xsadddp f0, f1, f0
 ; CHECK-NEXT:    stfd f0, 0(r5)
-; CHECK-NEXT:    add r5, r5, r7
-; CHECK-NEXT:    lfdx f0, r12, r0
-; CHECK-NEXT:    lfdx f1, r11, r0
+; CHECK-NEXT:    add r5, r5, r8
+; CHECK-NEXT:    lfdx f0, r29, r10
+; CHECK-NEXT:    lfdx f1, r30, r10
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r10, r0
+; CHECK-NEXT:    lfdx f1, r12, r10
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r10, r0
-; CHECK-NEXT:    lfdx f0, r9, r0
-; CHECK-NEXT:    lfdx f1, r8, r0
+; CHECK-NEXT:    stfdx f0, r12, r10
+; CHECK-NEXT:    lfdx f0, r11, r10
+; CHECK-NEXT:    lfdx f1, r9, r10
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
-; CHECK-NEXT:    lfdx f1, r3, r0
+; CHECK-NEXT:    lfdx f1, r7, r10
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r3, r0
-; CHECK-NEXT:    add r0, r0, r7
+; CHECK-NEXT:    stfdx f0, r7, r10
+; CHECK-NEXT:    add r10, r10, r8
 ; CHECK-NEXT:    bne cr0, .LBB7_6
 ; CHECK-NEXT:  .LBB7_7: # %for.cond.cleanup
 ; CHECK-NEXT:    ld r2, -152(r1) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
index c733a01950603..4b032781c3764 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
@@ -30,16 +30,14 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stw r12, 8(r1)
-; CHECK-NEXT:    stdu r1, -64(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset r29, -24
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    .cfi_offset cr2, 8
 ; CHECK-NEXT:    .cfi_offset cr3, 8
 ; CHECK-NEXT:    .cfi_offset cr4, 8
-; CHECK-NEXT:    std r29, 40(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r30, 48(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r30, 32(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    bl call_2@notoc
 ; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB0_13
 ; CHECK-NEXT:  # %bb.1: # %bb
@@ -67,11 +65,10 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
 ; CHECK-NEXT:    bc 12, 4*cr3+eq, .LBB0_11
 ; CHECK-NEXT:  # %bb.6: # %bb32
 ; CHECK-NEXT:    #
+; CHECK-NEXT:    rlwinm r30, r30, 0, 24, 22
 ; CHECK-NEXT:    andi. r3, r30, 2
-; CHECK-NEXT:    rlwinm r29, r30, 0, 24, 22
 ; CHECK-NEXT:    mcrf cr2, cr0
 ; CHECK-NEXT:    bl call_4@notoc
-; CHECK-NEXT:    mr r30, r29
 ; CHECK-NEXT:    beq+ cr2, .LBB0_3
 ; CHECK-NEXT:  # %bb.7: # %bb37
 ; CHECK-NEXT:  .LBB0_8: # %bb22
@@ -92,13 +89,11 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
 ; CHECK-BE-NEXT:    stdu r1, -144(r1)
 ; CHECK-BE-NEXT:    .cfi_def_cfa_offset 144
 ; CHECK-BE-NEXT:    .cfi_offset lr, 16
-; CHECK-BE-NEXT:    .cfi_offset r28, -32
 ; CHECK-BE-NEXT:    .cfi_offset r29, -24
 ; CHECK-BE-NEXT:    .cfi_offset r30, -16
 ; CHECK-BE-NEXT:    .cfi_offset cr2, 8
 ; CHECK-BE-NEXT:    .cfi_offset cr2, 8
 ; CHECK-BE-NEXT:    .cfi_offset cr2, 8
-; CHECK-BE-NEXT:    std r28, 112(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    std r29, 120(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; CHECK-BE-NEXT:    bl call_2
@@ -131,12 +126,11 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
 ; CHECK-BE-NEXT:    bc 12, 4*cr3+eq, .LBB0_11
 ; CHECK-BE-NEXT:  # %bb.6: # %bb32
 ; CHECK-BE-NEXT:    #
+; CHECK-BE-NEXT:    rlwinm r29, r29, 0, 24, 22
 ; CHECK-BE-NEXT:    andi. r3, r29, 2
-; CHECK-BE-NEXT:    rlwinm r28, r29, 0, 24, 22
 ; CHECK-BE-NEXT:    mcrf cr2, cr0
 ; CHECK-BE-NEXT:    bl call_4
 ; CHECK-BE-NEXT:    nop
-; CHECK-BE-NEXT:    mr r29, r28
 ; CHECK-BE-NEXT:    beq+ cr2, .LBB0_3
 ; CHECK-BE-NEXT:  # %bb.7: # %bb37
 ; CHECK-BE-NEXT:  .LBB0_8: # %bb22
diff --git a/llvm/test/Transforms/LICM/hoist-binop.ll b/llvm/test/Transforms/LICM/hoist-binop.ll
deleted file mode 100644
index 1fae3561e7809..0000000000000
--- a/llvm/test/Transforms/LICM/hoist-binop.ll
+++ /dev/null
@@ -1,99 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=licm < %s | FileCheck %s
-
-; Adapted from:
-;   for(long i = 0; i < n; ++i)
-;     a[i] = (i*k) * v;
-define void @test(i64 %n, i64 %k) {
-; CHECK-LABEL: @test(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_PH:%.*]]
-; CHECK:       for.ph:
-; CHECK-NEXT:    [[K_2:%.*]] = shl nuw nsw i64 [[K:%.*]], 1
-; CHECK-NEXT:    [[VEC_INIT:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[K]], i64 1
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[K_2]], i64 0
-; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = add <2 x i64> [[DOTSPLAT]], [[DOTSPLAT]]
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[FOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ [[VEC_INIT]], [[FOR_PH]] ], [ [[VEC_IND_NEXT_REASS:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], [[DOTSPLAT]]
-; CHECK-NEXT:    call void @use(<2 x i64> [[STEP_ADD]])
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT_REASS]] = add <2 x i64> [[VEC_IND]], [[INVARIANT_OP]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N:%.*]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_END:%.*]], label [[FOR_BODY]]
-; CHECK:       for.end:
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %for.ph
-
-for.ph:
-  %k.2 = shl nuw nsw i64 %k, 1
-  %vec.init = insertelement <2 x i64> zeroinitializer, i64 %k, i64 1
-  %.splatinsert = insertelement <2 x i64> poison, i64 %k.2, i64 0
-  %.splat = shufflevector <2 x i64> %.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
-  br label %for.body
-
-for.body:
-  %index = phi i64 [ 0, %for.ph ], [ %index.next, %for.body ]
-  %vec.ind = phi <2 x i64> [ %vec.init, %for.ph ], [ %vec.ind.next, %for.body ]
-  %step.add = add <2 x i64> %vec.ind, %.splat
-  call void @use(<2 x i64> %step.add)
-  %index.next = add nuw i64 %index, 4
-  %vec.ind.next = add <2 x i64> %step.add, %.splat
-  %cmp = icmp eq i64 %index.next, %n
-  br i1 %cmp, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
-; Same as above but `%step.add` is unused and thus removed.
-define void @test_single_use(i64 %n, i64 %k) {
-; CHECK-LABEL: @test_single_use(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_PH:%.*]]
-; CHECK:       for.ph:
-; CHECK-NEXT:    [[K_2:%.*]] = shl nuw nsw i64 [[K:%.*]], 1
-; CHECK-NEXT:    [[VEC_INIT:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[K]], i64 1
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[K_2]], i64 0
-; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = add <2 x i64> [[DOTSPLAT]], [[DOTSPLAT]]
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[FOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ [[VEC_INIT]], [[FOR_PH]] ], [ [[VEC_IND_NEXT_REASS:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT_REASS]] = add <2 x i64> [[VEC_IND]], [[INVARIANT_OP]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N:%.*]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_END:%.*]], label [[FOR_BODY]]
-; CHECK:       for.end:
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %for.ph
-
-for.ph:
-  %k.2 = shl nuw nsw i64 %k, 1
-  %vec.init = insertelement <2 x i64> zeroinitializer, i64 %k, i64 1
-  %.splatinsert = insertelement <2 x i64> poison, i64 %k.2, i64 0
-  %.splat = shufflevector <2 x i64> %.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
-  br label %for.body
-
-for.body:
-  %index = phi i64 [ 0, %for.ph ], [ %index.next, %for.body ]
-  %vec.ind = phi <2 x i64> [ %vec.init, %for.ph ], [ %vec.ind.next, %for.body ]
-  %step.add = add <2 x i64> %vec.ind, %.splat
-  %index.next = add nuw i64 %index, 4
-  %vec.ind.next = add <2 x i64> %step.add, %.splat
-  %cmp = icmp eq i64 %index.next, %n
-  br i1 %cmp, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
-declare void @use(<2 x i64>)
diff --git a/llvm/test/Transforms/LICM/sink-foldable.ll b/llvm/test/Transforms/LICM/sink-foldable.ll
index b0130dfbb0713..38577a5a12563 100644
--- a/llvm/test/Transforms/LICM/sink-foldable.ll
+++ b/llvm/test/Transforms/LICM/sink-foldable.ll
@@ -79,7 +79,7 @@ define ptr @test2(i32 %j, ptr readonly %P, ptr readnone %Q) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond:
-; CHECK-NEXT:    [[I_ADDR_0:%.*]] = phi i32 [ [[ADD_REASS:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[I_ADDR_0:%.*]] = phi i32 [ [[ADD:%.*]], [[IF_END:%.*]] ]
 ; CHECK-NEXT:    [[P_ADDR_0:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[IF_END]] ]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_ADDR_0]], [[J:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[LOOPEXIT0:%.*]]
@@ -97,7 +97,7 @@ define ptr @test2(i32 %j, ptr readonly %P, ptr readnone %Q) {
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[ADD_PTR]], i64 [[IDX2_EXT]]
 ; CHECK-NEXT:    [[L1:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt ptr [[L1]], [[Q]]
-; CHECK-NEXT:    [[ADD_REASS]] = add nsw i32 [[I_ADDR]], 2
+; CHECK-NEXT:    [[ADD]] = add nsw i32 [[ADD_I]], 1
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[LOOPEXIT2:%.*]], label [[FOR_COND]]
 ; CHECK:       loopexit0:
 ; CHECK-NEXT:    [[P0:%.*]] = phi ptr [ null, [[FOR_COND]] ]
diff --git a/llvm/test/Transforms/LICM/update-scev-after-hoist.ll b/llvm/test/Transforms/LICM/update-scev-after-hoist.ll
index f01008036e9da..fc45b8fce1766 100644
--- a/llvm/test/Transforms/LICM/update-scev-after-hoist.ll
+++ b/llvm/test/Transforms/LICM/update-scev-after-hoist.ll
@@ -2,7 +2,7 @@
 
 define i16 @main() {
 ; SCEV-EXPR:      Classifying expressions for: @main
-; SCEV-EXPR-NEXT:  %mul = phi i16 [ 1, %entry ], [ %mul.n.3.reass, %loop ]
+; SCEV-EXPR-NEXT:  %mul = phi i16 [ 1, %entry ], [ %mul.n.3, %loop ]
 ; SCEV-EXPR-NEXT:  -->  %mul U: [0,-15) S: [-32768,32753)		Exits: 4096		LoopDispositions: { %loop: Variant }
 ; SCEV-EXPR-NEXT:  %div = phi i16 [ 32767, %entry ], [ %div.n.3, %loop ]
 ; SCEV-EXPR-NEXT:  -->  %div U: [-2048,-32768) S: [-2048,-32768)		Exits: 7		LoopDispositions: { %loop: Variant }

From 843276aa2c97f46581309a441a7d55a45dd3d6c0 Mon Sep 17 00:00:00 2001
From: Oliver Hunt <oliver@apple.com>
Date: Tue, 23 Jul 2024 14:18:53 -0700
Subject: [PATCH 005/427] [clang][test] Add function type discrimination tests
 to static destructor tests (#99604)

I accidentally did not include tests for the setting up runtime calls when compiling with -fptrauth-function-pointer-type-discrimination

(cherry picked from commit 8be1325cb1903797ba3dce67087e395f9e080576)
---
 .../CodeGenCXX/ptrauth-static-destructors.cpp | 37 ++++++++++++++++---
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/clang/test/CodeGenCXX/ptrauth-static-destructors.cpp b/clang/test/CodeGenCXX/ptrauth-static-destructors.cpp
index 1240f26d329da..634450bf62ea9 100644
--- a/clang/test/CodeGenCXX/ptrauth-static-destructors.cpp
+++ b/clang/test/CodeGenCXX/ptrauth-static-destructors.cpp
@@ -2,13 +2,27 @@
 // RUN:  | FileCheck %s --check-prefix=CXAATEXIT
 
 // RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -emit-llvm -std=c++11 %s -o - \
-// RUN:    -fno-use-cxa-atexit | FileCheck %s --check-prefixes=ATEXIT,DARWIN
+// RUN:    -fno-use-cxa-atexit | FileCheck %s --check-prefixes=ATEXIT,ATEXIT_DARWIN
 
 // RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -emit-llvm -std=c++11 %s -o - \
 // RUN:  | FileCheck %s --check-prefix=CXAATEXIT
 
 // RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -emit-llvm -std=c++11 %s -o - \
-// RUN:    -fno-use-cxa-atexit | FileCheck %s --check-prefixes=ATEXIT,ELF
+// RUN:    -fno-use-cxa-atexit | FileCheck %s --check-prefixes=ATEXIT,ATEXIT_ELF
+
+// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -emit-llvm -std=c++11 %s \
+// RUN:  -fptrauth-function-pointer-type-discrimination  -o - | FileCheck %s --check-prefix=CXAATEXIT_DISC
+
+// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -emit-llvm -std=c++11 %s -o - \
+// RUN:   -fptrauth-function-pointer-type-discrimination  -fno-use-cxa-atexit \
+// RUN:  | FileCheck %s --check-prefixes=ATEXIT_DISC,ATEXIT_DISC_DARWIN
+
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -emit-llvm -std=c++11 %s \
+// RUN:  -fptrauth-function-pointer-type-discrimination  -o - | FileCheck %s --check-prefix=CXAATEXIT_DISC
+
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -emit-llvm -std=c++11 %s -o - \
+// RUN:   -fptrauth-function-pointer-type-discrimination -fno-use-cxa-atexit \
+// RUN:  | FileCheck %s --check-prefixes=ATEXIT_DISC,ATEXIT_DISC_ELF
 
 class Foo {
  public:
@@ -21,11 +35,22 @@ Foo global;
 // CXAATEXIT: define internal void @__cxx_global_var_init()
 // CXAATEXIT:   call i32 @__cxa_atexit(ptr ptrauth (ptr @_ZN3FooD1Ev, i32 0), ptr @global, ptr @__dso_handle)
 
+// CXAATEXIT_DISC: define internal void @__cxx_global_var_init()
+// CXAATEXIT_DISC:   call i32 @__cxa_atexit(ptr ptrauth (ptr @_ZN3FooD1Ev, i32 0, i64 10942), ptr @global, ptr @__dso_handle)
 
 // ATEXIT: define internal void @__cxx_global_var_init()
 // ATEXIT:   %{{.*}} = call i32 @atexit(ptr ptrauth (ptr @__dtor_global, i32 0))
 
-// DARWIN: define internal void @__dtor_global() {{.*}} section "__TEXT,__StaticInit,regular,pure_instructions" {
-// ELF:    define internal void @__dtor_global() {{.*}} section ".text.startup" {
-// DARWIN:   %{{.*}} = call ptr @_ZN3FooD1Ev(ptr @global)
-// ELF:      call void @_ZN3FooD1Ev(ptr @global)
+// ATEXIT_DARWIN: define internal void @__dtor_global() {{.*}} section "__TEXT,__StaticInit,regular,pure_instructions" {
+// ATEXIT_ELF:    define internal void @__dtor_global() {{.*}} section ".text.startup" {
+// ATEXIT_DARWIN:   %{{.*}} = call ptr @_ZN3FooD1Ev(ptr @global)
+// ATEXIT_ELF:      call void @_ZN3FooD1Ev(ptr @global)
+
+// ATEXIT_DISC: define internal void @__cxx_global_var_init()
+// ATEXIT_DISC:   %{{.*}} = call i32 @atexit(ptr ptrauth (ptr @__dtor_global, i32 0, i64 10942))
+
+
+// ATEXIT_DISC_DARWIN: define internal void @__dtor_global() {{.*}} section "__TEXT,__StaticInit,regular,pure_instructions" {
+// ATEXIT_DISC_ELF:    define internal void @__dtor_global() {{.*}} section ".text.startup" {
+// ATEXIT_DISC_DARWIN:   %{{.*}} = call ptr @_ZN3FooD1Ev(ptr @global)
+// ATEXIT_DISC_ELF:      call void @_ZN3FooD1Ev(ptr @global)

From 95ed2d007951374c8ae905b2cce4be262865e442 Mon Sep 17 00:00:00 2001
From: Akira Hatanaka <ahatanak@gmail.com>
Date: Tue, 23 Jul 2024 14:39:58 -0700
Subject: [PATCH 006/427] [PAC][compiler-rt][UBSan] Strip signed vptr instead
 of authenticating it (#100153)

vptr cannot be authenticated without knowing the class type if it was
signed with type discrimination.

Co-authored-by: Oliver Hunt <oliver@apple.com>
(cherry picked from commit 0a6a3c152faf56e07dd4f9e89e534d2b97eeab56)
---
 compiler-rt/lib/ubsan/ubsan_type_hash_itanium.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/ubsan/ubsan_type_hash_itanium.cpp b/compiler-rt/lib/ubsan/ubsan_type_hash_itanium.cpp
index 468a8fcd603f0..15788574dd995 100644
--- a/compiler-rt/lib/ubsan/ubsan_type_hash_itanium.cpp
+++ b/compiler-rt/lib/ubsan/ubsan_type_hash_itanium.cpp
@@ -207,7 +207,7 @@ struct VtablePrefix {
   std::type_info *TypeInfo;
 };
 VtablePrefix *getVtablePrefix(void *Vtable) {
-  Vtable = ptrauth_auth_data(Vtable, ptrauth_key_cxx_vtable_pointer, 0);
+  Vtable = ptrauth_strip(Vtable, ptrauth_key_cxx_vtable_pointer);
   VtablePrefix *Vptr = reinterpret_cast<VtablePrefix*>(Vtable);
   VtablePrefix *Prefix = Vptr - 1;
   if (!IsAccessibleMemoryRange((uptr)Prefix, sizeof(VtablePrefix)))

From aa425eb0e2f5174e50ec84766861ae3f6186d39b Mon Sep 17 00:00:00 2001
From: hev <wangrui@loongson.cn>
Date: Wed, 24 Jul 2024 12:08:43 +0800
Subject: [PATCH 007/427] [LoongArch] Fix codegen for ISD::ROTR (#100292)

This patch fixes the code generation for IR:

sext i32 (trunc i64 (rotr i64 %x, i64 %y) to i32) to i64

(cherry picked from commit e386aacb747b4512dedf481ad83e054d3dd641e6)
---
 .../Target/LoongArch/LoongArchInstrInfo.td    |  1 -
 llvm/test/CodeGen/LoongArch/rotl-rotr.ll      | 36 +++++++++++++++++++
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 97f0e8d6a10c7..ec0d071453c3f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1144,7 +1144,6 @@ def : PatGprGpr<urem, MOD_DU>;
 def : PatGprGpr<loongarch_mod_wu, MOD_WU>;
 def : PatGprGpr<rotr, ROTR_D>;
 def : PatGprGpr<loongarch_rotr_w, ROTR_W>;
-def : PatGprGpr_32<rotr, ROTR_W>;
 def : PatGprImm<rotr, ROTRI_D, uimm6>;
 def : PatGprImm_32<rotr, ROTRI_W, uimm5>;
 def : PatGprImm<loongarch_rotr_w, ROTRI_W, uimm5>;
diff --git a/llvm/test/CodeGen/LoongArch/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/rotl-rotr.ll
index b2d46f5c088ba..75461f5820984 100644
--- a/llvm/test/CodeGen/LoongArch/rotl-rotr.ll
+++ b/llvm/test/CodeGen/LoongArch/rotl-rotr.ll
@@ -504,6 +504,42 @@ define i64 @rotr_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind {
   ret i64 %f
 }
 
+define signext i32 @rotr_64_trunc_32(i64 %x, i64 %y) nounwind {
+; LA32-LABEL: rotr_64_trunc_32:
+; LA32:       # %bb.0:
+; LA32-NEXT:    srl.w $a3, $a0, $a2
+; LA32-NEXT:    xori $a4, $a2, 31
+; LA32-NEXT:    slli.w $a5, $a1, 1
+; LA32-NEXT:    sll.w $a4, $a5, $a4
+; LA32-NEXT:    or $a3, $a3, $a4
+; LA32-NEXT:    addi.w $a4, $a2, -32
+; LA32-NEXT:    slti $a5, $a4, 0
+; LA32-NEXT:    maskeqz $a3, $a3, $a5
+; LA32-NEXT:    srl.w $a1, $a1, $a4
+; LA32-NEXT:    masknez $a1, $a1, $a5
+; LA32-NEXT:    or $a1, $a3, $a1
+; LA32-NEXT:    sub.w $a3, $zero, $a2
+; LA32-NEXT:    sll.w $a0, $a0, $a3
+; LA32-NEXT:    ori $a3, $zero, 32
+; LA32-NEXT:    sub.w $a2, $a3, $a2
+; LA32-NEXT:    srai.w $a2, $a2, 31
+; LA32-NEXT:    and $a0, $a2, $a0
+; LA32-NEXT:    or $a0, $a1, $a0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: rotr_64_trunc_32:
+; LA64:       # %bb.0:
+; LA64-NEXT:    rotr.d $a0, $a0, $a1
+; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    ret
+  %z = sub i64 64, %y
+  %b = lshr i64 %x, %y
+  %c = shl i64 %x, %z
+  %d = or i64 %b, %c
+  %e = trunc i64 %d to i32
+  ret i32 %e
+}
+
 define signext i32 @rotri_i32(i32 signext %a) nounwind {
 ; LA32-LABEL: rotri_i32:
 ; LA32:       # %bb.0:

From dcc22f984454ef3e390b6a9183b3f79ac4b860e7 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn@outlook.com>
Date: Tue, 23 Jul 2024 12:54:00 -0500
Subject: [PATCH 008/427] [NVPTX] Fix internal indirect call prototypes not
 obeying the ABI (#100131)

Summary:
The NVPTX backend optimizes the ABI for functions that are internal,
however, this is not legal for indirect call prototypes. Previously, we
would modify the ABI on an aggregate byval type passed to an indirect
call prototype, which would make PTXAS error. This patch just passes the
function as a nullptr to force strict ABI compliance without
modification in the helper function.

Fixes https://github.com/llvm/llvm-project/issues/100055

(cherry picked from commit e0649a5dfc6b859d652318f578bc3d49674787a4)
---
 libc/config/gpu/entrypoints.txt             | 15 +---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp |  5 +-
 llvm/test/CodeGen/NVPTX/indirect_byval.ll   | 94 +++++++++++++++++++++
 3 files changed, 101 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/CodeGen/NVPTX/indirect_byval.ll

diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index 42909cec55890..fa878d8999227 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -1,13 +1,3 @@
-if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
-  set(extra_entrypoints
-      # stdio.h entrypoints
-      libc.src.stdio.snprintf
-      libc.src.stdio.sprintf
-      libc.src.stdio.vsnprintf
-      libc.src.stdio.vsprintf
-  )
-endif()
-
 set(TARGET_LIBC_ENTRYPOINTS
     # assert.h entrypoints
     libc.src.assert.__assert_fail
@@ -186,13 +176,16 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.errno.errno
 
     # stdio.h entrypoints
-    ${extra_entrypoints}
     libc.src.stdio.clearerr
     libc.src.stdio.fclose
     libc.src.stdio.printf
     libc.src.stdio.vprintf
     libc.src.stdio.fprintf
     libc.src.stdio.vfprintf
+    libc.src.stdio.snprintf
+    libc.src.stdio.sprintf
+    libc.src.stdio.vsnprintf
+    libc.src.stdio.vsprintf
     libc.src.stdio.feof
     libc.src.stdio.ferror
     libc.src.stdio.fflush
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 44c1a2e50486c..6975412ce5d35 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1429,7 +1429,6 @@ std::string NVPTXTargetLowering::getPrototype(
 
   bool first = true;
 
-  const Function *F = CB.getFunction();
   unsigned NumArgs = VAInfo ? VAInfo->first : Args.size();
   for (unsigned i = 0, OIdx = 0; i != NumArgs; ++i, ++OIdx) {
     Type *Ty = Args[i].Ty;
@@ -1471,10 +1470,12 @@ std::string NVPTXTargetLowering::getPrototype(
       continue;
     }
 
+    // Indirect calls need strict ABI alignment so we disable optimizations by
+    // not providing a function to optimize.
     Type *ETy = Args[i].IndirectType;
     Align InitialAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
     Align ParamByValAlign =
-        getFunctionByValParamAlign(F, ETy, InitialAlign, DL);
+        getFunctionByValParamAlign(/*F=*/nullptr, ETy, InitialAlign, DL);
 
     O << ".param .align " << ParamByValAlign.value() << " .b8 ";
     O << "_";
diff --git a/llvm/test/CodeGen/NVPTX/indirect_byval.ll b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
new file mode 100644
index 0000000000000..ac6c4e262fd60
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_52 -mattr=+ptx64 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_52 -mattr=+ptx64 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+%struct.S = type { i8 }
+%struct.U = type { i64 }
+
+@ptr = external global ptr, align 8
+
+define internal i32 @foo() {
+; CHECK-LABEL: foo(
+; CHECK:       {
+; CHECK-NEXT:    .local .align 1 .b8 __local_depot0[2];
+; CHECK-NEXT:    .reg .b64 %SP;
+; CHECK-NEXT:    .reg .b64 %SPL;
+; CHECK-NEXT:    .reg .b16 %rs<2>;
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-NEXT:    .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0: // %entry
+; CHECK-NEXT:    mov.u64 %SPL, __local_depot0;
+; CHECK-NEXT:    cvta.local.u64 %SP, %SPL;
+; CHECK-NEXT:    ld.global.u64 %rd1, [ptr];
+; CHECK-NEXT:    ld.u8 %rs1, [%SP+1];
+; CHECK-NEXT:    add.u64 %rd2, %SP, 0;
+; CHECK-NEXT:    { // callseq 0, 0
+; CHECK-NEXT:    .param .align 1 .b8 param0[1];
+; CHECK-NEXT:    st.param.b8 [param0+0], %rs1;
+; CHECK-NEXT:    .param .b64 param1;
+; CHECK-NEXT:    st.param.b64 [param1+0], %rd2;
+; CHECK-NEXT:    .param .b32 retval0;
+; CHECK-NEXT:    prototype_0 : .callprototype (.param .b32 _) _ (.param .align 1 .b8 _[1], .param .b64 _);
+; CHECK-NEXT:    call (retval0),
+; CHECK-NEXT:    %rd1,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0,
+; CHECK-NEXT:    param1
+; CHECK-NEXT:    )
+; CHECK-NEXT:    , prototype_0;
+; CHECK-NEXT:    ld.param.b32 %r1, [retval0+0];
+; CHECK-NEXT:    } // callseq 0
+; CHECK-NEXT:    st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT:    ret;
+entry:
+  %s = alloca %struct.S, align 1
+  %agg.tmp = alloca %struct.S, align 1
+  %0 = load ptr, ptr @ptr, align 8
+  %call = call i32 %0(ptr byval(%struct.S) align 1 %agg.tmp, ptr noundef %s)
+  ret i32 %call
+}
+
+define internal i32 @bar() {
+; CHECK-LABEL: bar(
+; CHECK:         // @bar
+; CHECK-NEXT:  {
+; CHECK-NEXT:    .local .align 8 .b8 __local_depot1[16];
+; CHECK-NEXT:    .reg .b64 %SP;
+; CHECK-NEXT:    .reg .b64 %SPL;
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0: // %entry
+; CHECK-NEXT:    mov.u64 %SPL, __local_depot1;
+; CHECK-NEXT:    cvta.local.u64 %SP, %SPL;
+; CHECK-NEXT:    ld.global.u64 %rd1, [ptr];
+; CHECK-NEXT:    ld.u64 %rd2, [%SP+8];
+; CHECK-NEXT:    add.u64 %rd3, %SP, 0;
+; CHECK-NEXT:    { // callseq 1, 0
+; CHECK-NEXT:    .param .align 8 .b8 param0[8];
+; CHECK-NEXT:    st.param.b64 [param0+0], %rd2;
+; CHECK-NEXT:    .param .b64 param1;
+; CHECK-NEXT:    st.param.b64 [param1+0], %rd3;
+; CHECK-NEXT:    .param .b32 retval0;
+; CHECK-NEXT:    prototype_1 : .callprototype (.param .b32 _) _ (.param .align 8 .b8 _[8], .param .b64 _);
+; CHECK-NEXT:    call (retval0),
+; CHECK-NEXT:    %rd1,
+; CHECK-NEXT:    (
+; CHECK-NEXT:    param0,
+; CHECK-NEXT:    param1
+; CHECK-NEXT:    )
+; CHECK-NEXT:    , prototype_1;
+; CHECK-NEXT:    ld.param.b32 %r1, [retval0+0];
+; CHECK-NEXT:    } // callseq 1
+; CHECK-NEXT:    st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT:    ret;
+entry:
+  %s = alloca %struct.U, align 8
+  %agg.tmp = alloca %struct.U, align 8
+  %0 = load ptr, ptr @ptr, align 8
+  %call = call noundef i32 %0(ptr byval(%struct.U) align 8 %agg.tmp, ptr %s)
+  ret i32 %call
+}

From f1472feccdd0f626112f77882e580a79b385b184 Mon Sep 17 00:00:00 2001
From: azhan92 <alisonxzhang@gmail.com>
Date: Tue, 23 Jul 2024 09:51:13 -0400
Subject: [PATCH 009/427] [PowerPC] Add builtin_cpu_is P11 support (#99550)

This PR adds support for __builtin_cpu_is ("power11")

(cherry picked from commit 63b382bbde5994e8f2cec75883320e3ad9fd618f)
---
 clang/test/CodeGen/aix-builtin-cpu-is.c       |  4 ++
 clang/test/CodeGen/builtin-cpu-supports.c     | 72 ++++++++++++++++---
 .../llvm/TargetParser/PPCTargetParser.def     |  3 +
 3 files changed, 69 insertions(+), 10 deletions(-)

diff --git a/clang/test/CodeGen/aix-builtin-cpu-is.c b/clang/test/CodeGen/aix-builtin-cpu-is.c
index e17cf7353511a..04644dd7020e0 100644
--- a/clang/test/CodeGen/aix-builtin-cpu-is.c
+++ b/clang/test/CodeGen/aix-builtin-cpu-is.c
@@ -50,6 +50,10 @@
 // RUN: %clang_cc1 -triple powerpc-ibm-aix7.2.0.0 -emit-llvm -o - %t.c | FileCheck %s -DVALUE=262144 \
 // RUN:   --check-prefix=CHECKOP
 
+// RUN: echo "int main() { return __builtin_cpu_is(\"power11\");}" > %t.c
+// RUN: %clang_cc1 -triple powerpc-ibm-aix7.2.0.0 -emit-llvm -o - %t.c | FileCheck %s -DVALUE=524288 \
+// RUN:   --check-prefix=CHECKOP
+
 // CHECK:     define i32 @main() #0 {
 // CHECK-NEXT: entry:
 // CHECK-NEXT:   %retval = alloca i32, align 4
diff --git a/clang/test/CodeGen/builtin-cpu-supports.c b/clang/test/CodeGen/builtin-cpu-supports.c
index 88eb7b0fa786e..f960040ab094b 100644
--- a/clang/test/CodeGen/builtin-cpu-supports.c
+++ b/clang/test/CodeGen/builtin-cpu-supports.c
@@ -129,25 +129,69 @@ int v4() { return __builtin_cpu_supports("x86-64-v4"); }
 // CHECK-PPC:       if.else3:
 // CHECK-PPC-NEXT:    [[CPU_IS:%.*]] = call i32 @llvm.ppc.fixed.addr.ld(i32 3)
 // CHECK-PPC-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[CPU_IS]], 39
-// CHECK-PPC-NEXT:    br i1 [[TMP6]], label [[IF_THEN4:%.*]], label [[IF_END:%.*]]
+// CHECK-PPC-NEXT:    br i1 [[TMP6]], label [[IF_THEN4:%.*]], label [[IF_ELSE5:%.*]]
 // CHECK-PPC:       if.then4:
 // CHECK-PPC-NEXT:    [[TMP7:%.*]] = load i32, ptr [[A_ADDR]], align 4
 // CHECK-PPC-NEXT:    [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4
 // CHECK-PPC-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP8]]
 // CHECK-PPC-NEXT:    store i32 [[ADD]], ptr [[RETVAL]], align 4
 // CHECK-PPC-NEXT:    br label [[RETURN]]
+// CHECK-PPC:       if.else5:
+// CHECK-PPC-NEXT:    [[CPU_IS6:%.*]] = call i32 @llvm.ppc.fixed.addr.ld(i32 3)
+// CHECK-PPC-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[CPU_IS6]], 45
+// CHECK-PPC-NEXT:    br i1 [[TMP9]], label [[IF_THEN7:%.*]], label [[IF_ELSE9:%.*]]
+// CHECK-PPC:       if.then7:
+// CHECK-PPC-NEXT:    [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-PPC-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP10]], 3
+// CHECK-PPC-NEXT:    store i32 [[ADD8]], ptr [[RETVAL]], align 4
+// CHECK-PPC-NEXT:    br label [[RETURN]]
+// CHECK-PPC:       if.else9:
+// CHECK-PPC-NEXT:    [[CPU_IS10:%.*]] = call i32 @llvm.ppc.fixed.addr.ld(i32 3)
+// CHECK-PPC-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[CPU_IS10]], 46
+// CHECK-PPC-NEXT:    br i1 [[TMP11]], label [[IF_THEN11:%.*]], label [[IF_ELSE13:%.*]]
+// CHECK-PPC:       if.then11:
+// CHECK-PPC-NEXT:    [[TMP12:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-PPC-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP12]], 3
+// CHECK-PPC-NEXT:    store i32 [[SUB12]], ptr [[RETVAL]], align 4
+// CHECK-PPC-NEXT:    br label [[RETURN]]
+// CHECK-PPC:       if.else13:
+// CHECK-PPC-NEXT:    [[CPU_IS14:%.*]] = call i32 @llvm.ppc.fixed.addr.ld(i32 3)
+// CHECK-PPC-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[CPU_IS14]], 47
+// CHECK-PPC-NEXT:    br i1 [[TMP13]], label [[IF_THEN15:%.*]], label [[IF_ELSE17:%.*]]
+// CHECK-PPC:       if.then15:
+// CHECK-PPC-NEXT:    [[TMP14:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-PPC-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP14]], 7
+// CHECK-PPC-NEXT:    store i32 [[ADD16]], ptr [[RETVAL]], align 4
+// CHECK-PPC-NEXT:    br label [[RETURN]]
+// CHECK-PPC:       if.else17:
+// CHECK-PPC-NEXT:    [[CPU_IS18:%.*]] = call i32 @llvm.ppc.fixed.addr.ld(i32 3)
+// CHECK-PPC-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[CPU_IS18]], 48
+// CHECK-PPC-NEXT:    br i1 [[TMP15]], label [[IF_THEN19:%.*]], label [[IF_END:%.*]]
+// CHECK-PPC:       if.then19:
+// CHECK-PPC-NEXT:    [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-PPC-NEXT:    [[SUB20:%.*]] = sub nsw i32 [[TMP16]], 7
+// CHECK-PPC-NEXT:    store i32 [[SUB20]], ptr [[RETVAL]], align 4
+// CHECK-PPC-NEXT:    br label [[RETURN]]
 // CHECK-PPC:       if.end:
-// CHECK-PPC-NEXT:    br label [[IF_END5:%.*]]
-// CHECK-PPC:       if.end5:
-// CHECK-PPC-NEXT:    br label [[IF_END6:%.*]]
-// CHECK-PPC:       if.end6:
-// CHECK-PPC-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK-PPC-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP9]], 5
-// CHECK-PPC-NEXT:    store i32 [[ADD7]], ptr [[RETVAL]], align 4
+// CHECK-PPC-NEXT:    br label [[IF_END21:%.*]]
+// CHECK-PPC:       if.end21:
+// CHECK-PPC-NEXT:    br label [[IF_END22:%.*]]
+// CHECK-PPC:       if.end22:
+// CHECK-PPC-NEXT:    br label [[IF_END23:%.*]]
+// CHECK-PPC:       if.end23:
+// CHECK-PPC-NEXT:    br label [[IF_END24:%.*]]
+// CHECK-PPC:       if.end24:
+// CHECK-PPC-NEXT:    br label [[IF_END25:%.*]]
+// CHECK-PPC:       if.end25:
+// CHECK-PPC-NEXT:    br label [[IF_END26:%.*]]
+// CHECK-PPC:       if.end26:
+// CHECK-PPC-NEXT:    [[TMP17:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-PPC-NEXT:    [[ADD27:%.*]] = add nsw i32 [[TMP17]], 5
+// CHECK-PPC-NEXT:    store i32 [[ADD27]], ptr [[RETVAL]], align 4
 // CHECK-PPC-NEXT:    br label [[RETURN]]
 // CHECK-PPC:       return:
-// CHECK-PPC-NEXT:    [[TMP10:%.*]] = load i32, ptr [[RETVAL]], align 4
-// CHECK-PPC-NEXT:    ret i32 [[TMP10]]
+// CHECK-PPC-NEXT:    [[TMP18:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK-PPC-NEXT:    ret i32 [[TMP18]]
 //
 int test(int a) {
   if (__builtin_cpu_supports("arch_3_00")) // HWCAP2
@@ -156,6 +200,14 @@ int test(int a) {
     return a - 5;
   else if (__builtin_cpu_is("power7"))     // CPUID
     return a + a;
+  else if (__builtin_cpu_is("power8"))
+    return a + 3;
+  else if (__builtin_cpu_is("power9"))
+    return a - 3;
+  else if (__builtin_cpu_is("power10"))
+    return a + 7;
+  else if (__builtin_cpu_is("power11"))
+    return a - 7;
   return a + 5;
 }
 #endif
diff --git a/llvm/include/llvm/TargetParser/PPCTargetParser.def b/llvm/include/llvm/TargetParser/PPCTargetParser.def
index 44e97d56a059c..df956a68d75d6 100644
--- a/llvm/include/llvm/TargetParser/PPCTargetParser.def
+++ b/llvm/include/llvm/TargetParser/PPCTargetParser.def
@@ -40,6 +40,7 @@
 #undef AIX_PPC8_VALUE
 #undef AIX_PPC9_VALUE
 #undef AIX_PPC10_VALUE
+#undef AIX_PPC11_VALUE
 #else
 #ifndef PPC_LNX_FEATURE
 #define PPC_LNX_FEATURE(NAME, DESC, ENUMNAME, ENUMVAL, HWCAPN)
@@ -84,6 +85,7 @@
 #define AIX_PPC8_VALUE 0x00010000
 #define AIX_PPC9_VALUE 0x00020000
 #define AIX_PPC10_VALUE 0x00040000
+#define AIX_PPC11_VALUE 0x00080000
 
 // __builtin_cpu_is() and __builtin_cpu_supports() are supported only on Power7 and up on AIX.
 // PPC_CPU(Name, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID)
@@ -103,6 +105,7 @@ PPC_CPU("ppc476",SYS_CALL,44,BUILTIN_PPC_FALSE,0)
 PPC_CPU("power8",SYS_CALL,45,USE_SYS_CONF,AIX_PPC8_VALUE)
 PPC_CPU("power9",SYS_CALL,46,USE_SYS_CONF,AIX_PPC9_VALUE)
 PPC_CPU("power10",SYS_CALL,47,USE_SYS_CONF,AIX_PPC10_VALUE)
+PPC_CPU("power11",SYS_CALL,48,USE_SYS_CONF,AIX_PPC11_VALUE)
 #undef PPC_CPU
 
 // PPC features on Linux:

From 40af7ee9c176ad96465a6055369646e574522501 Mon Sep 17 00:00:00 2001
From: PaulXiCao <paulxicao7@gmail.com>
Date: Tue, 23 Jul 2024 15:11:44 +0000
Subject: [PATCH 010/427] [libc++][math] Fix undue overflowing of
 `std::hypot(x,y,z)` (#93350)

The 3-dimentionsional `std::hypot(x,y,z)` was sub-optimally implemented.
This lead to possible over-/underflows in (intermediate) results which
can be circumvented by this proposed change.

The idea is to to scale the arguments (see linked issue for full
discussion).

Tests have been added for problematic over- and underflows.

Closes #92782

(cherry picked from commit 9628777479a970db5d0c2d0b456dac6633864760)
---
 libcxx/include/__math/hypot.h                 | 89 ++++++++++++++++++
 libcxx/include/cmath                          | 25 +----
 .../test/libcxx/transitive_includes/cxx17.csv |  3 +
 .../test/libcxx/transitive_includes/cxx20.csv |  3 +
 .../test/libcxx/transitive_includes/cxx23.csv |  3 +
 .../test/libcxx/transitive_includes/cxx26.csv |  3 +
 .../test/std/numerics/c.math/cmath.pass.cpp   | 91 +++++++++++++++----
 libcxx/test/support/fp_compare.h              | 45 ++++-----
 8 files changed, 197 insertions(+), 65 deletions(-)

diff --git a/libcxx/include/__math/hypot.h b/libcxx/include/__math/hypot.h
index 1bf193a9ab7ee..61fd260c59409 100644
--- a/libcxx/include/__math/hypot.h
+++ b/libcxx/include/__math/hypot.h
@@ -15,10 +15,21 @@
 #include <__type_traits/is_same.h>
 #include <__type_traits/promote.h>
 
+#if _LIBCPP_STD_VER >= 17
+#  include <__algorithm/max.h>
+#  include <__math/abs.h>
+#  include <__math/roots.h>
+#  include <__utility/pair.h>
+#  include <limits>
+#endif
+
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
 
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 namespace __math {
@@ -41,8 +52,86 @@ inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type hypot(_A1 __x, _
   return __math::hypot((__result_type)__x, (__result_type)__y);
 }
 
+#if _LIBCPP_STD_VER >= 17
+// Factors needed to determine if over-/underflow might happen for `std::hypot(x,y,z)`.
+// returns [overflow_threshold, overflow_scale]
+template <class _Real>
+_LIBCPP_HIDE_FROM_ABI std::pair<_Real, _Real> __hypot_factors() {
+  static_assert(std::numeric_limits<_Real>::is_iec559);
+
+  if constexpr (std::is_same_v<_Real, float>) {
+    static_assert(-125 == std::numeric_limits<_Real>::min_exponent);
+    static_assert(+128 == std::numeric_limits<_Real>::max_exponent);
+    return {0x1.0p+62f, 0x1.0p-70f};
+  } else if constexpr (std::is_same_v<_Real, double>) {
+    static_assert(-1021 == std::numeric_limits<_Real>::min_exponent);
+    static_assert(+1024 == std::numeric_limits<_Real>::max_exponent);
+    return {0x1.0p+510, 0x1.0p-600};
+  } else { // long double
+    static_assert(std::is_same_v<_Real, long double>);
+
+    // preprocessor guard necessary, otherwise literals (e.g. `0x1.0p+8'190l`) throw warnings even when shielded by `if
+    // constexpr`
+#  if __DBL_MAX_EXP__ == __LDBL_MAX_EXP__
+    static_assert(sizeof(_Real) == sizeof(double));
+    return static_cast<std::pair<_Real, _Real>>(__math::__hypot_factors<double>());
+#  else
+    static_assert(sizeof(_Real) > sizeof(double));
+    static_assert(-16381 == std::numeric_limits<_Real>::min_exponent);
+    static_assert(+16384 == std::numeric_limits<_Real>::max_exponent);
+    return {0x1.0p+8190l, 0x1.0p-9000l};
+#  endif
+  }
+}
+
+// Computes the three-dimensional hypotenuse: `std::hypot(x,y,z)`.
+// The naive implementation might over-/underflow which is why this implementation is more involved:
+//    If the square of an argument might run into issues, we scale the arguments appropriately.
+// See https://github.com/llvm/llvm-project/issues/92782 for a detailed discussion and summary.
+template <class _Real>
+_LIBCPP_HIDE_FROM_ABI _Real __hypot(_Real __x, _Real __y, _Real __z) {
+  const _Real __max_abs = std::max(__math::fabs(__x), std::max(__math::fabs(__y), __math::fabs(__z)));
+  const auto [__overflow_threshold, __overflow_scale] = __math::__hypot_factors<_Real>();
+  _Real __scale;
+  if (__max_abs > __overflow_threshold) { // x*x + y*y + z*z might overflow
+    __scale = __overflow_scale;
+    __x *= __scale;
+    __y *= __scale;
+    __z *= __scale;
+  } else if (__max_abs < 1 / __overflow_threshold) { // x*x + y*y + z*z might underflow
+    __scale = 1 / __overflow_scale;
+    __x *= __scale;
+    __y *= __scale;
+    __z *= __scale;
+  } else
+    __scale = 1;
+  return __math::sqrt(__x * __x + __y * __y + __z * __z) / __scale;
+}
+
+inline _LIBCPP_HIDE_FROM_ABI float hypot(float __x, float __y, float __z) { return __math::__hypot(__x, __y, __z); }
+
+inline _LIBCPP_HIDE_FROM_ABI double hypot(double __x, double __y, double __z) { return __math::__hypot(__x, __y, __z); }
+
+inline _LIBCPP_HIDE_FROM_ABI long double hypot(long double __x, long double __y, long double __z) {
+  return __math::__hypot(__x, __y, __z);
+}
+
+template <class _A1,
+          class _A2,
+          class _A3,
+          std::enable_if_t< is_arithmetic_v<_A1> && is_arithmetic_v<_A2> && is_arithmetic_v<_A3>, int> = 0 >
+_LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2, _A3>::type hypot(_A1 __x, _A2 __y, _A3 __z) _NOEXCEPT {
+  using __result_type = typename __promote<_A1, _A2, _A3>::type;
+  static_assert(!(
+      std::is_same_v<_A1, __result_type> && std::is_same_v<_A2, __result_type> && std::is_same_v<_A3, __result_type>));
+  return __math::__hypot(
+      static_cast<__result_type>(__x), static_cast<__result_type>(__y), static_cast<__result_type>(__z));
+}
+#endif
+
 } // namespace __math
 
 _LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
 
 #endif // _LIBCPP___MATH_HYPOT_H
diff --git a/libcxx/include/cmath b/libcxx/include/cmath
index 3c22604a683c3..6480c4678ce33 100644
--- a/libcxx/include/cmath
+++ b/libcxx/include/cmath
@@ -313,6 +313,7 @@ constexpr long double lerp(long double a, long double b, long double t) noexcept
 */
 
 #include <__config>
+#include <__math/hypot.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/is_arithmetic.h>
 #include <__type_traits/is_constant_evaluated.h>
@@ -553,30 +554,6 @@ using ::scalbnl _LIBCPP_USING_IF_EXISTS;
 using ::tgammal _LIBCPP_USING_IF_EXISTS;
 using ::truncl _LIBCPP_USING_IF_EXISTS;
 
-#if _LIBCPP_STD_VER >= 17
-inline _LIBCPP_HIDE_FROM_ABI float hypot(float __x, float __y, float __z) {
-  return sqrt(__x * __x + __y * __y + __z * __z);
-}
-inline _LIBCPP_HIDE_FROM_ABI double hypot(double __x, double __y, double __z) {
-  return sqrt(__x * __x + __y * __y + __z * __z);
-}
-inline _LIBCPP_HIDE_FROM_ABI long double hypot(long double __x, long double __y, long double __z) {
-  return sqrt(__x * __x + __y * __y + __z * __z);
-}
-
-template <class _A1, class _A2, class _A3>
-inline _LIBCPP_HIDE_FROM_ABI
-typename enable_if_t< is_arithmetic<_A1>::value && is_arithmetic<_A2>::value && is_arithmetic<_A3>::value,
-                      __promote<_A1, _A2, _A3> >::type
-hypot(_A1 __lcpp_x, _A2 __lcpp_y, _A3 __lcpp_z) _NOEXCEPT {
-  typedef typename __promote<_A1, _A2, _A3>::type __result_type;
-  static_assert(
-      !(is_same<_A1, __result_type>::value && is_same<_A2, __result_type>::value && is_same<_A3, __result_type>::value),
-      "");
-  return std::hypot((__result_type)__lcpp_x, (__result_type)__lcpp_y, (__result_type)__lcpp_z);
-}
-#endif
-
 template <class _A1, __enable_if_t<is_floating_point<_A1>::value, int> = 0>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool __constexpr_isnan(_A1 __lcpp_x) _NOEXCEPT {
 #if __has_builtin(__builtin_isnan)
diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv
index 2c028462144ee..8099d2b79c4be 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx17.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv
@@ -130,6 +130,9 @@ chrono type_traits
 chrono vector
 chrono version
 cinttypes cstdint
+cmath cstddef
+cmath cstdint
+cmath initializer_list
 cmath limits
 cmath type_traits
 cmath version
diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv
index 982c2013e3417..384e51b101f31 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx20.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv
@@ -135,6 +135,9 @@ chrono type_traits
 chrono vector
 chrono version
 cinttypes cstdint
+cmath cstddef
+cmath cstdint
+cmath initializer_list
 cmath limits
 cmath type_traits
 cmath version
diff --git a/libcxx/test/libcxx/transitive_includes/cxx23.csv b/libcxx/test/libcxx/transitive_includes/cxx23.csv
index 8ffb71d8b566b..46b833d143f39 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx23.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx23.csv
@@ -83,6 +83,9 @@ chrono string_view
 chrono vector
 chrono version
 cinttypes cstdint
+cmath cstddef
+cmath cstdint
+cmath initializer_list
 cmath limits
 cmath version
 codecvt cctype
diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv
index 8ffb71d8b566b..46b833d143f39 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx26.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv
@@ -83,6 +83,9 @@ chrono string_view
 chrono vector
 chrono version
 cinttypes cstdint
+cmath cstddef
+cmath cstdint
+cmath initializer_list
 cmath limits
 cmath version
 codecvt cctype
diff --git a/libcxx/test/std/numerics/c.math/cmath.pass.cpp b/libcxx/test/std/numerics/c.math/cmath.pass.cpp
index 9379084499792..19b5fd0cf8996 100644
--- a/libcxx/test/std/numerics/c.math/cmath.pass.cpp
+++ b/libcxx/test/std/numerics/c.math/cmath.pass.cpp
@@ -12,14 +12,17 @@
 
 // <cmath>
 
+#include <array>
 #include <cmath>
 #include <limits>
 #include <type_traits>
 #include <cassert>
 
+#include "fp_compare.h"
 #include "test_macros.h"
 #include "hexfloat.h"
 #include "truncate_fp.h"
+#include "type_algorithms.h"
 
 // convertible to int/float/double/etc
 template <class T, int N=0>
@@ -1113,6 +1116,56 @@ void test_fmin()
     assert(std::fmin(1,0) == 0);
 }
 
+#if TEST_STD_VER >= 17
+struct TestHypot3 {
+  template <class Real>
+  void operator()() const {
+    const auto check = [](Real elem, Real abs_tol) {
+      assert(std::isfinite(std::hypot(elem, Real(0), Real(0))));
+      assert(fptest_close(std::hypot(elem, Real(0), Real(0)), elem, abs_tol));
+      assert(std::isfinite(std::hypot(elem, elem, Real(0))));
+      assert(fptest_close(std::hypot(elem, elem, Real(0)), std::sqrt(Real(2)) * elem, abs_tol));
+      assert(std::isfinite(std::hypot(elem, elem, elem)));
+      assert(fptest_close(std::hypot(elem, elem, elem), std::sqrt(Real(3)) * elem, abs_tol));
+    };
+
+    { // check for overflow
+      const auto [elem, abs_tol] = []() -> std::array<Real, 2> {
+        if constexpr (std::is_same_v<Real, float>)
+          return {1e20f, 1e16f};
+        else if constexpr (std::is_same_v<Real, double>)
+          return {1e300, 1e287};
+        else { // long double
+#  if __DBL_MAX_EXP__ == __LDBL_MAX_EXP__
+          return {1e300l, 1e287l}; // 64-bit
+#  else
+          return {1e4000l, 1e3985l}; // 80- or 128-bit
+#  endif
+        }
+      }();
+      check(elem, abs_tol);
+    }
+
+    { // check for underflow
+      const auto [elem, abs_tol] = []() -> std::array<Real, 2> {
+        if constexpr (std::is_same_v<Real, float>)
+          return {1e-20f, 1e-24f};
+        else if constexpr (std::is_same_v<Real, double>)
+          return {1e-287, 1e-300};
+        else { // long double
+#  if __DBL_MAX_EXP__ == __LDBL_MAX_EXP__
+          return {1e-287l, 1e-300l}; // 64-bit
+#  else
+          return {1e-3985l, 1e-4000l}; // 80- or 128-bit
+#  endif
+        }
+      }();
+      check(elem, abs_tol);
+    }
+  }
+};
+#endif
+
 void test_hypot()
 {
     static_assert((std::is_same<decltype(std::hypot((float)0, (float)0)), float>::value), "");
@@ -1135,25 +1188,31 @@ void test_hypot()
     static_assert((std::is_same<decltype(hypot(Ambiguous(), Ambiguous())), Ambiguous>::value), "");
     assert(std::hypot(3,4) == 5);
 
-#if TEST_STD_VER > 14
-    static_assert((std::is_same<decltype(std::hypot((float)0, (float)0, (float)0)), float>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (bool)0, (float)0)), double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (unsigned short)0, (double)0)), double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (int)0, (long double)0)), long double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (double)0, (long)0)), double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (long double)0, (unsigned long)0)), long double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (int)0, (long long)0)), double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (int)0, (unsigned long long)0)), double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (double)0, (double)0)), double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (long double)0, (long double)0)), long double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (float)0, (double)0)), double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (float)0, (long double)0)), long double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (double)0, (long double)0)), long double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((int)0, (int)0, (int)0)), double>::value), "");
-    static_assert((std::is_same<decltype(hypot(Ambiguous(), Ambiguous(), Ambiguous())), Ambiguous>::value), "");
+#if TEST_STD_VER >= 17
+    // clang-format off
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (float)0,          (float)0)),              float>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (bool)0,           (float)0)),              double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (unsigned short)0, (double)0)),             double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (int)0,            (long double)0)),        long double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (double)0,         (long)0)),               double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (long double)0,    (unsigned long)0)),      long double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (int)0,            (long long)0)),          double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (int)0,            (unsigned long long)0)), double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (double)0,         (double)0)),             double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (long double)0,    (long double)0)),        long double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (float)0,          (double)0)),             double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (float)0,          (long double)0)),        long double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (double)0,         (long double)0)),        long double>));
+    static_assert((std::is_same_v<decltype(std::hypot((int)0,   (int)0,            (int)0)),                double>));
+    static_assert((std::is_same_v<decltype(hypot(Ambiguous(), Ambiguous(), Ambiguous())), Ambiguous>));
+    // clang-format on
 
     assert(std::hypot(2,3,6) == 7);
     assert(std::hypot(1,4,8) == 9);
+
+    // Check for undue over-/underflows of intermediate results.
+    // See discussion at https://github.com/llvm/llvm-project/issues/92782.
+    types::for_each(types::floating_point_types(), TestHypot3());
 #endif
 }
 
diff --git a/libcxx/test/support/fp_compare.h b/libcxx/test/support/fp_compare.h
index 1d1933b0bcd81..3088a211dadc3 100644
--- a/libcxx/test/support/fp_compare.h
+++ b/libcxx/test/support/fp_compare.h
@@ -9,39 +9,34 @@
 #ifndef SUPPORT_FP_COMPARE_H
 #define SUPPORT_FP_COMPARE_H
 
-#include <cmath>      // for std::abs
-#include <algorithm>  // for std::max
+#include <cmath>     // for std::abs
+#include <algorithm> // for std::max
 #include <cassert>
+#include <__config>
 
 // See https://www.boost.org/doc/libs/1_70_0/libs/test/doc/html/boost_test/testing_tools/extended_comparison/floating_point/floating_points_comparison_theory.html
 
-template<typename T>
-bool fptest_close(T val, T expected, T eps)
-{
-    constexpr T zero = T(0);
-    assert(eps >= zero);
+template <typename T>
+bool fptest_close(T val, T expected, T eps) {
+  _LIBCPP_CONSTEXPR T zero = T(0);
+  assert(eps >= zero);
 
-    // Handle the zero cases
-    if (eps      == zero) return val == expected;
-    if (val      == zero) return std::abs(expected) <= eps;
-    if (expected == zero) return std::abs(val)      <= eps;
+  // Handle the zero cases
+  if (eps == zero)
+    return val == expected;
+  if (val == zero)
+    return std::abs(expected) <= eps;
+  if (expected == zero)
+    return std::abs(val) <= eps;
 
-    return std::abs(val - expected) < eps
-        && std::abs(val - expected)/std::abs(val) < eps;
+  return std::abs(val - expected) < eps && std::abs(val - expected) / std::abs(val) < eps;
 }
 
-template<typename T>
-bool fptest_close_pct(T val, T expected, T percent)
-{
-    constexpr T zero = T(0);
-    assert(percent >= zero);
-
-    // Handle the zero cases
-    if (percent == zero) return val == expected;
-    T eps = (percent / T(100)) * std::max(std::abs(val), std::abs(expected));
-
-    return fptest_close(val, expected, eps);
+template <typename T>
+bool fptest_close_pct(T val, T expected, T percent) {
+  assert(percent >= T(0));
+  T eps = (percent / T(100)) * std::max(std::abs(val), std::abs(expected));
+  return fptest_close(val, expected, eps);
 }
 
-
 #endif // SUPPORT_FP_COMPARE_H

From a930d1f322674d03d9d88638a89fc4adcf3178b6 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Tue, 23 Jul 2024 18:03:28 +0200
Subject: [PATCH 011/427] [libc++][vector<bool>] Tests shrink_to_fit
 requirement. (#98009)

`vector<bool>`'s shrink_to_fit implementation is using the
"swap-to-free-container-resources-trick" which only shrinks when the
input vector is empty. Since the request to shrink_to_fit is
non-binding, this is a valid implementation. It is not a high-quality
implementation. Since `vector<bool>` is not a very popular container the
implementation has not been changed and only a test to validate the
non-growing property has been added.

This was discovered while investigating #95161.

(cherry picked from commit c2e438675754b83c31d7d5ba40cb13fe77e795de)
---
 .../vector.bool/shrink_to_fit.pass.cpp        | 45 ++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/libcxx/test/std/containers/sequences/vector.bool/shrink_to_fit.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/shrink_to_fit.pass.cpp
index b39245cab7bf4..f8bcee31964bb 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/shrink_to_fit.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/shrink_to_fit.pass.cpp
@@ -39,11 +39,54 @@ TEST_CONSTEXPR_CXX20 bool tests()
     return true;
 }
 
+#if TEST_STD_VER >= 23
+template <typename T>
+struct increasing_allocator {
+  using value_type         = T;
+  std::size_t min_elements = 1000;
+  increasing_allocator()   = default;
+
+  template <typename U>
+  constexpr increasing_allocator(const increasing_allocator<U>& other) noexcept : min_elements(other.min_elements) {}
+
+  constexpr std::allocation_result<T*> allocate_at_least(std::size_t n) {
+    if (n < min_elements)
+      n = min_elements;
+    min_elements += 1000;
+    return std::allocator<T>{}.allocate_at_least(n);
+  }
+  constexpr T* allocate(std::size_t n) { return allocate_at_least(n).ptr; }
+  constexpr void deallocate(T* p, std::size_t n) noexcept { std::allocator<T>{}.deallocate(p, n); }
+};
+
+template <typename T, typename U>
+bool operator==(increasing_allocator<T>, increasing_allocator<U>) {
+  return true;
+}
+
+// https://github.com/llvm/llvm-project/issues/95161
+constexpr bool test_increasing_allocator() {
+  std::vector<bool, increasing_allocator<bool>> v;
+  v.push_back(1);
+  std::size_t capacity = v.capacity();
+  v.shrink_to_fit();
+  assert(v.capacity() <= capacity);
+  assert(v.size() == 1);
+
+  return true;
+}
+#endif // TEST_STD_VER >= 23
+
 int main(int, char**)
 {
-    tests();
+  tests();
 #if TEST_STD_VER > 17
     static_assert(tests());
 #endif
+#if TEST_STD_VER >= 23
+    test_increasing_allocator();
+    static_assert(test_increasing_allocator());
+#endif // TEST_STD_VER >= 23
+
     return 0;
 }

From c5cd826c05b6ed1760a5be2d0996fbb912bad6c7 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Tue, 23 Jul 2024 18:13:22 +0200
Subject: [PATCH 012/427] [libc++][string] Fixes shrink_to_fit. (#97961)

This ensures that shrink_to_fit does not increase the allocated size.

Partly addresses #95161

(cherry picked from commit d0ca9f23e8f25b0509c3ff34ed215508b39ea6e7)
---
 libcxx/include/string                         | 17 ++++++--
 .../string.capacity/shrink_to_fit.pass.cpp    | 41 +++++++++++++++++++
 2 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/libcxx/include/string b/libcxx/include/string
index ba86a32090825..9fa979e3a5178 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -3358,23 +3358,34 @@ basic_string<_CharT, _Traits, _Allocator>::__shrink_or_extend(size_type __target
     __p        = __get_long_pointer();
   } else {
     if (__target_capacity > __cap) {
+      // Extend
+      // - called from reserve should propagate the exception thrown.
       auto __allocation = std::__allocate_at_least(__alloc(), __target_capacity + 1);
       __new_data        = __allocation.ptr;
       __target_capacity = __allocation.count - 1;
     } else {
+      // Shrink
+      // - called from shrink_to_fit should not throw.
+      // - called from reserve may throw but is not required to.
 #ifndef _LIBCPP_HAS_NO_EXCEPTIONS
       try {
 #endif // _LIBCPP_HAS_NO_EXCEPTIONS
         auto __allocation = std::__allocate_at_least(__alloc(), __target_capacity + 1);
+
+        // The Standard mandates shrink_to_fit() does not increase the capacity.
+        // With equal capacity keep the existing buffer. This avoids extra work
+        // due to swapping the elements.
+        if (__allocation.count - 1 > __target_capacity) {
+          __alloc_traits::deallocate(__alloc(), __allocation.ptr, __allocation.count);
+          __annotate_new(__sz); // Undoes the __annotate_delete()
+          return;
+        }
         __new_data        = __allocation.ptr;
         __target_capacity = __allocation.count - 1;
 #ifndef _LIBCPP_HAS_NO_EXCEPTIONS
       } catch (...) {
         return;
       }
-#else  // _LIBCPP_HAS_NO_EXCEPTIONS
-      if (__new_data == nullptr)
-        return;
 #endif // _LIBCPP_HAS_NO_EXCEPTIONS
     }
     __begin_lifetime(__new_data, __target_capacity + 1);
diff --git a/libcxx/test/std/strings/basic.string/string.capacity/shrink_to_fit.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/shrink_to_fit.pass.cpp
index 057050cdcf7fa..6f5e43d1341f5 100644
--- a/libcxx/test/std/strings/basic.string/string.capacity/shrink_to_fit.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.capacity/shrink_to_fit.pass.cpp
@@ -63,8 +63,49 @@ TEST_CONSTEXPR_CXX20 bool test() {
   return true;
 }
 
+#if TEST_STD_VER >= 23
+std::size_t min_bytes = 1000;
+
+template <typename T>
+struct increasing_allocator {
+  using value_type       = T;
+  increasing_allocator() = default;
+  template <typename U>
+  increasing_allocator(const increasing_allocator<U>&) noexcept {}
+  std::allocation_result<T*> allocate_at_least(std::size_t n) {
+    std::size_t allocation_amount = n * sizeof(T);
+    if (allocation_amount < min_bytes)
+      allocation_amount = min_bytes;
+    min_bytes += 1000;
+    return {static_cast<T*>(::operator new(allocation_amount)), allocation_amount / sizeof(T)};
+  }
+  T* allocate(std::size_t n) { return allocate_at_least(n).ptr; }
+  void deallocate(T* p, std::size_t) noexcept { ::operator delete(static_cast<void*>(p)); }
+};
+
+template <typename T, typename U>
+bool operator==(increasing_allocator<T>, increasing_allocator<U>) {
+  return true;
+}
+
+// https://github.com/llvm/llvm-project/issues/95161
+void test_increasing_allocator() {
+  std::basic_string<char, std::char_traits<char>, increasing_allocator<char>> s{
+      "String does not fit in the internal buffer"};
+  std::size_t capacity = s.capacity();
+  std::size_t size     = s.size();
+  s.shrink_to_fit();
+  assert(s.capacity() <= capacity);
+  assert(s.size() == size);
+  LIBCPP_ASSERT(is_string_asan_correct(s));
+}
+#endif // TEST_STD_VER >= 23
+
 int main(int, char**) {
   test();
+#if TEST_STD_VER >= 23
+  test_increasing_allocator();
+#endif
 #if TEST_STD_VER > 17
   static_assert(test());
 #endif

From 518cef7f38ea539f14aedbf3a08a9989960fc355 Mon Sep 17 00:00:00 2001
From: azhan92 <alisonxzhang@gmail.com>
Date: Tue, 23 Jul 2024 09:49:41 -0400
Subject: [PATCH 013/427] [PowerPC] Add support for -mcpu=pwr11 / -mtune=pwr11
 (#99511)

This PR adds support for -mcpu=pwr11/power11 and -mtune=pwr11/power11 in
clang and llvm.

(cherry picked from commit 1df4d866cca51eeab8f012a97cc50957b45971fe)
---
 clang/lib/Basic/Targets/PPC.cpp               | 39 ++++++++++++-------
 clang/lib/Basic/Targets/PPC.h                 | 19 ++++++---
 clang/lib/Driver/ToolChains/Arch/PPC.cpp      |  3 ++
 clang/test/Misc/target-invalid-cpu-note.c     |  2 +-
 clang/test/Preprocessor/init-ppc64.c          | 22 +++++++++++
 llvm/lib/Target/PowerPC/PPC.td                | 20 ++++++++--
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  3 ++
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp      |  1 +
 llvm/lib/Target/PowerPC/PPCSubtarget.h        |  1 +
 .../Target/PowerPC/PPCTargetTransformInfo.cpp |  4 +-
 llvm/lib/TargetParser/Host.cpp                |  7 ++++
 llvm/test/CodeGen/PowerPC/check-cpu.ll        |  6 ++-
 llvm/test/CodeGen/PowerPC/mma-acc-spill.ll    |  7 ++++
 ...{p10-constants.ll => p10-p11-constants.ll} | 12 +++++-
 llvm/unittests/TargetParser/Host.cpp          |  1 +
 15 files changed, 120 insertions(+), 27 deletions(-)
 rename llvm/test/CodeGen/PowerPC/{p10-constants.ll => p10-p11-constants.ll} (94%)

diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 4ba4a49311d36..9ff54083c923b 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -385,6 +385,8 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("_ARCH_PWR9");
   if (ArchDefs & ArchDefinePwr10)
     Builder.defineMacro("_ARCH_PWR10");
+  if (ArchDefs & ArchDefinePwr11)
+    Builder.defineMacro("_ARCH_PWR11");
   if (ArchDefs & ArchDefineA2)
     Builder.defineMacro("_ARCH_A2");
   if (ArchDefs & ArchDefineE500)
@@ -622,10 +624,17 @@ bool PPCTargetInfo::initFeatureMap(
     addP10SpecificFeatures(Features);
   }
 
-  // Future CPU should include all of the features of Power 10 as well as any
+  // Power11 includes all the same features as Power10 plus any features
+  // specific to the Power11 core.
+  if (CPU == "pwr11" || CPU == "power11") {
+    initFeatureMap(Features, Diags, "pwr10", FeaturesVec);
+    addP11SpecificFeatures(Features);
+  }
+
+  // Future CPU should include all of the features of Power 11 as well as any
   // additional features (yet to be determined) specific to it.
   if (CPU == "future") {
-    initFeatureMap(Features, Diags, "pwr10", FeaturesVec);
+    initFeatureMap(Features, Diags, "pwr11", FeaturesVec);
     addFutureSpecificFeatures(Features);
   }
 
@@ -696,6 +705,10 @@ void PPCTargetInfo::addP10SpecificFeatures(
   Features["isa-v31-instructions"] = true;
 }
 
+// Add any Power11 specific features.
+void PPCTargetInfo::addP11SpecificFeatures(
+    llvm::StringMap<bool> &Features) const {}
+
 // Add features specific to the "Future" CPU.
 void PPCTargetInfo::addFutureSpecificFeatures(
     llvm::StringMap<bool> &Features) const {}
@@ -870,17 +883,17 @@ ArrayRef<TargetInfo::AddlRegName> PPCTargetInfo::getGCCAddlRegNames() const {
 }
 
 static constexpr llvm::StringLiteral ValidCPUNames[] = {
-    {"generic"},     {"440"},     {"450"},    {"601"},       {"602"},
-    {"603"},         {"603e"},    {"603ev"},  {"604"},       {"604e"},
-    {"620"},         {"630"},     {"g3"},     {"7400"},      {"g4"},
-    {"7450"},        {"g4+"},     {"750"},    {"8548"},      {"970"},
-    {"g5"},          {"a2"},      {"e500"},   {"e500mc"},    {"e5500"},
-    {"power3"},      {"pwr3"},    {"power4"}, {"pwr4"},      {"power5"},
-    {"pwr5"},        {"power5x"}, {"pwr5x"},  {"power6"},    {"pwr6"},
-    {"power6x"},     {"pwr6x"},   {"power7"}, {"pwr7"},      {"power8"},
-    {"pwr8"},        {"power9"},  {"pwr9"},   {"power10"},   {"pwr10"},
-    {"powerpc"},     {"ppc"},     {"ppc32"},  {"powerpc64"}, {"ppc64"},
-    {"powerpc64le"}, {"ppc64le"}, {"future"}};
+    {"generic"},   {"440"},     {"450"},         {"601"},     {"602"},
+    {"603"},       {"603e"},    {"603ev"},       {"604"},     {"604e"},
+    {"620"},       {"630"},     {"g3"},          {"7400"},    {"g4"},
+    {"7450"},      {"g4+"},     {"750"},         {"8548"},    {"970"},
+    {"g5"},        {"a2"},      {"e500"},        {"e500mc"},  {"e5500"},
+    {"power3"},    {"pwr3"},    {"power4"},      {"pwr4"},    {"power5"},
+    {"pwr5"},      {"power5x"}, {"pwr5x"},       {"power6"},  {"pwr6"},
+    {"power6x"},   {"pwr6x"},   {"power7"},      {"pwr7"},    {"power8"},
+    {"pwr8"},      {"power9"},  {"pwr9"},        {"power10"}, {"pwr10"},
+    {"power11"},   {"pwr11"},   {"powerpc"},     {"ppc"},     {"ppc32"},
+    {"powerpc64"}, {"ppc64"},   {"powerpc64le"}, {"ppc64le"}, {"future"}};
 
 bool PPCTargetInfo::isValidCPUName(StringRef Name) const {
   return llvm::is_contained(ValidCPUNames, Name);
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index b15ab6fbcf492..6d5d8dd54d013 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -44,8 +44,9 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
     ArchDefinePwr8 = 1 << 12,
     ArchDefinePwr9 = 1 << 13,
     ArchDefinePwr10 = 1 << 14,
-    ArchDefineFuture = 1 << 15,
-    ArchDefineA2 = 1 << 16,
+    ArchDefinePwr11 = 1 << 15,
+    ArchDefineFuture = 1 << 16,
+    ArchDefineA2 = 1 << 17,
     ArchDefineE500 = 1 << 18
   } ArchDefineTypes;
 
@@ -166,11 +167,16 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
                          ArchDefinePwr7 | ArchDefinePwr6 | ArchDefinePwr5x |
                          ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr |
                          ArchDefinePpcsq)
+              .Cases("power11", "pwr11",
+                     ArchDefinePwr11 | ArchDefinePwr10 | ArchDefinePwr9 |
+                         ArchDefinePwr8 | ArchDefinePwr7 | ArchDefinePwr6 |
+                         ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4 |
+                         ArchDefinePpcgr | ArchDefinePpcsq)
               .Case("future",
-                    ArchDefineFuture | ArchDefinePwr10 | ArchDefinePwr9 |
-                        ArchDefinePwr8 | ArchDefinePwr7 | ArchDefinePwr6 |
-                        ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4 |
-                        ArchDefinePpcgr | ArchDefinePpcsq)
+                    ArchDefineFuture | ArchDefinePwr11 | ArchDefinePwr10 |
+                        ArchDefinePwr9 | ArchDefinePwr8 | ArchDefinePwr7 |
+                        ArchDefinePwr6 | ArchDefinePwr5x | ArchDefinePwr5 |
+                        ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq)
               .Cases("8548", "e500", ArchDefineE500)
               .Default(ArchDefineNone);
     }
@@ -192,6 +198,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
                  const std::vector<std::string> &FeaturesVec) const override;
 
   void addP10SpecificFeatures(llvm::StringMap<bool> &Features) const;
+  void addP11SpecificFeatures(llvm::StringMap<bool> &Features) const;
   void addFutureSpecificFeatures(llvm::StringMap<bool> &Features) const;
 
   bool handleTargetFeatures(std::vector<std::string> &Features,
diff --git a/clang/lib/Driver/ToolChains/Arch/PPC.cpp b/clang/lib/Driver/ToolChains/Arch/PPC.cpp
index 634c096523319..acd5757d6ea97 100644
--- a/clang/lib/Driver/ToolChains/Arch/PPC.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/PPC.cpp
@@ -70,6 +70,7 @@ static std::string normalizeCPUName(StringRef CPUName, const llvm::Triple &T) {
       .Case("power8", "pwr8")
       .Case("power9", "pwr9")
       .Case("power10", "pwr10")
+      .Case("power11", "pwr11")
       .Case("future", "future")
       .Case("powerpc", "ppc")
       .Case("powerpc64", "ppc64")
@@ -103,6 +104,8 @@ const char *ppc::getPPCAsmModeForCPU(StringRef Name) {
       .Case("power9", "-mpower9")
       .Case("pwr10", "-mpower10")
       .Case("power10", "-mpower10")
+      .Case("pwr11", "-mpower11")
+      .Case("power11", "-mpower11")
       .Default("-many");
 }
 
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index a5f9ffa21220a..4d6759dd81537 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -57,7 +57,7 @@
 
 // RUN: not %clang_cc1 -triple powerpc--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix PPC
 // PPC: error: unknown target CPU 'not-a-cpu'
-// PPC-NEXT: note: valid target CPU values are: generic, 440, 450, 601, 602, 603, 603e, 603ev, 604, 604e, 620, 630, g3, 7400, g4, 7450, g4+, 750, 8548, 970, g5, a2, e500, e500mc, e5500, power3, pwr3, power4, pwr4, power5, pwr5, power5x, pwr5x, power6, pwr6, power6x, pwr6x, power7, pwr7, power8, pwr8, power9, pwr9, power10, pwr10, powerpc, ppc, ppc32, powerpc64, ppc64, powerpc64le, ppc64le, future{{$}}
+// PPC-NEXT: note: valid target CPU values are: generic, 440, 450, 601, 602, 603, 603e, 603ev, 604, 604e, 620, 630, g3, 7400, g4, 7450, g4+, 750, 8548, 970, g5, a2, e500, e500mc, e5500, power3, pwr3, power4, pwr4, power5, pwr5, power5x, pwr5x, power6, pwr6, power6x, pwr6x, power7, pwr7, power8, pwr8, power9, pwr9, power10, pwr10, power11, pwr11, powerpc, ppc, ppc32, powerpc64, ppc64, powerpc64le, ppc64le, future{{$}}
 
 // RUN: not %clang_cc1 -triple mips--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix MIPS
 // MIPS: error: unknown target CPU 'not-a-cpu'
diff --git a/clang/test/Preprocessor/init-ppc64.c b/clang/test/Preprocessor/init-ppc64.c
index 42e5232824de7..56164beb913d5 100644
--- a/clang/test/Preprocessor/init-ppc64.c
+++ b/clang/test/Preprocessor/init-ppc64.c
@@ -632,6 +632,27 @@
 // PPCPOWER10:#define __PCREL__ 1
 // PPCPOWER10-NOT:#define __ROP_PROTECT__ 1
 //
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu pwr11 -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCPOWER11 %s
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu power11 -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCPOWER11 %s
+//
+// PPCPOWER11:#define _ARCH_PPC 1
+// PPCPOWER11:#define _ARCH_PPC64 1
+// PPCPOWER11:#define _ARCH_PPCGR 1
+// PPCPOWER11:#define _ARCH_PPCSQ 1
+// PPCPOWER11:#define _ARCH_PWR10 1
+// PPCPOWER11:#define _ARCH_PWR11 1
+// PPCPOWER11:#define _ARCH_PWR4 1
+// PPCPOWER11:#define _ARCH_PWR5 1
+// PPCPOWER11:#define _ARCH_PWR5X 1
+// PPCPOWER11:#define _ARCH_PWR6 1
+// PPCPOWER11-NOT:#define _ARCH_PWR6X 1
+// PPCPOWER11:#define _ARCH_PWR7 1
+// PPCPOWER11:#define _ARCH_PWR8 1
+// PPCPOWER11:#define _ARCH_PWR9 1
+// PPCPOWER11:#define __MMA__ 1
+// PPCPOWER11:#define __PCREL__ 1
+// PPCPOWER11-NOT:#define __ROP_PROTECT__ 1
+//
 // RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu future -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCFUTURE %s
 //
 // PPCFUTURE:#define _ARCH_PPC 1
@@ -639,6 +660,7 @@
 // PPCFUTURE:#define _ARCH_PPCGR 1
 // PPCFUTURE:#define _ARCH_PPCSQ 1
 // PPCFUTURE:#define _ARCH_PWR10 1
+// PPCFUTURE:#define _ARCH_PWR11 1
 // PPCFUTURE:#define _ARCH_PWR4 1
 // PPCFUTURE:#define _ARCH_PWR5 1
 // PPCFUTURE:#define _ARCH_PWR5X 1
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 84ef582c029d3..da31a993b9c69 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -52,6 +52,7 @@ def DirectivePwr7: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR7", "">;
 def DirectivePwr8: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR8", "">;
 def DirectivePwr9: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR9", "">;
 def DirectivePwr10: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR10", "">;
+def DirectivePwr11: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR11", "">;
 def DirectivePwrFuture
     : SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR_FUTURE", "">;
 
@@ -467,13 +468,25 @@ def ProcessorFeatures {
   list<SubtargetFeature> P10Features =
     !listconcat(P10InheritableFeatures, P10SpecificFeatures);
 
-  // Future
-  // For future CPU we assume that all of the existing features from Power10
+  // Power11
+  // For P11 CPU we assume that all the existing features from Power10
   // still exist with the exception of those we know are Power10 specific.
+  list<SubtargetFeature> P11AdditionalFeatures =
+    [DirectivePwr11];
+  list<SubtargetFeature> P11SpecificFeatures =
+    [];
+  list<SubtargetFeature> P11InheritableFeatures =
+    !listconcat(P10InheritableFeatures, P11AdditionalFeatures);
+  list<SubtargetFeature> P11Features =
+    !listconcat(P11InheritableFeatures, P11SpecificFeatures);
+
+  // Future
+  // For future CPU we assume that all of the existing features from Power11
+  // still exist with the exception of those we know are Power11 specific.
   list<SubtargetFeature> FutureAdditionalFeatures = [FeatureISAFuture];
   list<SubtargetFeature> FutureSpecificFeatures = [];
   list<SubtargetFeature> FutureInheritableFeatures =
-    !listconcat(P10InheritableFeatures, FutureAdditionalFeatures);
+    !listconcat(P11InheritableFeatures, FutureAdditionalFeatures);
   list<SubtargetFeature> FutureFeatures =
     !listconcat(FutureInheritableFeatures, FutureSpecificFeatures);
 }
@@ -672,6 +685,7 @@ def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>;
 def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>;
 def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>;
 def : ProcessorModel<"pwr10", P10Model, ProcessorFeatures.P10Features>;
+def : ProcessorModel<"pwr11", P10Model, ProcessorFeatures.P11Features>;
 // No scheduler model for future CPU.
 def : ProcessorModel<"future", NoSchedModel,
                   ProcessorFeatures.FutureFeatures>;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 898d1f80d0564..aaf0449a55387 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1469,6 +1469,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   case PPC::DIR_PWR8:
   case PPC::DIR_PWR9:
   case PPC::DIR_PWR10:
+  case PPC::DIR_PWR11:
   case PPC::DIR_PWR_FUTURE:
     setPrefLoopAlignment(Align(16));
     setPrefFunctionAlignment(Align(16));
@@ -16664,6 +16665,7 @@ Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
   case PPC::DIR_PWR8:
   case PPC::DIR_PWR9:
   case PPC::DIR_PWR10:
+  case PPC::DIR_PWR11:
   case PPC::DIR_PWR_FUTURE: {
     if (!ML)
       break;
@@ -18046,6 +18048,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
       return true;
     case PPC::DIR_PWR9:
     case PPC::DIR_PWR10:
+    case PPC::DIR_PWR11:
     case PPC::DIR_PWR_FUTURE:
       //  type        mul     add    shl
       // scalar        5       2      2
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 2d3c520429f2a..81f16eb1a905b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -3485,6 +3485,7 @@ unsigned PPCInstrInfo::getSpillTarget() const {
   // With P10, we may need to spill paired vector registers or accumulator
   // registers. MMA implies paired vectors, so we can just check that.
   bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
+  // P11 uses the P10 target.
   return Subtarget.isISAFuture() ? 3 : IsP10Variant ?
                                    2 : Subtarget.hasP9Vector() ?
                                    1 : 0;
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index bf35f8ec151b1..2079dc0acc3cf 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -61,6 +61,7 @@ enum {
   DIR_PWR8,
   DIR_PWR9,
   DIR_PWR10,
+  DIR_PWR11,
   DIR_PWR_FUTURE,
   DIR_64
 };
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 3fa35efc2d159..b7bdbeb535d52 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -504,7 +504,7 @@ unsigned PPCTTIImpl::getCacheLineSize() const {
   // Assume that Future CPU has the same cache line size as the others.
   if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
       Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
-      Directive == PPC::DIR_PWR_FUTURE)
+      Directive == PPC::DIR_PWR11 || Directive == PPC::DIR_PWR_FUTURE)
     return 128;
 
   // On other processors return a default of 64 bytes.
@@ -538,7 +538,7 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
   // Assume that future is the same as the others.
   if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
       Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
-      Directive == PPC::DIR_PWR_FUTURE)
+      Directive == PPC::DIR_PWR11 || Directive == PPC::DIR_PWR_FUTURE)
     return 12;
 
   // For most things, modern systems have two execution units (and
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index fda085f880096..7e637cba4cfbc 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -150,6 +150,7 @@ StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
       .Case("POWER8NVL", "pwr8")
       .Case("POWER9", "pwr9")
       .Case("POWER10", "pwr10")
+      .Case("POWER11", "pwr11")
       // FIXME: If we get a simulator or machine with the capabilities of
       // mcpu=future, we should revisit this and add the name reported by the
       // simulator/machine.
@@ -1549,6 +1550,12 @@ StringRef sys::getHostCPUName() {
   case 0x40000:
 #endif
     return "pwr10";
+#ifdef POWER_11
+  case POWER_11:
+#else
+  case 0x80000:
+#endif
+    return "pwr11";
   default:
     return "generic";
   }
diff --git a/llvm/test/CodeGen/PowerPC/check-cpu.ll b/llvm/test/CodeGen/PowerPC/check-cpu.ll
index e1a201427a410..1dc532cb428f4 100644
--- a/llvm/test/CodeGen/PowerPC/check-cpu.ll
+++ b/llvm/test/CodeGen/PowerPC/check-cpu.ll
@@ -3,6 +3,10 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:     -mcpu=future < %s 2>&1 | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -mcpu=pwr11 < %s 2>&1 | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:     -mcpu=pwr11 < %s 2>&1 | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:     -mcpu=pwr10 < %s 2>&1 | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:     -mcpu=pwr10 < %s 2>&1 | FileCheck %s
@@ -13,7 +17,7 @@
 
 
 
-; Test -mcpu=[pwr9|pwr10|future] is recognized on PowerPC.
+; Test -mcpu=[pwr9|pwr10|pwr11|future] is recognized on PowerPC.
 
 ; CHECK-NOT: is not a recognized processor for this target
 ; CHECK:     .text
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
index 8d03594fe1bfd..681f81d74794d 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
@@ -6,6 +6,13 @@
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \
 ; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
 
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr11 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr11 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
 declare <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>)
 declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
 declare void @foo()
diff --git a/llvm/test/CodeGen/PowerPC/p10-constants.ll b/llvm/test/CodeGen/PowerPC/p10-p11-constants.ll
similarity index 94%
rename from llvm/test/CodeGen/PowerPC/p10-constants.ll
rename to llvm/test/CodeGen/PowerPC/p10-p11-constants.ll
index 77472afd9c3d4..5f6a345bdd938 100644
--- a/llvm/test/CodeGen/PowerPC/p10-constants.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-p11-constants.ll
@@ -8,7 +8,17 @@
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
 ; RUN:   FileCheck %s --check-prefix=CHECK32
 
-; These test cases aim to test constant materialization using the pli instruction on Power10.
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr11 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr11 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu \
+; RUN:   -mcpu=pwr11 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s --check-prefix=CHECK32
+
+; These test cases aim to test constant materialization using the pli instruction on Power10 and Power11.
 
 define  signext i32 @t_16BitsMinRequiring34Bits() {
 ; CHECK-LABEL: t_16BitsMinRequiring34Bits:
diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
index 61921a99e1711..f8dd1d3a60a00 100644
--- a/llvm/unittests/TargetParser/Host.cpp
+++ b/llvm/unittests/TargetParser/Host.cpp
@@ -536,6 +536,7 @@ TEST(HostTest, AIXHostCPUDetect) {
                        .Case("POWER 8\n", "pwr8")
                        .Case("POWER 9\n", "pwr9")
                        .Case("POWER 10\n", "pwr10")
+                       .Case("POWER 11\n", "pwr11")
                        .Default("unknown");
 
   StringRef HostCPU = sys::getHostCPUName();

From 43cec9d5512692315a749ccb080dcd04561453f3 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <jan_sjodin@yahoo.com>
Date: Wed, 24 Jul 2024 09:57:39 -0400
Subject: [PATCH 014/427] [clang][OpenMP] Propoagate debug location to
 OMPIRBuilder reduction codegen (#100358)

This patch propagates the debug location from Clang to the
OpenMPIRBuilder.

Fixes https://github.com/llvm/llvm-project/issues/97458

(cherry picked from commit 5b15d9c441810121c23f9f421bbb007fd4c448e8)
---
 clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index f5bd4a141cc2d..8965a14d88a6f 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -1695,7 +1695,8 @@ void CGOpenMPRuntimeGPU::emitReduction(
                          CGF.AllocaInsertPt->getIterator());
   InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                           CGF.Builder.GetInsertPoint());
-  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
+  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(
+      CodeGenIP, CGF.SourceLocToDebugLoc(Loc));
   llvm::SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos;
 
   CodeGenFunction::OMPPrivateScope Scope(CGF);

From 3651ae013e43482ea587067e575c89e9495b0f33 Mon Sep 17 00:00:00 2001
From: Chris Copeland <chris@chrisnc.net>
Date: Wed, 24 Jul 2024 05:53:39 -0700
Subject: [PATCH 015/427] [clang] Define `ATOMIC_FLAG_INIT` correctly for C++.
 (#97534)

(cherry picked from commit 4bb3a1e16f3a854d05bc0b8c5b6f8f78effb1d93)
---
 clang/docs/ReleaseNotes.rst    | 3 +++
 clang/lib/Headers/stdatomic.h  | 4 ++++
 clang/test/Headers/stdatomic.c | 5 +++++
 3 files changed, 12 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 24d88ed6edf00..5b6ee9830b507 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -891,6 +891,9 @@ Bug Fixes in This Version
 - Fixed an assertion failure when a template non-type parameter contains
   an invalid expression.
 
+- Fixed the definition of ``ATOMIC_FLAG_INIT`` in ``<stdatomic.h>`` so it can
+  be used in C++.
+
 Bug Fixes to Compiler Builtins
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/clang/lib/Headers/stdatomic.h b/clang/lib/Headers/stdatomic.h
index 2027055f38796..1991351f9e9ef 100644
--- a/clang/lib/Headers/stdatomic.h
+++ b/clang/lib/Headers/stdatomic.h
@@ -172,7 +172,11 @@ typedef _Atomic(uintmax_t)          atomic_uintmax_t;
 
 typedef struct atomic_flag { atomic_bool _Value; } atomic_flag;
 
+#ifdef __cplusplus
+#define ATOMIC_FLAG_INIT {false}
+#else
 #define ATOMIC_FLAG_INIT { 0 }
+#endif
 
 /* These should be provided by the libc implementation. */
 #ifdef __cplusplus
diff --git a/clang/test/Headers/stdatomic.c b/clang/test/Headers/stdatomic.c
index 3643fd4245b31..9afd531a9ed9b 100644
--- a/clang/test/Headers/stdatomic.c
+++ b/clang/test/Headers/stdatomic.c
@@ -1,5 +1,8 @@
 // RUN: %clang_cc1 -std=c11 -E %s | FileCheck %s
 // RUN: %clang_cc1 -std=c11 -fms-compatibility -E %s | FileCheck %s
+// RUN: %clang_cc1 -std=c11 %s -verify
+// RUN: %clang_cc1 -x c++ -std=c++11 %s -verify
+// expected-no-diagnostics
 #include <stdatomic.h>
 
 int bool_lock_free = ATOMIC_BOOL_LOCK_FREE;
@@ -31,3 +34,5 @@ int llong_lock_free = ATOMIC_LLONG_LOCK_FREE;
 
 int pointer_lock_free = ATOMIC_POINTER_LOCK_FREE;
 // CHECK: pointer_lock_free = {{ *[012] *;}}
+
+atomic_flag f = ATOMIC_FLAG_INIT;

From 0934f6d441183ed58c9b4197d29668bbe2d3ea7d Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell@arm.com>
Date: Tue, 23 Jul 2024 07:30:02 +0000
Subject: [PATCH 016/427] Precommit vscale-fixups.ll test (NFC)

Precommit test for #100080.

(cherry picked from commit c1b70fa5bfea973d4141e27cf9668e9325609e19)
---
 .../AArch64/vscale-fixups.ll                  | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll
index 483955c1c57a0..56b59012eef40 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll
@@ -384,4 +384,51 @@ for.exit:
   ret void
 }
 
+;; This test demonstrates an incorrect MUL VL address calculation. Here there
+;; are two writes that should be `16 * vscale * vscale` apart, however,
+;; loop-strength-reduce has ignored the second `vscale` and offset the second
+;; write by `#4, mul vl` which is an offset of `16 * vscale` dropping a vscale.
+define void @vscale_squared_offset(ptr %alloc) #0 {
+; COMMON-LABEL: vscale_squared_offset:
+; COMMON:       // %bb.0: // %entry
+; COMMON-NEXT:    fmov z0.s, #4.00000000
+; COMMON-NEXT:    mov x8, xzr
+; COMMON-NEXT:    cntw x9
+; COMMON-NEXT:    fmov z1.s, #8.00000000
+; COMMON-NEXT:    ptrue p0.s, vl1
+; COMMON-NEXT:    cmp x8, x9
+; COMMON-NEXT:    b.ge .LBB6_2
+; COMMON-NEXT:  .LBB6_1: // %for.body
+; COMMON-NEXT:    // =>This Inner Loop Header: Depth=1
+; COMMON-NEXT:    st1w { z0.s }, p0, [x0]
+; COMMON-NEXT:    add x8, x8, #1
+; COMMON-NEXT:    st1w { z1.s }, p0, [x0, #4, mul vl]
+; COMMON-NEXT:    addvl x0, x0, #1
+; COMMON-NEXT:    cmp x8, x9
+; COMMON-NEXT:    b.lt .LBB6_1
+; COMMON-NEXT:  .LBB6_2: // %for.exit
+; COMMON-NEXT:    ret
+entry:
+  %vscale = call i64 @llvm.vscale.i64()
+  %c4_vscale = mul i64 %vscale, 4
+  br label %for.check
+for.check:
+  %i = phi i64 [ %next_i, %for.body ], [ 0, %entry ]
+  %is_lt = icmp slt i64 %i, %c4_vscale
+  br i1 %is_lt, label %for.body, label %for.exit
+for.body:
+  %mask = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64 0, i64 1)
+  %upper_offset = mul i64 %i, %c4_vscale
+  %upper_ptr = getelementptr float, ptr %alloc, i64 %upper_offset
+  call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 4.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), ptr %upper_ptr, i32 4, <vscale x 4 x i1> %mask)
+  %lower_i = add i64 %i, %c4_vscale
+  %lower_offset = mul i64 %lower_i, %c4_vscale
+  %lower_ptr = getelementptr float, ptr %alloc, i64 %lower_offset
+  call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 8.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), ptr %lower_ptr, i32 4, <vscale x 4 x i1> %mask)
+  %next_i = add i64 %i, 1
+  br label %for.check
+for.exit:
+  ret void
+}
+
 attributes #0 = { "target-features"="+sve2" vscale_range(1,16) }

From 75642a00e15b722bdfb90726be31f1c8adaeb0c5 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell@arm.com>
Date: Wed, 24 Jul 2024 10:06:34 +0100
Subject: [PATCH 017/427] [LSR] Fix matching vscale immediates (#100080)

Somewhat confusingly a `SCEVMulExpr` is a `SCEVNAryExpr`, so can have
> 2 operands. Previously, the vscale immediate matching did not check
the number of operands of the `SCEVMulExpr`, so would ignore any
operands after the first two.

This led to incorrect codegen (and results) for ArmSME in IREE
(https://github.com/iree-org/iree), which sometimes addresses things
that are a `vscale * vscale` multiple away. The test added with this
change shows an example reduced from IREE. The second write should
be offset from the first `16 * vscale * vscale` (* 4 bytes), however,
previously LSR dropped the second vscale and instead offset the write by
`#4, mul vl`, which is an offset of `16 * vscale` (* 4 bytes).

(cherry picked from commit 7fad04e94b7b594389111ae7eca0883ef18dc90b)
---
 .../Transforms/Scalar/LoopStrengthReduce.cpp  |  6 ++++--
 .../AArch64/vscale-fixups.ll                  | 20 +++++++++++--------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 11f9f7822a15c..91461d1ed2759 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -946,13 +946,15 @@ static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
                            // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
                            SCEV::FlagAnyWrap);
     return Result;
-  } else if (EnableVScaleImmediates)
-    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S))
+  } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+    if (EnableVScaleImmediates && M->getNumOperands() == 2) {
       if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
         if (isa<SCEVVScale>(M->getOperand(1))) {
           S = SE.getConstant(M->getType(), 0);
           return Immediate::getScalable(C->getValue()->getSExtValue());
         }
+    }
+  }
   return Immediate::getZero();
 }
 
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll
index 56b59012eef40..588696d20227f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll
@@ -384,27 +384,31 @@ for.exit:
   ret void
 }
 
-;; This test demonstrates an incorrect MUL VL address calculation. Here there
-;; are two writes that should be `16 * vscale * vscale` apart, however,
-;; loop-strength-reduce has ignored the second `vscale` and offset the second
-;; write by `#4, mul vl` which is an offset of `16 * vscale` dropping a vscale.
+;; Here are two writes that should be `16 * vscale * vscale` apart, so MUL VL
+;; addressing cannot be used to offset the second write, as for example,
+;; `#4, mul vl` would only be an offset of `16 * vscale` (dropping a vscale).
 define void @vscale_squared_offset(ptr %alloc) #0 {
 ; COMMON-LABEL: vscale_squared_offset:
 ; COMMON:       // %bb.0: // %entry
+; COMMON-NEXT:    rdvl x9, #1
 ; COMMON-NEXT:    fmov z0.s, #4.00000000
 ; COMMON-NEXT:    mov x8, xzr
-; COMMON-NEXT:    cntw x9
+; COMMON-NEXT:    lsr x9, x9, #4
 ; COMMON-NEXT:    fmov z1.s, #8.00000000
+; COMMON-NEXT:    cntw x10
 ; COMMON-NEXT:    ptrue p0.s, vl1
-; COMMON-NEXT:    cmp x8, x9
+; COMMON-NEXT:    umull x9, w9, w9
+; COMMON-NEXT:    lsl x9, x9, #6
+; COMMON-NEXT:    cmp x8, x10
 ; COMMON-NEXT:    b.ge .LBB6_2
 ; COMMON-NEXT:  .LBB6_1: // %for.body
 ; COMMON-NEXT:    // =>This Inner Loop Header: Depth=1
+; COMMON-NEXT:    add x11, x0, x9
 ; COMMON-NEXT:    st1w { z0.s }, p0, [x0]
 ; COMMON-NEXT:    add x8, x8, #1
-; COMMON-NEXT:    st1w { z1.s }, p0, [x0, #4, mul vl]
+; COMMON-NEXT:    st1w { z1.s }, p0, [x11]
 ; COMMON-NEXT:    addvl x0, x0, #1
-; COMMON-NEXT:    cmp x8, x9
+; COMMON-NEXT:    cmp x8, x10
 ; COMMON-NEXT:    b.lt .LBB6_1
 ; COMMON-NEXT:  .LBB6_2: // %for.exit
 ; COMMON-NEXT:    ret

From a87fbeb3a77a53ded341277c5b326f7696d47594 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333@gmail.com>
Date: Wed, 24 Jul 2024 20:06:36 +0800
Subject: [PATCH 018/427] [ValueTracking] Don't use CondContext in dataflow
 analysis of phi nodes (#100316)

See the following case:
```
define i16 @pr100298() {
entry:
  br label %for.inc

for.inc:
  %indvar = phi i32 [ -15, %entry ], [ %mask, %for.inc ]
  %add = add nsw i32 %indvar, 9
  %mask = and i32 %add, 65535
  %cmp1 = icmp ugt i32 %mask, 5
  br i1 %cmp1, label %for.inc, label %for.end

for.end:
  %conv = trunc i32 %add to i16
  %cmp2 = icmp ugt i32 %mask, 3
  %shl = shl nuw i16 %conv, 14
  %res = select i1 %cmp2, i16 %conv, i16 %shl
  ret i16 %res
}
```

When computing knownbits of `%shl` with `%cmp2=false`, we cannot use
this condition in the analysis of `%mask (%for.inc -> %for.inc)`.

Fixes https://github.com/llvm/llvm-project/issues/100298.

(cherry picked from commit 59eae919c938f890e9b9b4be8a3fa3cb1b11ed89)
---
 llvm/include/llvm/Analysis/SimplifyQuery.h   |  6 +++
 llvm/lib/Analysis/ValueTracking.cpp          | 22 +++++------
 llvm/test/Transforms/InstCombine/pr100298.ll | 39 ++++++++++++++++++++
 3 files changed, 56 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/Transforms/InstCombine/pr100298.ll

diff --git a/llvm/include/llvm/Analysis/SimplifyQuery.h b/llvm/include/llvm/Analysis/SimplifyQuery.h
index a560744f01222..e8f43c8c2e91f 100644
--- a/llvm/include/llvm/Analysis/SimplifyQuery.h
+++ b/llvm/include/llvm/Analysis/SimplifyQuery.h
@@ -130,6 +130,12 @@ struct SimplifyQuery {
     Copy.CC = &CC;
     return Copy;
   }
+
+  SimplifyQuery getWithoutCondContext() const {
+    SimplifyQuery Copy(*this);
+    Copy.CC = nullptr;
+    return Copy;
+  }
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 40fe1ffe13f1b..4b77c0046cc70 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1435,7 +1435,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
         // inferred hold at original context instruction.  TODO: It may be
         // correct to use the original context.  IF warranted, explore and
         // add sufficient tests to cover.
-        SimplifyQuery RecQ = Q;
+        SimplifyQuery RecQ = Q.getWithoutCondContext();
         RecQ.CxtI = P;
         computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
         switch (Opcode) {
@@ -1468,7 +1468,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
         // phi. This is important because that is where the value is actually
         // "evaluated" even though it is used later somewhere else. (see also
         // D69571).
-        SimplifyQuery RecQ = Q;
+        SimplifyQuery RecQ = Q.getWithoutCondContext();
 
         unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
         Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
@@ -1546,7 +1546,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
         // phi. This is important because that is where the value is actually
         // "evaluated" even though it is used later somewhere else. (see also
         // D69571).
-        SimplifyQuery RecQ = Q;
+        SimplifyQuery RecQ = Q.getWithoutCondContext();
         RecQ.CxtI = P->getIncomingBlock(u)->getTerminator();
 
         Known2 = KnownBits(BitWidth);
@@ -2329,7 +2329,7 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
     // it is an induction variable where in each step its value is a power of
     // two.
     auto *PN = cast<PHINode>(I);
-    SimplifyQuery RecQ = Q;
+    SimplifyQuery RecQ = Q.getWithoutCondContext();
 
     // Check if it is an induction variable and always power of two.
     if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ))
@@ -2943,7 +2943,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
       return true;
 
     // Check if all incoming values are non-zero using recursion.
-    SimplifyQuery RecQ = Q;
+    SimplifyQuery RecQ = Q.getWithoutCondContext();
     unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
     return llvm::all_of(PN->operands(), [&](const Use &U) {
       if (U.get() == PN)
@@ -3509,7 +3509,7 @@ static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
     if (UsedFullRecursion)
       return false;
 
-    SimplifyQuery RecQ = Q;
+    SimplifyQuery RecQ = Q.getWithoutCondContext();
     RecQ.CxtI = IncomBB->getTerminator();
     if (!isKnownNonEqual(IV1, IV2, DemandedElts, Depth + 1, RecQ))
       return false;
@@ -4001,7 +4001,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
 
       // Take the minimum of all incoming values.  This can't infinitely loop
       // because of our depth threshold.
-      SimplifyQuery RecQ = Q;
+      SimplifyQuery RecQ = Q.getWithoutCondContext();
       Tmp = TyBits;
       for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) {
         if (Tmp == 1) return Tmp;
@@ -5909,10 +5909,10 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
         // Recurse, but cap the recursion to two levels, because we don't want
         // to waste time spinning around in loops. We need at least depth 2 to
         // detect known sign bits.
-        computeKnownFPClass(
-            IncValue, DemandedElts, InterestedClasses, KnownSrc,
-            PhiRecursionLimit,
-            Q.getWithInstruction(P->getIncomingBlock(U)->getTerminator()));
+        computeKnownFPClass(IncValue, DemandedElts, InterestedClasses, KnownSrc,
+                            PhiRecursionLimit,
+                            Q.getWithoutCondContext().getWithInstruction(
+                                P->getIncomingBlock(U)->getTerminator()));
 
         if (First) {
           Known = KnownSrc;
diff --git a/llvm/test/Transforms/InstCombine/pr100298.ll b/llvm/test/Transforms/InstCombine/pr100298.ll
new file mode 100644
index 0000000000000..6cf2a71bb916e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr100298.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+; Make sure that the result of computeKnownBits for %indvar is correct.
+
+define i16 @pr100298() {
+; CHECK-LABEL: define i16 @pr100298() {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[FOR_INC:.*]]
+; CHECK:       [[FOR_INC]]:
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ -15, %[[ENTRY]] ], [ [[MASK:%.*]], %[[FOR_INC]] ]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[INDVAR]], 9
+; CHECK-NEXT:    [[MASK]] = and i32 [[ADD]], 65535
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i32 [[MASK]], 5
+; CHECK-NEXT:    br i1 [[CMP1]], label %[[FOR_INC]], label %[[FOR_END:.*]]
+; CHECK:       [[FOR_END]]:
+; CHECK-NEXT:    [[CONV:%.*]] = trunc i32 [[ADD]] to i16
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt i32 [[MASK]], 3
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i16 [[CONV]], 14
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP2]], i16 [[CONV]], i16 [[SHL]]
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+entry:
+  br label %for.inc
+
+for.inc:
+  %indvar = phi i32 [ -15, %entry ], [ %mask, %for.inc ]
+  %add = add nsw i32 %indvar, 9
+  %mask = and i32 %add, 65535
+  %cmp1 = icmp ugt i32 %mask, 5
+  br i1 %cmp1, label %for.inc, label %for.end
+
+for.end:
+  %conv = trunc i32 %add to i16
+  %cmp2 = icmp ugt i32 %mask, 3
+  %shl = shl nuw i16 %conv, 14
+  %res = select i1 %cmp2, i16 %conv, i16 %shl
+  ret i16 %res
+}

From fc0b1ce075a570a7631eab23ab865023346b574e Mon Sep 17 00:00:00 2001
From: Akira Hatanaka <ahatanak@gmail.com>
Date: Wed, 24 Jul 2024 02:04:37 -0700
Subject: [PATCH 019/427] [PAC] Define __builtin_ptrauth_type_discriminator
 (#100204)

The builtin computes the discriminator for a type, which can be used to
sign/authenticate function pointers and member function pointers.

If the type passed to the builtin is a C++ member function pointer type,
the result is the discriminator used to signed member function pointers
of that type. If the type is a function, function pointer, or function
reference type, the result is the discriminator used to sign functions
of that type. It is ill-formed to use this builtin with any other type.

A call to this function is an integer constant expression.

Co-Authored-By: John McCall rjmccall@apple.com
(cherry picked from commit 666e3326fedfb6a033494c36c36aa95c4124d642)
---
 .../clang/Basic/DiagnosticSemaKinds.td        |  3 +
 clang/include/clang/Basic/TokenKinds.def      |  2 +
 clang/include/clang/Parse/Parser.h            |  2 +
 clang/include/clang/Sema/Sema.h               |  2 +
 clang/lib/AST/ExprConstant.cpp                |  6 ++
 clang/lib/AST/ItaniumMangle.cpp               |  8 +++
 clang/lib/Headers/ptrauth.h                   | 19 ++++++
 clang/lib/Parse/ParseExpr.cpp                 | 23 +++++++
 clang/lib/Sema/SemaChecking.cpp               | 10 ++-
 clang/lib/Sema/SemaExpr.cpp                   | 19 ++++++
 clang/test/AST/ast-dump-ptrauth-json.cpp      |  5 ++
 clang/test/CodeGenCXX/mangle-fail.cpp         | 14 ++++
 clang/test/Sema/ptrauth-intrinsics-macro.c    |  5 ++
 .../SemaCXX/ptrauth-type-discriminator.cpp    | 68 +++++++++++++++++++
 14 files changed, 183 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/AST/ast-dump-ptrauth-json.cpp
 create mode 100644 clang/test/SemaCXX/ptrauth-type-discriminator.cpp

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index b8d97a6b14fe6..eb0506e71fe3f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -942,6 +942,9 @@ def warn_ptrauth_auth_null_pointer :
   InGroup<PtrAuthNullPointers>;
 def err_ptrauth_string_not_literal : Error<
   "argument must be a string literal%select{| of char type}0">;
+def err_ptrauth_type_disc_undiscriminated : Error<
+  "cannot pass undiscriminated type %0 to "
+  "'__builtin_ptrauth_type_discriminator'">;
 
 def note_ptrauth_virtual_function_pointer_incomplete_arg_ret :
   Note<"cannot take an address of a virtual member function if its return or "
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 7f4912b9bcd96..8c54661e65cf4 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -596,6 +596,8 @@ ALIAS("__is_same_as", __is_same, KEYCXX)
 KEYWORD(__private_extern__          , KEYALL)
 KEYWORD(__module_private__          , KEYALL)
 
+UNARY_EXPR_OR_TYPE_TRAIT(__builtin_ptrauth_type_discriminator, PtrAuthTypeDiscriminator, KEYALL)
+
 // Extension that will be enabled for Microsoft, Borland and PS4, but can be
 // disabled via '-fno-declspec'.
 KEYWORD(__declspec                  , 0)
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 613bab9120dfc..35bb1a19d40f0 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -3890,6 +3890,8 @@ class Parser : public CodeCompletionHandler {
   ExprResult ParseArrayTypeTrait();
   ExprResult ParseExpressionTrait();
 
+  ExprResult ParseBuiltinPtrauthTypeDiscriminator();
+
   //===--------------------------------------------------------------------===//
   // Preprocessor code-completion pass-through
   void CodeCompleteDirective(bool InConditional) override;
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index d638d31e050dc..7bfdaaae45a93 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -3456,6 +3456,8 @@ class Sema final : public SemaBase {
                                     TemplateIdAnnotation *TemplateId,
                                     bool IsMemberSpecialization);
 
+  bool checkPointerAuthEnabled(SourceLocation Loc, SourceRange Range);
+
   bool checkConstantPointerAuthKey(Expr *keyExpr, unsigned &key);
 
   /// Diagnose function specifiers on a declaration of an identifier that
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index fcb382474ea62..03a606102a77e 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14054,6 +14054,12 @@ bool IntExprEvaluator::VisitUnaryExprOrTypeTraitExpr(
                      E);
   }
 
+  case UETT_PtrAuthTypeDiscriminator: {
+    if (E->getArgumentType()->isDependentType())
+      return false;
+    return Success(
+        Info.Ctx.getPointerAuthTypeDiscriminator(E->getArgumentType()), E);
+  }
   case UETT_VecStep: {
     QualType Ty = E->getTypeOfArgument();
 
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 40ef82785f454..d46d621d4c7d4 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -5179,6 +5179,14 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity,
       Diags.Report(DiagID);
       return;
     }
+    case UETT_PtrAuthTypeDiscriminator: {
+      DiagnosticsEngine &Diags = Context.getDiags();
+      unsigned DiagID = Diags.getCustomDiagID(
+          DiagnosticsEngine::Error,
+          "cannot yet mangle __builtin_ptrauth_type_discriminator expression");
+      Diags.Report(E->getExprLoc(), DiagID);
+      return;
+    }
     case UETT_VecStep: {
       DiagnosticsEngine &Diags = Context.getDiags();
       unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
diff --git a/clang/lib/Headers/ptrauth.h b/clang/lib/Headers/ptrauth.h
index e0bc8c4f9acf7..4724155b0dc79 100644
--- a/clang/lib/Headers/ptrauth.h
+++ b/clang/lib/Headers/ptrauth.h
@@ -202,6 +202,23 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t;
 #define ptrauth_string_discriminator(__string)                                 \
   __builtin_ptrauth_string_discriminator(__string)
 
+/* Compute a constant discriminator from the given type.
+
+   The result can be used as the second argument to
+   ptrauth_blend_discriminator or the third argument to the
+   __ptrauth qualifier.  It has type size_t.
+
+   If the type is a C++ member function pointer type, the result is
+   the discriminator used to signed member function pointers of that
+   type.  If the type is a function, function pointer, or function
+   reference type, the result is the discriminator used to sign
+   functions of that type.  It is ill-formed to use this macro with any
+   other type.
+
+   A call to this function is an integer constant expression. */
+#define ptrauth_type_discriminator(__type)                                     \
+  __builtin_ptrauth_type_discriminator(__type)
+
 /* Compute a signature for the given pair of pointer-sized values.
    The order of the arguments is significant.
 
@@ -289,6 +306,8 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t;
     ((ptrauth_extra_data_t)0);                                                 \
   })
 
+#define ptrauth_type_discriminator(__type) ((ptrauth_extra_data_t)0)
+
 #define ptrauth_sign_generic_data(__value, __data)                             \
   ({                                                                           \
     (void)__value;                                                             \
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index a12c375c8d48c..0a017ae79de75 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -841,6 +841,26 @@ bool Parser::isRevertibleTypeTrait(const IdentifierInfo *II,
   return false;
 }
 
+ExprResult Parser::ParseBuiltinPtrauthTypeDiscriminator() {
+  SourceLocation Loc = ConsumeToken();
+
+  BalancedDelimiterTracker T(*this, tok::l_paren);
+  if (T.expectAndConsume())
+    return ExprError();
+
+  TypeResult Ty = ParseTypeName();
+  if (Ty.isInvalid()) {
+    SkipUntil(tok::r_paren, StopAtSemi);
+    return ExprError();
+  }
+
+  SourceLocation EndLoc = Tok.getLocation();
+  T.consumeClose();
+  return Actions.ActOnUnaryExprOrTypeTraitExpr(
+      Loc, UETT_PtrAuthTypeDiscriminator,
+      /*isType=*/true, Ty.get().getAsOpaquePtr(), SourceRange(Loc, EndLoc));
+}
+
 /// Parse a cast-expression, or, if \pisUnaryExpression is true, parse
 /// a unary-expression.
 ///
@@ -1806,6 +1826,9 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
     Res = ParseArrayTypeTrait();
     break;
 
+  case tok::kw___builtin_ptrauth_type_discriminator:
+    return ParseBuiltinPtrauthTypeDiscriminator();
+
   case tok::kw___is_lvalue_expr:
   case tok::kw___is_rvalue_expr:
     if (NotPrimaryExpression)
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 45b9bbb23dbf7..cf1196ad23c21 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1489,14 +1489,18 @@ enum PointerAuthOpKind {
 };
 }
 
-static bool checkPointerAuthEnabled(Sema &S, Expr *E) {
-  if (S.getLangOpts().PointerAuthIntrinsics)
+bool Sema::checkPointerAuthEnabled(SourceLocation Loc, SourceRange Range) {
+  if (getLangOpts().PointerAuthIntrinsics)
     return false;
 
-  S.Diag(E->getExprLoc(), diag::err_ptrauth_disabled) << E->getSourceRange();
+  Diag(Loc, diag::err_ptrauth_disabled) << Range;
   return true;
 }
 
+static bool checkPointerAuthEnabled(Sema &S, Expr *E) {
+  return S.checkPointerAuthEnabled(E->getExprLoc(), E->getSourceRange());
+}
+
 static bool checkPointerAuthKey(Sema &S, Expr *&Arg) {
   // Convert it to type 'int'.
   if (convertArgumentToType(S, Arg, S.Context.IntTy))
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 439db55668cc6..9207bf7a41349 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -4117,6 +4117,21 @@ static bool CheckVectorElementsTraitOperandType(Sema &S, QualType T,
   return false;
 }
 
+static bool checkPtrAuthTypeDiscriminatorOperandType(Sema &S, QualType T,
+                                                     SourceLocation Loc,
+                                                     SourceRange ArgRange) {
+  if (S.checkPointerAuthEnabled(Loc, ArgRange))
+    return true;
+
+  if (!T->isFunctionType() && !T->isFunctionPointerType() &&
+      !T->isFunctionReferenceType() && !T->isMemberFunctionPointerType()) {
+    S.Diag(Loc, diag::err_ptrauth_type_disc_undiscriminated) << T << ArgRange;
+    return true;
+  }
+
+  return false;
+}
+
 static bool CheckExtensionTraitOperandType(Sema &S, QualType T,
                                            SourceLocation Loc,
                                            SourceRange ArgRange,
@@ -4511,6 +4526,10 @@ bool Sema::CheckUnaryExprOrTypeTraitOperand(QualType ExprType,
     return CheckVectorElementsTraitOperandType(*this, ExprType, OpLoc,
                                                ExprRange);
 
+  if (ExprKind == UETT_PtrAuthTypeDiscriminator)
+    return checkPtrAuthTypeDiscriminatorOperandType(*this, ExprType, OpLoc,
+                                                    ExprRange);
+
   // Explicitly list some types as extensions.
   if (!CheckExtensionTraitOperandType(*this, ExprType, OpLoc, ExprRange,
                                       ExprKind))
diff --git a/clang/test/AST/ast-dump-ptrauth-json.cpp b/clang/test/AST/ast-dump-ptrauth-json.cpp
new file mode 100644
index 0000000000000..125cda0cff53a
--- /dev/null
+++ b/clang/test/AST/ast-dump-ptrauth-json.cpp
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -fptrauth-intrinsics -std=c++11 -ast-dump=json %s | FileCheck %s
+
+// CHECK: "name": "__builtin_ptrauth_type_discriminator",
+
+int d = __builtin_ptrauth_type_discriminator(int());
diff --git a/clang/test/CodeGenCXX/mangle-fail.cpp b/clang/test/CodeGenCXX/mangle-fail.cpp
index b588d57749fa3..f3b50cfb54dbd 100644
--- a/clang/test/CodeGenCXX/mangle-fail.cpp
+++ b/clang/test/CodeGenCXX/mangle-fail.cpp
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -emit-llvm-only -x c++ -std=c++11 -triple %itanium_abi_triple -verify %s -DN=1
 // RUN: %clang_cc1 -emit-llvm-only -x c++ -std=c++11 -triple %itanium_abi_triple -verify %s -DN=2
+// RUN: %clang_cc1 -emit-llvm-only -x c++ -std=c++11 -triple aarch64-linux-gnu -fptrauth-intrinsics -verify %s -DN=3
 
 struct A { int a; };
 
@@ -13,6 +14,19 @@ template void test<int>(int (&)[sizeof(int)]);
 template<class T> void test(int (&)[sizeof((A){}, T())]) {} // expected-error {{cannot yet mangle}}
 template void test<int>(int (&)[sizeof(A)]);
 
+#elif N == 3
+// __builtin_ptrauth_type_discriminator
+template <class T, unsigned disc>
+struct S1 {};
+
+template<class T>
+void func(S1<T, __builtin_ptrauth_type_discriminator(T)> s1) { // expected-error {{cannot yet mangle __builtin_ptrauth_type_discriminator expression}}
+}
+
+void testfunc1() {
+  func(S1<int(), __builtin_ptrauth_type_discriminator(int())>());
+}
+
 // FIXME: There are several more cases we can't yet mangle.
 
 #else
diff --git a/clang/test/Sema/ptrauth-intrinsics-macro.c b/clang/test/Sema/ptrauth-intrinsics-macro.c
index f76f677315dd3..adbb71a9d6e50 100644
--- a/clang/test/Sema/ptrauth-intrinsics-macro.c
+++ b/clang/test/Sema/ptrauth-intrinsics-macro.c
@@ -38,6 +38,11 @@ void test_string_discriminator(int *dp) {
   (void)t0;
 }
 
+void test_type_discriminator(int *dp) {
+  ptrauth_extra_data_t t0 = ptrauth_type_discriminator(int (*)(int));
+  (void)t0;
+}
+
 void test_sign_constant(int *dp) {
   dp = ptrauth_sign_constant(&dv, VALID_DATA_KEY, 0);
 }
diff --git a/clang/test/SemaCXX/ptrauth-type-discriminator.cpp b/clang/test/SemaCXX/ptrauth-type-discriminator.cpp
new file mode 100644
index 0000000000000..685ca1f03fddd
--- /dev/null
+++ b/clang/test/SemaCXX/ptrauth-type-discriminator.cpp
@@ -0,0 +1,68 @@
+// RUN: %clang_cc1 -triple arm64-apple-ios -std=c++17 -Wno-vla -fsyntax-only -verify -fptrauth-intrinsics %s
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -std=c++17 -Wno-vla -fsyntax-only -verify -fptrauth-intrinsics %s
+
+// RUN: not %clang_cc1 -triple arm64-apple-ios -std=c++17 -Wno-vla -fsyntax-only %s 2>&1 | FileCheck %s
+// CHECK: this target does not support pointer authentication
+
+struct S {
+  virtual int foo();
+};
+
+template <class T>
+constexpr unsigned dependentOperandDisc() {
+  return __builtin_ptrauth_type_discriminator(T);
+}
+
+void test_builtin_ptrauth_type_discriminator(unsigned s) {
+  typedef int (S::*MemFnTy)();
+  MemFnTy memFnPtr;
+  int (S::*memFnPtr2)();
+  constexpr unsigned d0 = __builtin_ptrauth_type_discriminator(MemFnTy);
+  static_assert(d0 == __builtin_ptrauth_string_discriminator("_ZTSM1SFivE"));
+  static_assert(d0 == 60844);
+  static_assert(__builtin_ptrauth_type_discriminator(int (S::*)()) == d0);
+  static_assert(__builtin_ptrauth_type_discriminator(decltype(memFnPtr)) == d0);
+  static_assert(__builtin_ptrauth_type_discriminator(decltype(memFnPtr2)) == d0);
+  static_assert(__builtin_ptrauth_type_discriminator(decltype(&S::foo)) == d0);
+  static_assert(dependentOperandDisc<decltype(&S::foo)>() == d0);
+
+  constexpr unsigned d1 = __builtin_ptrauth_type_discriminator(void (S::*)(int));
+  static_assert(__builtin_ptrauth_string_discriminator("_ZTSM1SFviE") == d1);
+  static_assert(d1 == 39121);
+
+  constexpr unsigned d2 = __builtin_ptrauth_type_discriminator(void (S::*)(float));
+  static_assert(__builtin_ptrauth_string_discriminator("_ZTSM1SFvfE") == d2);
+  static_assert(d2 == 52453);
+
+  constexpr unsigned d3 = __builtin_ptrauth_type_discriminator(int (*())[s]);
+  static_assert(__builtin_ptrauth_string_discriminator("FPE") == d3);
+  static_assert(d3 == 34128);
+
+  int f4(float);
+  constexpr unsigned d4 = __builtin_ptrauth_type_discriminator(decltype(f4));
+  static_assert(__builtin_ptrauth_type_discriminator(int (*)(float)) == d4);
+  static_assert(__builtin_ptrauth_string_discriminator("FifE") == d4);
+  static_assert(d4 == 48468);
+
+  int f5(int);
+  constexpr unsigned d5 = __builtin_ptrauth_type_discriminator(decltype(f5));
+  static_assert(__builtin_ptrauth_type_discriminator(int (*)(int)) == d5);
+  static_assert(__builtin_ptrauth_type_discriminator(short (*)(short)) == d5);
+  static_assert(__builtin_ptrauth_type_discriminator(char (*)(char)) == d5);
+  static_assert(__builtin_ptrauth_type_discriminator(long (*)(long)) == d5);
+  static_assert(__builtin_ptrauth_type_discriminator(unsigned int (*)(unsigned int)) == d5);
+  static_assert(__builtin_ptrauth_type_discriminator(int (&)(int)) == d5);
+  static_assert(__builtin_ptrauth_string_discriminator("FiiE") == d5);
+  static_assert(d5 == 2981);
+
+  int t;
+  int vmarray[s];
+  (void)__builtin_ptrauth_type_discriminator(t); // expected-error {{unknown type name 't'}}
+  (void)__builtin_ptrauth_type_discriminator(&t); // expected-error {{expected a type}}
+  (void)__builtin_ptrauth_type_discriminator(decltype(vmarray)); // expected-error {{cannot pass undiscriminated type 'decltype(vmarray)' (aka 'int[s]')}}
+  (void)__builtin_ptrauth_type_discriminator(int *); // expected-error {{cannot pass undiscriminated type 'int *' to '__builtin_ptrauth_type_discriminator'}}
+  (void)__builtin_ptrauth_type_discriminator(); // expected-error {{expected a type}}
+  (void)__builtin_ptrauth_type_discriminator(int (*)(int), int (*)(int));
+  // expected-error@-1 {{expected ')'}}
+  // expected-note@-2 {{to match this '('}}
+}

From 5f8189c47d94f2cf909ca3c618e72475ab1166c4 Mon Sep 17 00:00:00 2001
From: jeanPerier <jperier@nvidia.com>
Date: Wed, 24 Jul 2024 10:24:04 +0200
Subject: [PATCH 020/427] [flang] fix C_PTR function result lowering (#100082)

Functions returning C_PTR were lowered to function returning intptr (i64
on 64bit arch). This caused conflicts when these functions were defined
as returning !fir.ref<none>/llvm.ptr in other compiler generated
contexts (e.g., malloc).

Lower them to return !fir.ref<none>.

This should deal with https://github.com/llvm/llvm-project/issues/97325
and https://github.com/llvm/llvm-project/issues/98644.

(cherry picked from commit 1ead51a86c6c746a1b9948ca1ee142df223ffebd)
---
 flang/lib/Optimizer/Builder/FIRBuilder.cpp    |  54 +++++----
 .../Optimizer/Transforms/AbstractResult.cpp   | 108 +++++++++---------
 flang/test/Fir/abstract-results.fir           |  36 +++---
 3 files changed, 110 insertions(+), 88 deletions(-)

diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index 2961df96b3cab..fbe79d0e45e5a 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -1541,21 +1541,44 @@ mlir::Value fir::factory::genMaxWithZero(fir::FirOpBuilder &builder,
                                                zero);
 }
 
+static std::pair<mlir::Value, mlir::Type>
+genCPtrOrCFunptrFieldIndex(fir::FirOpBuilder &builder, mlir::Location loc,
+                           mlir::Type cptrTy) {
+  auto recTy = mlir::cast<fir::RecordType>(cptrTy);
+  assert(recTy.getTypeList().size() == 1);
+  auto addrFieldName = recTy.getTypeList()[0].first;
+  mlir::Type addrFieldTy = recTy.getTypeList()[0].second;
+  auto fieldIndexType = fir::FieldType::get(cptrTy.getContext());
+  mlir::Value addrFieldIndex = builder.create<fir::FieldIndexOp>(
+      loc, fieldIndexType, addrFieldName, recTy,
+      /*typeParams=*/mlir::ValueRange{});
+  return {addrFieldIndex, addrFieldTy};
+}
+
 mlir::Value fir::factory::genCPtrOrCFunptrAddr(fir::FirOpBuilder &builder,
                                                mlir::Location loc,
                                                mlir::Value cPtr,
                                                mlir::Type ty) {
-  assert(mlir::isa<fir::RecordType>(ty));
-  auto recTy = mlir::dyn_cast<fir::RecordType>(ty);
-  assert(recTy.getTypeList().size() == 1);
-  auto fieldName = recTy.getTypeList()[0].first;
-  mlir::Type fieldTy = recTy.getTypeList()[0].second;
-  auto fieldIndexType = fir::FieldType::get(ty.getContext());
-  mlir::Value field =
-      builder.create<fir::FieldIndexOp>(loc, fieldIndexType, fieldName, recTy,
-                                        /*typeParams=*/mlir::ValueRange{});
-  return builder.create<fir::CoordinateOp>(loc, builder.getRefType(fieldTy),
-                                           cPtr, field);
+  auto [addrFieldIndex, addrFieldTy] =
+      genCPtrOrCFunptrFieldIndex(builder, loc, ty);
+  return builder.create<fir::CoordinateOp>(loc, builder.getRefType(addrFieldTy),
+                                           cPtr, addrFieldIndex);
+}
+
+mlir::Value fir::factory::genCPtrOrCFunptrValue(fir::FirOpBuilder &builder,
+                                                mlir::Location loc,
+                                                mlir::Value cPtr) {
+  mlir::Type cPtrTy = fir::unwrapRefType(cPtr.getType());
+  if (fir::isa_ref_type(cPtr.getType())) {
+    mlir::Value cPtrAddr =
+        fir::factory::genCPtrOrCFunptrAddr(builder, loc, cPtr, cPtrTy);
+    return builder.create<fir::LoadOp>(loc, cPtrAddr);
+  }
+  auto [addrFieldIndex, addrFieldTy] =
+      genCPtrOrCFunptrFieldIndex(builder, loc, cPtrTy);
+  auto arrayAttr =
+      builder.getArrayAttr({builder.getIntegerAttr(builder.getIndexType(), 0)});
+  return builder.create<fir::ExtractValueOp>(loc, addrFieldTy, cPtr, arrayAttr);
 }
 
 fir::BoxValue fir::factory::createBoxValue(fir::FirOpBuilder &builder,
@@ -1596,15 +1619,6 @@ fir::BoxValue fir::factory::createBoxValue(fir::FirOpBuilder &builder,
   return fir::BoxValue(box, lbounds, explicitTypeParams);
 }
 
-mlir::Value fir::factory::genCPtrOrCFunptrValue(fir::FirOpBuilder &builder,
-                                                mlir::Location loc,
-                                                mlir::Value cPtr) {
-  mlir::Type cPtrTy = fir::unwrapRefType(cPtr.getType());
-  mlir::Value cPtrAddr =
-      fir::factory::genCPtrOrCFunptrAddr(builder, loc, cPtr, cPtrTy);
-  return builder.create<fir::LoadOp>(loc, cPtrAddr);
-}
-
 mlir::Value fir::factory::createNullBoxProc(fir::FirOpBuilder &builder,
                                             mlir::Location loc,
                                             mlir::Type boxType) {
diff --git a/flang/lib/Optimizer/Transforms/AbstractResult.cpp b/flang/lib/Optimizer/Transforms/AbstractResult.cpp
index 3906aa553cb34..ff37310224e85 100644
--- a/flang/lib/Optimizer/Transforms/AbstractResult.cpp
+++ b/flang/lib/Optimizer/Transforms/AbstractResult.cpp
@@ -59,14 +59,16 @@ static mlir::FunctionType getNewFunctionType(mlir::FunctionType funcTy,
                                  /*resultTypes=*/{});
 }
 
+static mlir::Type getVoidPtrType(mlir::MLIRContext *context) {
+  return fir::ReferenceType::get(mlir::NoneType::get(context));
+}
+
 /// This is for function result types that are of type C_PTR from ISO_C_BINDING.
 /// Follow the ABI for interoperability with C.
 static mlir::FunctionType getCPtrFunctionType(mlir::FunctionType funcTy) {
-  auto resultType = funcTy.getResult(0);
-  assert(fir::isa_builtin_cptr_type(resultType));
-  llvm::SmallVector<mlir::Type> outputTypes;
-  auto recTy = mlir::dyn_cast<fir::RecordType>(resultType);
-  outputTypes.emplace_back(recTy.getTypeList()[0].second);
+  assert(fir::isa_builtin_cptr_type(funcTy.getResult(0)));
+  llvm::SmallVector<mlir::Type> outputTypes{
+      getVoidPtrType(funcTy.getContext())};
   return mlir::FunctionType::get(funcTy.getContext(), funcTy.getInputs(),
                                  outputTypes);
 }
@@ -109,15 +111,11 @@ class CallConversion : public mlir::OpRewritePattern<Op> {
           saveResult.getTypeparams());
 
     llvm::SmallVector<mlir::Type> newResultTypes;
-    // TODO: This should be generalized for derived types, and it is
-    // architecture and OS dependent.
     bool isResultBuiltinCPtr = fir::isa_builtin_cptr_type(result.getType());
-    Op newOp;
-    if (isResultBuiltinCPtr) {
-      auto recTy = mlir::dyn_cast<fir::RecordType>(result.getType());
-      newResultTypes.emplace_back(recTy.getTypeList()[0].second);
-    }
+    if (isResultBuiltinCPtr)
+      newResultTypes.emplace_back(getVoidPtrType(result.getContext()));
 
+    Op newOp;
     // fir::CallOp specific handling.
     if constexpr (std::is_same_v<Op, fir::CallOp>) {
       if (op.getCallee()) {
@@ -175,7 +173,7 @@ class CallConversion : public mlir::OpRewritePattern<Op> {
       FirOpBuilder builder(rewriter, module);
       mlir::Value saveAddr = fir::factory::genCPtrOrCFunptrAddr(
           builder, loc, save, result.getType());
-      rewriter.create<fir::StoreOp>(loc, newOp->getResult(0), saveAddr);
+      builder.createStoreWithConvert(loc, newOp->getResult(0), saveAddr);
     }
     op->dropAllReferences();
     rewriter.eraseOp(op);
@@ -210,42 +208,52 @@ class ReturnOpConversion : public mlir::OpRewritePattern<mlir::func::ReturnOp> {
                   mlir::PatternRewriter &rewriter) const override {
     auto loc = ret.getLoc();
     rewriter.setInsertionPoint(ret);
-    auto returnedValue = ret.getOperand(0);
-    bool replacedStorage = false;
-    if (auto *op = returnedValue.getDefiningOp())
-      if (auto load = mlir::dyn_cast<fir::LoadOp>(op)) {
-        auto resultStorage = load.getMemref();
-        // The result alloca may be behind a fir.declare, if any.
-        if (auto declare = mlir::dyn_cast_or_null<fir::DeclareOp>(
-                resultStorage.getDefiningOp()))
-          resultStorage = declare.getMemref();
-        // TODO: This should be generalized for derived types, and it is
-        // architecture and OS dependent.
-        if (fir::isa_builtin_cptr_type(returnedValue.getType())) {
-          rewriter.eraseOp(load);
-          auto module = ret->getParentOfType<mlir::ModuleOp>();
-          FirOpBuilder builder(rewriter, module);
-          mlir::Value retAddr = fir::factory::genCPtrOrCFunptrAddr(
-              builder, loc, resultStorage, returnedValue.getType());
-          mlir::Value retValue = rewriter.create<fir::LoadOp>(
-              loc, fir::unwrapRefType(retAddr.getType()), retAddr);
-          rewriter.replaceOpWithNewOp<mlir::func::ReturnOp>(
-              ret, mlir::ValueRange{retValue});
-          return mlir::success();
-        }
-        resultStorage.replaceAllUsesWith(newArg);
-        replacedStorage = true;
-        if (auto *alloc = resultStorage.getDefiningOp())
-          if (alloc->use_empty())
-            rewriter.eraseOp(alloc);
+    mlir::Value resultValue = ret.getOperand(0);
+    fir::LoadOp resultLoad;
+    mlir::Value resultStorage;
+    // Identify result local storage.
+    if (auto load = resultValue.getDefiningOp<fir::LoadOp>()) {
+      resultLoad = load;
+      resultStorage = load.getMemref();
+      // The result alloca may be behind a fir.declare, if any.
+      if (auto declare = resultStorage.getDefiningOp<fir::DeclareOp>())
+        resultStorage = declare.getMemref();
+    }
+    // Replace old local storage with new storage argument, unless
+    // the derived type is C_PTR/C_FUN_PTR, in which case the return
+    // type is updated to return void* (no new argument is passed).
+    if (fir::isa_builtin_cptr_type(resultValue.getType())) {
+      auto module = ret->getParentOfType<mlir::ModuleOp>();
+      FirOpBuilder builder(rewriter, module);
+      mlir::Value cptr = resultValue;
+      if (resultLoad) {
+        // Replace whole derived type load by component load.
+        cptr = resultLoad.getMemref();
+        rewriter.setInsertionPoint(resultLoad);
       }
-    // The result storage may have been optimized out by a memory to
-    // register pass, this is possible for fir.box results, or fir.record
-    // with no length parameters. Simply store the result in the result storage.
-    // at the return point.
-    if (!replacedStorage)
-      rewriter.create<fir::StoreOp>(loc, returnedValue, newArg);
-    rewriter.replaceOpWithNewOp<mlir::func::ReturnOp>(ret);
+      mlir::Value newResultValue =
+          fir::factory::genCPtrOrCFunptrValue(builder, loc, cptr);
+      newResultValue = builder.createConvert(
+          loc, getVoidPtrType(ret.getContext()), newResultValue);
+      rewriter.setInsertionPoint(ret);
+      rewriter.replaceOpWithNewOp<mlir::func::ReturnOp>(
+          ret, mlir::ValueRange{newResultValue});
+    } else if (resultStorage) {
+      resultStorage.replaceAllUsesWith(newArg);
+      rewriter.replaceOpWithNewOp<mlir::func::ReturnOp>(ret);
+    } else {
+      // The result storage may have been optimized out by a memory to
+      // register pass, this is possible for fir.box results, or fir.record
+      // with no length parameters. Simply store the result in the result
+      // storage. at the return point.
+      rewriter.create<fir::StoreOp>(loc, resultValue, newArg);
+      rewriter.replaceOpWithNewOp<mlir::func::ReturnOp>(ret);
+    }
+    // Delete result old local storage if unused.
+    if (resultStorage)
+      if (auto alloc = resultStorage.getDefiningOp<fir::AllocaOp>())
+        if (alloc->use_empty())
+          rewriter.eraseOp(alloc);
     return mlir::success();
   }
 
@@ -263,8 +271,6 @@ class AddrOfOpConversion : public mlir::OpRewritePattern<fir::AddrOfOp> {
                   mlir::PatternRewriter &rewriter) const override {
     auto oldFuncTy = mlir::cast<mlir::FunctionType>(addrOf.getType());
     mlir::FunctionType newFuncTy;
-    // TODO: This should be generalized for derived types, and it is
-    // architecture and OS dependent.
     if (oldFuncTy.getNumResults() != 0 &&
         fir::isa_builtin_cptr_type(oldFuncTy.getResult(0)))
       newFuncTy = getCPtrFunctionType(oldFuncTy);
@@ -298,8 +304,6 @@ class AbstractResultOpt
     // Convert function type itself if it has an abstract result.
     auto funcTy = mlir::cast<mlir::FunctionType>(func.getFunctionType());
     if (hasAbstractResult(funcTy)) {
-      // TODO: This should be generalized for derived types, and it is
-      // architecture and OS dependent.
       if (fir::isa_builtin_cptr_type(funcTy.getResult(0))) {
         func.setType(getCPtrFunctionType(funcTy));
         patterns.insert<ReturnOpConversion>(context, mlir::Value{});
diff --git a/flang/test/Fir/abstract-results.fir b/flang/test/Fir/abstract-results.fir
index 82f1cd33073fd..93e63dc657f0c 100644
--- a/flang/test/Fir/abstract-results.fir
+++ b/flang/test/Fir/abstract-results.fir
@@ -87,8 +87,8 @@ func.func @boxfunc_callee() -> !fir.box<!fir.heap<f64>> {
   // FUNC-BOX: return
 }
 
-// FUNC-REF-LABEL: func @retcptr() -> i64
-// FUNC-BOX-LABEL: func @retcptr() -> i64
+// FUNC-REF-LABEL: func @retcptr() -> !fir.ref<none>
+// FUNC-BOX-LABEL: func @retcptr() -> !fir.ref<none>
 func.func @retcptr() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {
   %0 = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = "rec", uniq_name = "_QFrecErec"}
   %1 = fir.load %0 : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
@@ -98,12 +98,14 @@ func.func @retcptr() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__addres
   // FUNC-REF: %[[FIELD:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
   // FUNC-REF: %[[ADDR:.*]] = fir.coordinate_of %[[ALLOC]], %[[FIELD]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.field) -> !fir.ref<i64>
   // FUNC-REF: %[[VAL:.*]] = fir.load %[[ADDR]] : !fir.ref<i64>
-  // FUNC-REF: return %[[VAL]] : i64
+  // FUNC-REF: %[[CAST:.*]] = fir.convert %[[VAL]] : (i64) -> !fir.ref<none>
+  // FUNC-REF: return %[[CAST]] : !fir.ref<none>
   // FUNC-BOX: %[[ALLOC:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = "rec", uniq_name = "_QFrecErec"}
   // FUNC-BOX: %[[FIELD:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
   // FUNC-BOX: %[[ADDR:.*]] = fir.coordinate_of %[[ALLOC]], %[[FIELD]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.field) -> !fir.ref<i64>
   // FUNC-BOX: %[[VAL:.*]] = fir.load %[[ADDR]] : !fir.ref<i64>
-  // FUNC-BOX: return %[[VAL]] : i64
+  // FUNC-BOX: %[[CAST:.*]] = fir.convert %[[VAL]] : (i64) -> !fir.ref<none>
+  // FUNC-BOX: return %[[CAST]] : !fir.ref<none>
 }
 
 // FUNC-REF-LABEL:  func private @arrayfunc_callee_declare(
@@ -311,8 +313,8 @@ func.func @test_address_of() {
 
 }
 
-// FUNC-REF-LABEL: func.func private @returns_null() -> i64
-// FUNC-BOX-LABEL: func.func private @returns_null() -> i64
+// FUNC-REF-LABEL: func.func private @returns_null() -> !fir.ref<none>
+// FUNC-BOX-LABEL: func.func private @returns_null() -> !fir.ref<none>
 func.func private @returns_null() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
 
 // FUNC-REF-LABEL: func @test_address_of_cptr
@@ -323,12 +325,12 @@ func.func @test_address_of_cptr() {
   fir.call @_QMtest_c_func_modPsubr(%1) : (() -> ()) -> ()
   return
 
-  // FUNC-REF: %[[VAL_0:.*]] = fir.address_of(@returns_null) : () -> i64
-  // FUNC-REF: %[[VAL_1:.*]] = fir.convert %[[VAL_0]] : (() -> i64) -> (() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>)
+  // FUNC-REF: %[[VAL_0:.*]] = fir.address_of(@returns_null) : () -> !fir.ref<none>
+  // FUNC-REF: %[[VAL_1:.*]] = fir.convert %[[VAL_0]] : (() -> !fir.ref<none>) -> (() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>)
   // FUNC-REF: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) -> (() -> ())
   // FUNC-REF: fir.call @_QMtest_c_func_modPsubr(%[[VAL_2]]) : (() -> ()) -> ()
-  // FUNC-BOX: %[[VAL_0:.*]] = fir.address_of(@returns_null) : () -> i64
-  // FUNC-BOX: %[[VAL_1:.*]] = fir.convert %[[VAL_0]] : (() -> i64) -> (() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>)
+  // FUNC-BOX: %[[VAL_0:.*]] = fir.address_of(@returns_null) : () -> !fir.ref<none>
+  // FUNC-BOX: %[[VAL_1:.*]] = fir.convert %[[VAL_0]] : (() -> !fir.ref<none>) -> (() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>)
   // FUNC-BOX: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) -> (() -> ())
   // FUNC-BOX: fir.call @_QMtest_c_func_modPsubr(%[[VAL_2]]) : (() -> ()) -> ()
 }
@@ -380,18 +382,20 @@ func.func @test_indirect_calls_return_cptr(%arg0: () -> ()) {
 
   // FUNC-REF: %[[VAL_0:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = ".result"}
   // FUNC-REF: %[[VAL_1:.*]] = fir.convert %[[ARG0]] : (() -> ()) -> (() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>)
-  // FUNC-REF: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) -> (() -> i64)
-  // FUNC-REF: %[[VAL_3:.*]] = fir.call %[[VAL_2]]() : () -> i64
+  // FUNC-REF: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) -> (() -> !fir.ref<none>)
+  // FUNC-REF: %[[VAL_3:.*]] = fir.call %[[VAL_2]]() : () -> !fir.ref<none>
   // FUNC-REF: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
   // FUNC-REF: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_4]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.field) -> !fir.ref<i64>
-  // FUNC-REF: fir.store %[[VAL_3]] to %[[VAL_5]] : !fir.ref<i64>
+  // FUNC-REF: %[[CAST:.*]] = fir.convert %[[VAL_3]] : (!fir.ref<none>) -> i64
+  // FUNC-REF: fir.store %[[CAST]] to %[[VAL_5]] : !fir.ref<i64>
   // FUNC-BOX: %[[VAL_0:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = ".result"}
   // FUNC-BOX: %[[VAL_1:.*]] = fir.convert %[[ARG0]] : (() -> ()) -> (() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>)
-  // FUNC-BOX: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) -> (() -> i64)
-  // FUNC-BOX: %[[VAL_3:.*]] = fir.call %[[VAL_2]]() : () -> i64
+  // FUNC-BOX: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>) -> (() -> !fir.ref<none>)
+  // FUNC-BOX: %[[VAL_3:.*]] = fir.call %[[VAL_2]]() : () -> !fir.ref<none>
   // FUNC-BOX: %[[VAL_4:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
   // FUNC-BOX: %[[VAL_5:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_4]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.field) -> !fir.ref<i64>
-  // FUNC-BOX: fir.store %[[VAL_3]] to %[[VAL_5]] : !fir.ref<i64>
+  // FUNC-BOX: %[[CAST:.*]] = fir.convert %[[VAL_3]] : (!fir.ref<none>) -> i64
+  // FUNC-BOX: fir.store %[[CAST]] to %[[VAL_5]] : !fir.ref<i64>
 }
 
 // ----------------------- Test GlobalOp rewrite ------------------------

From 411bb691fe4c69bf167b3a03b052c094687f99ce Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs@quicinc.com>
Date: Tue, 23 Jul 2024 18:49:57 +0530
Subject: [PATCH 021/427] [RISCV] Fix InsnCI register type (#100113)

According to the spec the CI type instructions can take any of the 32
RVI registers.

Fixes #100112

(cherry picked from commit 1ebfc81a91194c000ac70b4ea53891cc956aa6eb)
---
 llvm/lib/Target/RISCV/RISCVInstrInfoC.td |  8 ++++----
 llvm/test/MC/RISCV/insn_c.s              | 10 ++++++++++
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index 9257ee5a09a8e..3f279b7a58ca6 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -764,9 +764,9 @@ def InsnCR : DirectiveInsnCR<(outs AnyReg:$rd), (ins uimm2_opcode:$opcode,
                                                      uimm4:$funct4,
                                                      AnyReg:$rs2),
                              "$opcode, $funct4, $rd, $rs2">;
-def InsnCI : DirectiveInsnCI<(outs AnyRegC:$rd), (ins uimm2_opcode:$opcode,
-                                                      uimm3:$funct3,
-                                                      simm6:$imm6),
+def InsnCI : DirectiveInsnCI<(outs AnyReg:$rd), (ins uimm2_opcode:$opcode,
+                                                     uimm3:$funct3,
+                                                     simm6:$imm6),
                              "$opcode, $funct3, $rd, $imm6">;
 def InsnCIW : DirectiveInsnCIW<(outs AnyRegC:$rd), (ins uimm2_opcode:$opcode,
                                                         uimm3:$funct3,
@@ -818,7 +818,7 @@ def : InstAlias<".insn_cr $opcode, $funct4, $rd, $rs2",
                 (InsnCR AnyReg:$rd, uimm2_opcode:$opcode, uimm4:$funct4,
                         AnyReg:$rs2)>;
 def : InstAlias<".insn_ci $opcode, $funct3, $rd, $imm6",
-                (InsnCI AnyRegC:$rd, uimm2_opcode:$opcode, uimm3:$funct3,
+                (InsnCI AnyReg:$rd, uimm2_opcode:$opcode, uimm3:$funct3,
                         simm6:$imm6)>;
 def : InstAlias<".insn_ciw $opcode, $funct3, $rd, $imm8",
                 (InsnCIW AnyRegC:$rd, uimm2_opcode:$opcode, uimm3:$funct3,
diff --git a/llvm/test/MC/RISCV/insn_c.s b/llvm/test/MC/RISCV/insn_c.s
index 19169e8b08c94..c63e8ab33aef9 100644
--- a/llvm/test/MC/RISCV/insn_c.s
+++ b/llvm/test/MC/RISCV/insn_c.s
@@ -31,6 +31,16 @@ target:
 # CHECK-OBJ: c.addi a0, 0xd
 .insn ci C1, 0, a0, 13
 
+# CHECK-ASM: .insn ci  1, 0, a6, 13
+# CHECK-ASM: encoding: [0x35,0x08]
+# CHECK-OBJ: c.addi a6, 0xd
+.insn ci  1, 0, a6, 13
+
+# CHECK-ASM: .insn ci  1, 0, a6, 13
+# CHECK-ASM: encoding: [0x35,0x08]
+# CHECK-OBJ: c.addi a6, 0xd
+.insn ci C1, 0, a6, 13
+
 # CHECK-ASM: .insn ciw  0, 0, a0, 13
 # CHECK-ASM: encoding: [0xa8,0x01]
 # CHECK-OBJ: c.addi4spn a0, sp, 0xc8

From 7af27be6633a18dd3d568594175feb6353320795 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Tue, 23 Jul 2024 09:44:00 -0700
Subject: [PATCH 022/427] [ARM] Create mapping symbols with non-unique names

Similar to #99836 for AArch64.

Non-unique names save .strtab space and match GNU assembler.

Pull Request: https://github.com/llvm/llvm-project/pull/99906

(cherry picked from commit 298a9223a57c50cb0d24b82687ad1bc2f7a022e6)
---
 lld/test/ELF/arm-cmse-implib.s                   |  8 ++++----
 .../Target/ARM/MCTargetDesc/ARMELFStreamer.cpp   |  8 ++------
 .../DebugInfo/Symbolize/ELF/arm-mapping-symbol.s | 16 ++++++++--------
 llvm/test/MC/ARM/CheckDataSymbol.s               |  2 +-
 llvm/test/MC/ARM/data-in-code.ll                 |  2 +-
 llvm/test/MC/ARM/directive-arm-thumb-alignment.s |  8 ++++----
 llvm/test/MC/ARM/multi-section-mapping.s         | 12 ++++++------
 llvm/test/MC/ARM/thumb-function-address.s        |  4 ++--
 llvm/test/MC/ARM/thumb-types.s                   | 16 ++++++++--------
 llvm/test/MC/ARM/thumb_set.s                     |  4 ++--
 llvm/test/MC/ELF/ARM/execute-only-section.s      |  6 +++---
 llvm/test/tools/llvm-objdump/multiple-symbols.s  |  4 ++--
 12 files changed, 43 insertions(+), 47 deletions(-)

diff --git a/lld/test/ELF/arm-cmse-implib.s b/lld/test/ELF/arm-cmse-implib.s
index 581bff9dd8536..60a68b0226c3d 100644
--- a/lld/test/ELF/arm-cmse-implib.s
+++ b/lld/test/ELF/arm-cmse-implib.s
@@ -53,8 +53,8 @@ secure_entry:
 // CHECK1-NEXT:    Num:    Value  Size Type    Bind   Vis       Ndx Name
 // CHECK1-NEXT:      0: 00000000     0 NOTYPE  LOCAL  DEFAULT   UND
 // CHECK1-NEXT:      1: 00020000     0 NOTYPE  LOCAL  DEFAULT     2 $t
-// CHECK1-NEXT:      2: 00008000     0 NOTYPE  LOCAL  DEFAULT     1 $t.0
-// CHECK1-NEXT:      3: 00008004     0 NOTYPE  LOCAL  DEFAULT     1 $t.0
+// CHECK1-NEXT:      2: 00008000     0 NOTYPE  LOCAL  DEFAULT     1 $t
+// CHECK1-NEXT:      3: 00008004     0 NOTYPE  LOCAL  DEFAULT     1 $t
 // CHECK1-NEXT:      4: 00008001     2 FUNC    GLOBAL DEFAULT     1 secure_entry
 // CHECK1-NEXT:      5: 00020001     8 FUNC    GLOBAL DEFAULT     2 foo
 // CHECK1-NEXT:      6: 00008005     2 FUNC    GLOBAL DEFAULT     1 __acle_se_foo
@@ -82,8 +82,8 @@ secure_entry:
 // CHECK2-NEXT:    Num:    Value  Size Type    Bind   Vis       Ndx Name
 // CHECK2-NEXT:      0: 00000000     0 NOTYPE  LOCAL  DEFAULT   UND
 // CHECK2-NEXT:      1: 00020000     0 NOTYPE  LOCAL  DEFAULT     2 $t
-// CHECK2-NEXT:      2: 00008000     0 NOTYPE  LOCAL  DEFAULT     1 $t.0
-// CHECK2-NEXT:      3: 00008004     0 NOTYPE  LOCAL  DEFAULT     1 $t.0
+// CHECK2-NEXT:      2: 00008000     0 NOTYPE  LOCAL  DEFAULT     1 $t
+// CHECK2-NEXT:      3: 00008004     0 NOTYPE  LOCAL  DEFAULT     1 $t
 // CHECK2-NEXT:      4: 00008001     2 FUNC    GLOBAL DEFAULT     1 secure_entry
 // CHECK2-NEXT:      5: 00020011     8 FUNC    WEAK   DEFAULT     2 baz
 // CHECK2-NEXT:      6: 00008005     2 FUNC    GLOBAL DEFAULT     1 __acle_se_baz
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 3182fecffecf4..de343a7d72ad9 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -670,8 +670,7 @@ class ARMELFStreamer : public MCELFStreamer {
   }
 
   void EmitMappingSymbol(StringRef Name) {
-    auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
-        Name + "." + Twine(MappingSymbolCounter++)));
+    auto *Symbol = cast<MCSymbolELF>(getContext().createLocalSymbol(Name));
     emitLabel(Symbol);
 
     Symbol->setType(ELF::STT_NOTYPE);
@@ -679,8 +678,7 @@ class ARMELFStreamer : public MCELFStreamer {
   }
 
   void emitMappingSymbol(StringRef Name, MCDataFragment &F, uint64_t Offset) {
-    auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
-        Name + "." + Twine(MappingSymbolCounter++)));
+    auto *Symbol = cast<MCSymbolELF>(getContext().createLocalSymbol(Name));
     emitLabelAtPos(Symbol, SMLoc(), F, Offset);
     Symbol->setType(ELF::STT_NOTYPE);
     Symbol->setBinding(ELF::STB_LOCAL);
@@ -710,7 +708,6 @@ class ARMELFStreamer : public MCELFStreamer {
 
   bool IsThumb;
   bool IsAndroid;
-  int64_t MappingSymbolCounter = 0;
 
   DenseMap<const MCSection *, std::unique_ptr<ElfMappingSymbolInfo>>
       LastMappingSymbols;
@@ -1121,7 +1118,6 @@ void ARMELFStreamer::reset() {
   MCTargetStreamer &TS = *getTargetStreamer();
   ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
   ATS.reset();
-  MappingSymbolCounter = 0;
   MCELFStreamer::reset();
   LastMappingSymbols.clear();
   LastEMSInfo.reset();
diff --git a/llvm/test/DebugInfo/Symbolize/ELF/arm-mapping-symbol.s b/llvm/test/DebugInfo/Symbolize/ELF/arm-mapping-symbol.s
index 6e17ef29ae577..27310c09fb07c 100644
--- a/llvm/test/DebugInfo/Symbolize/ELF/arm-mapping-symbol.s
+++ b/llvm/test/DebugInfo/Symbolize/ELF/arm-mapping-symbol.s
@@ -5,19 +5,19 @@
 
 ## Verify that mapping symbols are actually present in the object at expected
 ## addresses.
-# RUN: llvm-nm --special-syms %t | FileCheck %s -check-prefix MAPPING_A
+# RUN: llvm-nm --special-syms %t | FileCheck %s --check-prefix=MAPPING_A --match-full-lines
 
-# MAPPING_A:      00000004 t $a.1
-# MAPPING_A-NEXT: 00000000 t $d.0
-# MAPPING_A-NEXT: 00000008 t $d.2
+# MAPPING_A:      00000004 t $a
+# MAPPING_A-NEXT: 00000000 t $d
+# MAPPING_A-NEXT: 00000008 t $d
 # MAPPING_A-NEXT: 00000000 T foo
 
 # RUN: llvm-mc -filetype=obj -triple=thumbv7-none-linux %s -o %tthumb
-# RUN: llvm-nm --special-syms %tthumb | FileCheck %s -check-prefix MAPPING_T
+# RUN: llvm-nm --special-syms %tthumb | FileCheck %s --check-prefix=MAPPING_T --match-full-lines
 
-# MAPPING_T:      00000000 t $d.0
-# MAPPING_T-NEXT: 00000006 t $d.2
-# MAPPING_T-NEXT: 00000004 t $t.1
+# MAPPING_T:      00000000 t $d
+# MAPPING_T-NEXT: 00000006 t $d
+# MAPPING_T-NEXT: 00000004 t $t
 # MAPPING_T-NEXT: 00000000 T foo
 
 # RUN: llvm-symbolizer --obj=%t 4 8 | FileCheck %s -check-prefix SYMBOL
diff --git a/llvm/test/MC/ARM/CheckDataSymbol.s b/llvm/test/MC/ARM/CheckDataSymbol.s
index 14ea92a943a1e..ec421f51395af 100644
--- a/llvm/test/MC/ARM/CheckDataSymbol.s
+++ b/llvm/test/MC/ARM/CheckDataSymbol.s
@@ -1,7 +1,7 @@
 # RUN: llvm-mc -filetype=obj -assemble \
 # RUN: -triple=arm-arm-none-eabi -mcpu=cortex-a9 %s -o - \
 # RUN: | llvm-readobj -S --symbols - | FileCheck %s
-# CHECK:     Name: $d.1 ({{[1-9][0-9]+}})
+# CHECK:     Name: $d
 # CHECK-NEXT:     Value: 0x4
 # CHECK-NEXT:     Size: 0
 # CHECK-NEXT:     Binding: Local (0x0)
diff --git a/llvm/test/MC/ARM/data-in-code.ll b/llvm/test/MC/ARM/data-in-code.ll
index 2e107f250e05d..b755c3bb5cad4 100644
--- a/llvm/test/MC/ARM/data-in-code.ll
+++ b/llvm/test/MC/ARM/data-in-code.ll
@@ -72,7 +72,7 @@ exit:
 ;; TMB-NEXT:     Section: [[MIXED_SECT:[^ ]+]]
 
 ;; TMB:        Symbol {
-;; TMB:          Name: $d.1
+;; TMB:          Name: $d
 ;; TMB-NEXT:     Value: 0x{{[0-9A-F]+}}
 ;; TMB-NEXT:     Size: 0
 ;; TMB-NEXT:     Binding: Local
diff --git a/llvm/test/MC/ARM/directive-arm-thumb-alignment.s b/llvm/test/MC/ARM/directive-arm-thumb-alignment.s
index b90c76d2b121c..0e798f67b48aa 100644
--- a/llvm/test/MC/ARM/directive-arm-thumb-alignment.s
+++ b/llvm/test/MC/ARM/directive-arm-thumb-alignment.s
@@ -10,12 +10,12 @@
 @ CHECK:      Num:    Value  Size Type    Bind   Vis      Ndx Name
 @ CHECK-NEXT:   0: 00000000     0 NOTYPE  LOCAL  DEFAULT  UND
 @ CHECK-NEXT:   1: 00000001     0 FUNC    LOCAL  DEFAULT    2 aligned_thumb
-@ CHECK-NEXT:   2: 00000000     0 NOTYPE  LOCAL  DEFAULT    2 $t.0
+@ CHECK-NEXT:   2: 00000000     0 NOTYPE  LOCAL  DEFAULT    2 $t
 @ CHECK-NEXT:   3: 00000004     0 FUNC    LOCAL  DEFAULT    2 thumb_to_arm
-@ CHECK-NEXT:   4: 00000004     0 NOTYPE  LOCAL  DEFAULT    2 $a.1
-@ CHECK-NEXT:   5: 00000008     0 NOTYPE  LOCAL  DEFAULT    2 $d.2
+@ CHECK-NEXT:   4: 00000004     0 NOTYPE  LOCAL  DEFAULT    2 $a
+@ CHECK-NEXT:   5: 00000008     0 NOTYPE  LOCAL  DEFAULT    2 $d
 @ CHECK-NEXT:   6: 0000000b     0 FUNC    LOCAL  DEFAULT    2 unaligned_arm_to_thumb
-@ CHECK-NEXT:   7: 0000000a     0 NOTYPE  LOCAL  DEFAULT    2 $t.3
+@ CHECK-NEXT:   7: 0000000a     0 NOTYPE  LOCAL  DEFAULT    2 $t
 
 .thumb
 
diff --git a/llvm/test/MC/ARM/multi-section-mapping.s b/llvm/test/MC/ARM/multi-section-mapping.s
index 6107f262b0b8c..ed531306042aa 100644
--- a/llvm/test/MC/ARM/multi-section-mapping.s
+++ b/llvm/test/MC/ARM/multi-section-mapping.s
@@ -1,4 +1,4 @@
-@ RUN: llvm-mc -triple=armv7-linux-gnueabi -filetype=obj < %s | llvm-objdump -t - | FileCheck %s
+@ RUN: llvm-mc -triple=armv7-linux-gnueabi -filetype=obj < %s | llvm-objdump -t - | FileCheck %s --match-full-lines
 
         .text
         add r0, r0, r0
@@ -42,10 +42,10 @@
 @   + .starts_thumb to have $t at 0
 @   + .starts_data to have $d at 0
 
-@ CHECK:      00000000 l .text 00000000 $a.0
-@ CHECK-NEXT: 00000000 l .wibble 00000000 $a.1
-@ CHECK-NEXT: 00000000 l .starts_thumb 00000000 $t.2
-@ CHECK-NEXT: 00000008 l .text 00000000 $t.3
-@ CHECK-NEXT: 0000000a l .text 00000000 $d.4
+@ CHECK:      00000000 l .text 00000000 $a
+@ CHECK-NEXT: 00000000 l .wibble 00000000 $a
+@ CHECK-NEXT: 00000000 l .starts_thumb 00000000 $t
+@ CHECK-NEXT: 00000008 l .text 00000000 $t
+@ CHECK-NEXT: 0000000a l .text 00000000 $d
 @ CHECK-NOT: ${{[adt]}}
 
diff --git a/llvm/test/MC/ARM/thumb-function-address.s b/llvm/test/MC/ARM/thumb-function-address.s
index 753a049137bbf..d69dcb6724019 100644
--- a/llvm/test/MC/ARM/thumb-function-address.s
+++ b/llvm/test/MC/ARM/thumb-function-address.s
@@ -35,8 +35,8 @@ label:
 @ CHECK-NEXT: 00000000 0 NOTYPE LOCAL DEFAULT     UND
 @ CHECK-NEXT: 00000001 0 FUNC   LOCAL DEFAULT 2   func_label
 @ CHECK-NEXT: 00000001 0 FUNC   LOCAL DEFAULT 2   foo_impl
-@ CHECK-NEXT: 00000000 0 NOTYPE LOCAL DEFAULT 2   $t.0
+@ CHECK-NEXT: 00000000 0 NOTYPE LOCAL DEFAULT 2   $t
 @ CHECK-NEXT: 00000003 0 FUNC   LOCAL DEFAULT 2   foo_resolver
 @ CHECK-NEXT: 00000003 0 IFUNC  LOCAL DEFAULT 2   foo
 @ CHECK-NEXT: 00000004 0 FUNC   LOCAL DEFAULT 2   label
-@ CHECK-NEXT: 00000008 0 NOTYPE LOCAL DEFAULT 2   $a.1
+@ CHECK-NEXT: 00000008 0 NOTYPE LOCAL DEFAULT 2   $a
diff --git a/llvm/test/MC/ARM/thumb-types.s b/llvm/test/MC/ARM/thumb-types.s
index cb1b47e1fa7fb..b965cd8accf05 100644
--- a/llvm/test/MC/ARM/thumb-types.s
+++ b/llvm/test/MC/ARM/thumb-types.s
@@ -3,22 +3,22 @@
 @ CHECK:      Num:    Value  Size Type    Bind   Vis      Ndx Name
 @ CHECK-NEXT:   0: 00000000     0 NOTYPE  LOCAL  DEFAULT  UND
 @ CHECK-NEXT:   1: 00000001     0 FUNC    LOCAL  DEFAULT    2 implicit_function
-@ CHECK-NEXT:   2: 00000000     0 NOTYPE  LOCAL  DEFAULT    2 $t.0
+@ CHECK-NEXT:   2: 00000000     0 NOTYPE  LOCAL  DEFAULT    2 $t
 @ CHECK-NEXT:   3: 00000002     0 OBJECT  LOCAL  DEFAULT    2 implicit_data
-@ CHECK-NEXT:   4: 00000002     0 NOTYPE  LOCAL  DEFAULT    2 $d.1
+@ CHECK-NEXT:   4: 00000002     0 NOTYPE  LOCAL  DEFAULT    2 $d
 @ CHECK-NEXT:   5: 00000008     0 FUNC    LOCAL  DEFAULT    2 arm_function
-@ CHECK-NEXT:   6: 00000008     0 NOTYPE  LOCAL  DEFAULT    2 $a.2
+@ CHECK-NEXT:   6: 00000008     0 NOTYPE  LOCAL  DEFAULT    2 $a
 @ CHECK-NEXT:   7: 0000000c     0 NOTYPE  LOCAL  DEFAULT    2 untyped_text_label
-@ CHECK-NEXT:   8: 0000000c     0 NOTYPE  LOCAL  DEFAULT    2 $t.3
+@ CHECK-NEXT:   8: 0000000c     0 NOTYPE  LOCAL  DEFAULT    2 $t
 @ CHECK-NEXT:   9: 0000000f     0 FUNC    LOCAL  DEFAULT    2 explicit_function
-@ CHECK-NEXT:  10: 00000010     0 NOTYPE  LOCAL  DEFAULT    2 $d.4
+@ CHECK-NEXT:  10: 00000010     0 NOTYPE  LOCAL  DEFAULT    2 $d
 @ CHECK-NEXT:  11: 00000000     4 TLS     LOCAL  DEFAULT    5 tls
 @ CHECK-NEXT:  12: 00000015     0 IFUNC   LOCAL  DEFAULT    2 indirect_function
-@ CHECK-NEXT:  13: 00000014     0 NOTYPE  LOCAL  DEFAULT    2 $t.5
+@ CHECK-NEXT:  13: 00000014     0 NOTYPE  LOCAL  DEFAULT    2 $t
 @ CHECK-NEXT:  14: 00000000     0 NOTYPE  LOCAL  DEFAULT    4 untyped_data_label
-@ CHECK-NEXT:  15: 00000000     0 NOTYPE  LOCAL  DEFAULT    4 $t.6
+@ CHECK-NEXT:  15: 00000000     0 NOTYPE  LOCAL  DEFAULT    4 $t
 @ CHECK-NEXT:  16: 00000002     0 OBJECT  LOCAL  DEFAULT    4 explicit_data
-@ CHECK-NEXT:  17: 00000002     0 NOTYPE  LOCAL  DEFAULT    4 $d.7
+@ CHECK-NEXT:  17: 00000002     0 NOTYPE  LOCAL  DEFAULT    4 $d
 
 
 	.syntax unified
diff --git a/llvm/test/MC/ARM/thumb_set.s b/llvm/test/MC/ARM/thumb_set.s
index 4bb7b599aaf11..836eb0b62e0fa 100644
--- a/llvm/test/MC/ARM/thumb_set.s
+++ b/llvm/test/MC/ARM/thumb_set.s
@@ -6,12 +6,12 @@
 @ CHECK:      Num:    Value  Size Type    Bind   Vis      Ndx Name
 @ CHECK-NEXT:   0: 00000000     0 NOTYPE  LOCAL  DEFAULT  UND
 @ CHECK-NEXT:   1: 00000000     0 FUNC    LOCAL  DEFAULT    2 arm_func
-@ CHECK-NEXT:   2: 00000000     0 NOTYPE  LOCAL  DEFAULT    2 $a.0
+@ CHECK-NEXT:   2: 00000000     0 NOTYPE  LOCAL  DEFAULT    2 $a
 @ CHECK-NEXT:   3: 00000001     0 FUNC    LOCAL  DEFAULT    2 alias_arm_func
 @ CHECK-NEXT:   4: 00000001     0 FUNC    LOCAL  DEFAULT    2 alias_arm_func2
 @ CHECK-NEXT:   5: 00000001     0 FUNC    LOCAL  DEFAULT    2 alias_arm_func3
 @ CHECK-NEXT:   6: 00000005     0 FUNC    LOCAL  DEFAULT    2 thumb_func
-@ CHECK-NEXT:   7: 00000004     0 NOTYPE  LOCAL  DEFAULT    2 $t.1
+@ CHECK-NEXT:   7: 00000004     0 NOTYPE  LOCAL  DEFAULT    2 $t
 @ CHECK-NEXT:   8: 00000005     0 FUNC    LOCAL  DEFAULT    2 alias_thumb_func
 @ CHECK-NEXT:   9: 5eed1e55     0 FUNC    LOCAL  DEFAULT  ABS seedless
 @ CHECK-NEXT:  10: e665a1ad     0 FUNC    LOCAL  DEFAULT  ABS eggsalad
diff --git a/llvm/test/MC/ELF/ARM/execute-only-section.s b/llvm/test/MC/ELF/ARM/execute-only-section.s
index 12020e030cc04..ac5e31f70dba0 100644
--- a/llvm/test/MC/ELF/ARM/execute-only-section.s
+++ b/llvm/test/MC/ELF/ARM/execute-only-section.s
@@ -18,7 +18,7 @@ foo:
 
 
 // CHECK:      Section {
-// CHECK:        Name: .text (16)
+// CHECK:        Name: .text
 // CHECK-NEXT:   Type: SHT_PROGBITS (0x1)
 // CHECK-NEXT:   Flags [ (0x20000006)
 // CHECK-NEXT:     SHF_ALLOC (0x2)
@@ -29,7 +29,7 @@ foo:
 // CHECK:      }
 
 // CHECK:      Section {
-// CHECK:        Name: .text (16)
+// CHECK:        Name: .text
 // CHECK-NEXT:   Type: SHT_PROGBITS (0x1)
 // CHECK-NEXT:   Flags [ (0x20000006)
 // CHECK-NEXT:     SHF_ALLOC (0x2)
@@ -40,6 +40,6 @@ foo:
 // CHECK:      }
 
 // CHECK: Symbol {
-// CHECK:   Name: foo (22)
+// CHECK:   Name: foo
 // CHECK:   Section: .text (0x3)
 // CHECK: }
diff --git a/llvm/test/tools/llvm-objdump/multiple-symbols.s b/llvm/test/tools/llvm-objdump/multiple-symbols.s
index 24c169e32147b..1b13f099ae98c 100644
--- a/llvm/test/tools/llvm-objdump/multiple-symbols.s
+++ b/llvm/test/tools/llvm-objdump/multiple-symbols.s
@@ -26,13 +26,13 @@
 
 @ HEAD:          Disassembly of section .text:
 @ HEAD-EMPTY:
-@ AMAP-NEXT:     00000000 <$a.0>:
+@ AMAP-NEXT:     00000000 <$a>:
 @ AAAA-NEXT:     00000000 <aaaa>:
 @ BBBB-NEXT:     00000000 <bbbb>:
 @ AABB-NEXT:            0: e0800080      add     r0, r0, r0, lsl #1
 @ AABB-NEXT:            4: e12fff1e      bx      lr
 @ BOTH-EMPTY:    
-@ TMAP-NEXT:     00000008 <$t.1>:
+@ TMAP-NEXT:     00000008 <$t>:
 @ CCCC-NEXT:     00000008 <cccc>:
 @ DDDD-NEXT:     00000008 <dddd>:
 @ CCDD-NEXT:            8: eb00 0080     add.w   r0, r0, r0, lsl #2

From 2df1cec9db73697a04e733de39ab9ac63c7b2d9b Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Tue, 23 Jul 2024 18:59:23 +0200
Subject: [PATCH 023/427] [libc++][doc] Update the release notes for LLVM 19.
 (#99061)

This is a preparation for the upcoming LLVM 19 release.
---
 libcxx/docs/ReleaseNotes/19.rst    | 47 ++++++++++++++++++++++++------
 libcxx/docs/ReleaseNotes/20.rst    | 14 ++++++++-
 libcxx/docs/Status/SpecialMath.rst |  2 +-
 3 files changed, 52 insertions(+), 11 deletions(-)

diff --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst
index 439f552db59a8..c2c2bfbed4ac3 100644
--- a/libcxx/docs/ReleaseNotes/19.rst
+++ b/libcxx/docs/ReleaseNotes/19.rst
@@ -35,6 +35,20 @@ see the `releases page <https://llvm.org/releases/>`_.
 What's New in Libc++ 19.0.0?
 ==============================
 
+The main focus of the libc++ team has been to implement new C++20, C++23,
+and C++26 features.
+
+Experimental support for the time zone database has progressed.
+
+Work on the ranges support has progressed. See
+:ref:`ranges-status` for the current status.
+
+Work on the experimental C++17 Parallel STL has progressed. See
+:ref:`pstl-status` for the current status.
+
+Work on the C++17 mathematical special functions has started. See
+:ref:`special-math-status` for the current status.
+
 Implemented Papers
 ------------------
 
@@ -59,14 +73,21 @@ Implemented Papers
 - P0019R8 - ``std::atomic_ref``
 - P2389R2 - Alias template ``dims`` for the ``extents`` of ``mdspan``
 - P1223R5 - ``ranges::find_last()``, ``ranges::find_last_if()``, and ``ranges::find_last_if_not()``
+- P2602R2 - Poison Pills are Too Toxic
+- P1981R0 - Rename ``leap`` to ``leap_second``
+- P1982R0 - Rename ``link`` to ``time_zone_link``
+
 
 Improvements and New Features
 -----------------------------
 
 - The performance of growing ``std::vector`` has been improved for trivially relocatable types.
-- A lot of types are considered trivially relocatable now, including ``vector`` and ``string``.
-- The performance of ``ranges::fill`` and ``ranges::fill_n`` has been improved for ``vector<bool>::iterator``\s,
+
+- A lot of types are considered trivially relocatable now, including ``std::vector`` and ``std::string``.
+
+- The performance of ``std::ranges::fill`` and ``std::ranges::fill_n`` has been improved for ``std::vector<bool>::iterator``\s,
   resulting in a performance increase of up to 1400x.
+
 - The ``std::mismatch`` algorithm has been optimized for integral types, which can lead up to 40x performance
   improvements.
 
@@ -74,7 +95,7 @@ Improvements and New Features
   up to 100x.
 
 - The ``std::set_intersection`` and ``std::ranges::set_intersection`` algorithms have been optimized to fast-forward over
-  contiguous ranges of non-matching values, reducing the number of comparisons from linear to 
+  contiguous ranges of non-matching values, reducing the number of comparisons from linear to
   logarithmic growth with the number of elements in best-case scenarios.
 
 - The ``_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM`` macro has been added to make the declarations in ``<strstream>`` available.
@@ -101,15 +122,18 @@ Improvements and New Features
 
   Note: bounded iterators currently are not supported for ``vector<bool>``.
 
+- In C++23 and C++26 the number of transitive includes in several headers has been reduced, improving the compilation speed.
+
+
 Deprecations and Removals
 -------------------------
 
-- The C++20 synchronization library (``<barrier>``, ``<latch>``, ``atomic::wait``, etc.) has been deprecated
+- The C++20 synchronization library (``<barrier>``, ``<latch>``, ``std::atomic::wait``, etc.) has been deprecated
   in language modes prior to C++20. If you are using these features prior to C++20, please update to ``-std=c++20``.
   In LLVM 20, the C++20 synchronization library will be removed entirely in language modes prior to C++20.
 
 - ``_LIBCPP_DISABLE_NODISCARD_EXT`` has been removed. ``[[nodiscard]]`` applications are now unconditional.
-  This decision is based on LEWGs discussion on `P3122 <https://wg21.link/P3122>` and `P3162 <https://wg21.link/P3162>`
+  This decision is based on LEWGs discussion on `P3122 <https://wg21.link/P3122>`_ and `P3162 <https://wg21.link/P3162>`_
   to not use ``[[nodiscard]]`` in the standard.
 
 - The ``LIBCXX_ENABLE_ASSERTIONS`` CMake variable that was used to enable the safe mode has been deprecated and setting
@@ -151,10 +175,11 @@ Deprecations and Removals
 - libc++ no longer supports ``std::allocator<const T>`` and containers of ``const``-qualified element type, such
   as ``std::vector<const T>`` and ``std::list<const T>``. This used to be supported as an undocumented extension.
   If you were using ``std::vector<const T>``, replace it with ``std::vector<T>`` instead. The
-  ``_LIBCPP_ENABLE_REMOVED_ALLOCATOR_CONST`` macro can be defined to temporarily re-enable this extension as
-  folks transition their code. This macro will be honored for one released and ignored starting in LLVM 20.
+  ``_LIBCPP_ENABLE_REMOVED_ALLOCATOR_CONST`` macro can be defined
+  to temporarily re-enable this extension to make it easier to update user code.
+  This macro will be honored for one released and ignored starting in LLVM 20.
   To assist with the clean-up process, consider running your code through Clang Tidy, with
-  `std-allocator-const <https://clang.llvm.org/extra/clang-tidy/checks/portability/std-allocator-const.html>`
+  `std-allocator-const <https://clang.llvm.org/extra/clang-tidy/checks/portability/std-allocator-const.html>`_
   enabled.
 
 - When configuring libc++ with localization or threads disabled, the library no longer emits an error when
@@ -187,6 +212,9 @@ LLVM 20
   ``_LIBCPP_ENABLE_REMOVED_WEEKDAY_RELATIONAL_OPERATORS`` macro that was used to re-enable this extension will be
   ignored in LLVM 20.
 
+- The ``_LIBCPP_ENABLE_REMOVED_ALLOCATOR_CONST`` macro will no longer have an effect.
+
+
 LLVM 21
 ~~~~~~~
 
@@ -197,6 +225,7 @@ LLVM 21
 
   If you are using C++03 in your project, you should consider moving to a newer version of the Standard to get the most out of libc++.
 
+
 ABI Affecting Changes
 ---------------------
 
@@ -211,7 +240,7 @@ Build System Changes
 - The ``LIBCXX_EXECUTOR`` and ``LIBCXXABI_EXECUTOR`` CMake variables have been removed. Please
   set ``LIBCXX_TEST_PARAMS`` to ``executor=<...>`` instead.
 
-- The Cmake variable ``LIBCXX_ENABLE_CLANG_TIDY`` has been removed. The build system has been changed
+- The CMake variable ``LIBCXX_ENABLE_CLANG_TIDY`` has been removed. The build system has been changed
   to automatically detect the presence of ``clang-tidy`` and the required ``Clang`` libraries.
 
 - The CMake options ``LIBCXX_INSTALL_MODULES`` now defaults to ``ON``.
diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst
index fb677b1667ddc..f959c8829277e 100644
--- a/libcxx/docs/ReleaseNotes/20.rst
+++ b/libcxx/docs/ReleaseNotes/20.rst
@@ -59,16 +59,28 @@ Deprecations and Removals
   ``_LIBCPP_ENABLE_REMOVED_WEEKDAY_RELATIONAL_OPERATORS`` macro that was used to re-enable this extension will be
   ignored in LLVM 20.
 
+- TODO: The ``_LIBCPP_ENABLE_REMOVED_ALLOCATOR_CONST`` macro will no longer have an effect.
 
 Upcoming Deprecations and Removals
 ----------------------------------
 
-LLVM 21
+LLVM 20
 ~~~~~~~
 
 - TODO
 
 
+LLVM 21
+~~~~~~~
+
+- The status of the C++03 implementation will be frozen after the LLVM 21 release. This means that starting in LLVM 22, non-critical bug fixes may not be back-ported
+  to C++03, including LWG issues. C++03 is a legacy platform, where most projects are no longer actively maintained. To
+  reduce the amount of fixes required to keep such legacy projects compiling with up-to-date toolchains, libc++ will aim to freeze the status of the headers in C++03 mode to avoid unintended breaking changes.
+  See https://discourse.llvm.org/t/rfc-freezing-c-03-headers-in-libc for more details.
+
+  If you are using C++03 in your project, you should consider moving to a newer version of the Standard to get the most out of libc++.
+
+
 ABI Affecting Changes
 ---------------------
 
diff --git a/libcxx/docs/Status/SpecialMath.rst b/libcxx/docs/Status/SpecialMath.rst
index fcc9f03e3ae64..46e5c97cdaab2 100644
--- a/libcxx/docs/Status/SpecialMath.rst
+++ b/libcxx/docs/Status/SpecialMath.rst
@@ -1,4 +1,4 @@
-.. special-math-status:
+.. _special-math-status:
 
 ======================================================
 libc++ Mathematical Special Functions Status (P0226R1)

From 3120547296c558634261ec944d7846be56eba306 Mon Sep 17 00:00:00 2001
From: Ian Anderson <iana@apple.com>
Date: Tue, 23 Jul 2024 13:02:59 -0700
Subject: [PATCH 024/427] [clang][headers] Including stddef.h always redefines
 NULL (#99727)

stddef.h always includes __stddef_null.h. This is fine in modules
because it's not possible to re-include the pcm, and it's necessary to
export the _Builtin_stddef.null submodule. However, without modules it
causes NULL to always get redefined which disrupts some C++ code. Rework
the inclusion of __stddef_null.h so that with not building with modules
it's only included if __need_NULL is set by the includer, or it's the
first time stddef.h is being included.

(cherry picked from commit 92a9d4831d5e40c286247c30fcd794563adbef6e)
---
 clang/lib/Headers/stdarg.h         |  4 +-
 clang/lib/Headers/stddef.h         | 21 ++++++++-
 clang/test/Headers/stddefneeds.cpp | 15 ++++--
 clang/test/Modules/stddef.cpp      | 73 ++++++++++++++++++++++++++++++
 4 files changed, 105 insertions(+), 8 deletions(-)
 create mode 100644 clang/test/Modules/stddef.cpp

diff --git a/clang/lib/Headers/stdarg.h b/clang/lib/Headers/stdarg.h
index 8292ab907becf..6203d7a600a23 100644
--- a/clang/lib/Headers/stdarg.h
+++ b/clang/lib/Headers/stdarg.h
@@ -20,19 +20,18 @@
  * modules.
  */
 #if defined(__MVS__) && __has_include_next(<stdarg.h>)
-#include <__stdarg_header_macro.h>
 #undef __need___va_list
 #undef __need_va_list
 #undef __need_va_arg
 #undef __need___va_copy
 #undef __need_va_copy
+#include <__stdarg_header_macro.h>
 #include_next <stdarg.h>
 
 #else
 #if !defined(__need___va_list) && !defined(__need_va_list) &&                  \
     !defined(__need_va_arg) && !defined(__need___va_copy) &&                   \
     !defined(__need_va_copy)
-#include <__stdarg_header_macro.h>
 #define __need___va_list
 #define __need_va_list
 #define __need_va_arg
@@ -45,6 +44,7 @@
     !defined(__STRICT_ANSI__)
 #define __need_va_copy
 #endif
+#include <__stdarg_header_macro.h>
 #endif
 
 #ifdef __need___va_list
diff --git a/clang/lib/Headers/stddef.h b/clang/lib/Headers/stddef.h
index 8985c526e8fc5..99b275aebf5aa 100644
--- a/clang/lib/Headers/stddef.h
+++ b/clang/lib/Headers/stddef.h
@@ -20,7 +20,6 @@
  * modules.
  */
 #if defined(__MVS__) && __has_include_next(<stddef.h>)
-#include <__stddef_header_macro.h>
 #undef __need_ptrdiff_t
 #undef __need_size_t
 #undef __need_rsize_t
@@ -31,6 +30,7 @@
 #undef __need_max_align_t
 #undef __need_offsetof
 #undef __need_wint_t
+#include <__stddef_header_macro.h>
 #include_next <stddef.h>
 
 #else
@@ -40,7 +40,6 @@
     !defined(__need_NULL) && !defined(__need_nullptr_t) &&                     \
     !defined(__need_unreachable) && !defined(__need_max_align_t) &&            \
     !defined(__need_offsetof) && !defined(__need_wint_t)
-#include <__stddef_header_macro.h>
 #define __need_ptrdiff_t
 #define __need_size_t
 /* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is
@@ -49,7 +48,24 @@
 #define __need_rsize_t
 #endif
 #define __need_wchar_t
+#if !defined(__STDDEF_H) || __has_feature(modules)
+/*
+ * __stddef_null.h is special when building without modules: if __need_NULL is
+ * set, then it will unconditionally redefine NULL. To avoid stepping on client
+ * definitions of NULL, __need_NULL should only be set the first time this
+ * header is included, that is when __STDDEF_H is not defined. However, when
+ * building with modules, this header is a textual header and needs to
+ * unconditionally include __stdef_null.h to support multiple submodules
+ * exporting _Builtin_stddef.null. Take module SM with submodules A and B, whose
+ * headers both include stddef.h When SM.A builds, __STDDEF_H will be defined.
+ * When SM.B builds, the definition from SM.A will leak when building without
+ * local submodule visibility. stddef.h wouldn't include __stddef_null.h, and
+ * SM.B wouldn't import _Builtin_stddef.null, and SM.B's `export *` wouldn't
+ * export NULL as expected. When building with modules, always include
+ * __stddef_null.h so that everything works as expected.
+ */
 #define __need_NULL
+#endif
 #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) ||              \
     defined(__cplusplus)
 #define __need_nullptr_t
@@ -65,6 +81,7 @@
 /* wint_t is provided by <wchar.h> and not <stddef.h>. It's here
  * for compatibility, but must be explicitly requested. Therefore
  * __need_wint_t is intentionally not defined here. */
+#include <__stddef_header_macro.h>
 #endif
 
 #if defined(__need_ptrdiff_t)
diff --git a/clang/test/Headers/stddefneeds.cpp b/clang/test/Headers/stddefneeds.cpp
index 0763bbdee13ae..0282e8afa600d 100644
--- a/clang/test/Headers/stddefneeds.cpp
+++ b/clang/test/Headers/stddefneeds.cpp
@@ -56,14 +56,21 @@ max_align_t m5;
 #undef NULL
 #define NULL 0
 
-// glibc (and other) headers then define __need_NULL and rely on stddef.h
-// to redefine NULL to the correct value again.
-#define __need_NULL
+// Including stddef.h again shouldn't redefine NULL
 #include <stddef.h>
 
 // gtk headers then use __attribute__((sentinel)), which doesn't work if NULL
 // is 0.
-void f(const char* c, ...) __attribute__((sentinel));
+void f(const char* c, ...) __attribute__((sentinel)); // expected-note{{function has been explicitly marked sentinel here}}
 void g() {
+  f("", NULL); // expected-warning{{missing sentinel in function call}}
+}
+
+// glibc (and other) headers then define __need_NULL and rely on stddef.h
+// to redefine NULL to the correct value again.
+#define __need_NULL
+#include <stddef.h>
+
+void h() {
   f("", NULL);  // Shouldn't warn.
 }
diff --git a/clang/test/Modules/stddef.cpp b/clang/test/Modules/stddef.cpp
new file mode 100644
index 0000000000000..c53bfa3485194
--- /dev/null
+++ b/clang/test/Modules/stddef.cpp
@@ -0,0 +1,73 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/no-lsv -I%t %t/stddef.cpp -verify
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-local-submodule-visibility -fmodules-cache-path=%t/lsv -I%t %t/stddef.cpp -verify
+
+//--- stddef.cpp
+#include <b.h>
+
+void *pointer = NULL;
+size_t size = 0;
+
+// When building with modules, a pcm is never re-imported, so re-including
+// stddef.h will not re-import _Builtin_stddef.null to restore the definition of
+// NULL, even though stddef.h will unconditionally include __stddef_null.h when
+// building with modules.
+#undef NULL
+#include <stddef.h>
+
+void *anotherPointer = NULL; // expected-error{{use of undeclared identifier 'NULL'}}
+
+// stddef.h needs to be a `textual` header to support clients doing things like
+// this.
+//
+// #define __need_NULL
+// #include <stddef.h>
+//
+// As a textual header designed to be included multiple times, it can't directly
+// declare anything, or those declarations would go into every module that
+// included it. e.g. if stddef.h contained all of its declarations, and modules
+// A and B included stddef.h, they would both have the declaration for size_t.
+// That breaks Swift, which uses the module name as part of the type name, i.e.
+// A.size_t and B.size_t are treated as completely different types in Swift and
+// cannot be interchanged. To fix that, stddef.h (and stdarg.h) are split out
+// into a separate file per __need macro that can be normal headers in explicit
+// submodules. That runs into yet another wrinkle though. When modules build,
+// declarations from previous submodules leak into subsequent ones when not
+// using local submodule visibility. Consider if stddef.h did the normal thing.
+//
+// #ifndef __STDDEF_H
+// #define __STDDEF_H
+// // include all of the sub-headers
+// #endif
+//
+// When SM builds without local submodule visibility, it will precompile a.h
+// first. When it gets to b.h, the __STDDEF_H declaration from precompiling a.h
+// will leak, and so when b.h includes stddef.h, it won't include any of its
+// sub-headers, and SM.B will thus not import _Builtin_stddef or make any of its
+// submodules visible. Precompiling b.h will be fine since it sees all of the
+// declarations from a.h including stddef.h, but clients that only include b.h
+// will not see any of the stddef.h types. stddef.h thus has to make sure to
+// always include the necessary sub-headers, even if they've been included
+// already. They all have their own header guards to allow this.
+// __stddef_null.h is extra special, so this test makes sure to cover NULL plus
+// one of the normal stddef.h types.
+
+//--- module.modulemap
+module SM {
+  module A {
+    header "a.h"
+    export *
+  }
+
+  module B {
+    header "b.h"
+    export *
+  }
+}
+
+//--- a.h
+#include <stddef.h>
+
+//--- b.h
+#include <stddef.h>

From c5f34876cd00c01bf5ecd38a2bfc5377867e4b04 Mon Sep 17 00:00:00 2001
From: Wesley Wiser <wwiser@gmail.com>
Date: Tue, 23 Jul 2024 11:43:30 -0500
Subject: [PATCH 025/427] [LLVM] [MC] Update frame layout & CFI generation to
 handle frames larger than 2gb (#99263)

Rebase of #84114. I've only included the core changes to frame layout
calculation & CFI generation which sidesteps the regressions found after
merging #84114. Since these changes are a necessary precursor to the
overall fix and are themselves slightly beneficial as CFI is now
generated correctly, I think it is reasonable to merge this first step.

---

For very large stack frames, the offset from the stack pointer to a
local can be more than 2^31 which overflows various `int` offsets in the
frame lowering code.

This patch updates the frame lowering code to calculate the offsets as
64-bit values and fixes CFI to use the corrected sizes.

After this patch, additional work is needed to fix offset truncations in
each target's codegen.

(cherry picked from commit ca076f7a63f6a80e2e38315ec462be354b196b8d)
---
 llvm/include/llvm/CodeGen/MachineFrameInfo.h  | 14 +++---
 .../llvm/CodeGen/TargetFrameLowering.h        |  4 +-
 llvm/include/llvm/MC/MCAsmBackend.h           |  2 +-
 llvm/include/llvm/MC/MCDwarf.h                | 44 +++++++++----------
 llvm/lib/CodeGen/CFIInstrInserter.cpp         | 10 ++---
 llvm/lib/CodeGen/MachineFrameInfo.cpp         |  2 +-
 llvm/lib/CodeGen/PrologEpilogInserter.cpp     |  4 +-
 llvm/lib/MC/MCDwarf.cpp                       |  6 +--
 .../MCTargetDesc/AArch64AsmBackend.cpp        |  8 ++--
 llvm/lib/Target/ARM/ARMFrameLowering.cpp      |  4 +-
 .../Target/ARM/MCTargetDesc/ARMAsmBackend.cpp |  2 +-
 .../ARM/MCTargetDesc/ARMAsmBackendDarwin.h    |  2 +-
 .../Target/Hexagon/HexagonFrameLowering.cpp   |  4 +-
 .../lib/Target/MSP430/MSP430FrameLowering.cpp |  2 +-
 .../Target/X86/MCTargetDesc/X86AsmBackend.cpp | 12 ++---
 llvm/lib/Target/X86/X86FrameLowering.cpp      |  4 +-
 llvm/test/CodeGen/PowerPC/huge-frame-size.ll  |  2 +-
 llvm/test/CodeGen/RISCV/pr88365.ll            |  2 +-
 llvm/test/CodeGen/X86/huge-stack.ll           |  2 +-
 19 files changed, 65 insertions(+), 65 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 466fed7fb3a29..213b7ec6b3fbf 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -251,7 +251,7 @@ class MachineFrameInfo {
   /// targets, this value is only used when generating debug info (via
   /// TargetRegisterInfo::getFrameIndexReference); when generating code, the
   /// corresponding adjustments are performed directly.
-  int OffsetAdjustment = 0;
+  int64_t OffsetAdjustment = 0;
 
   /// The prolog/epilog code inserter may process objects that require greater
   /// alignment than the default alignment the target provides.
@@ -280,7 +280,7 @@ class MachineFrameInfo {
   /// setup/destroy pseudo instructions (as defined in the TargetFrameInfo
   /// class).  This information is important for frame pointer elimination.
   /// It is only valid during and after prolog/epilog code insertion.
-  unsigned MaxCallFrameSize = ~0u;
+  uint64_t MaxCallFrameSize = ~UINT64_C(0);
 
   /// The number of bytes of callee saved registers that the target wants to
   /// report for the current function in the CodeView S_FRAMEPROC record.
@@ -593,10 +593,10 @@ class MachineFrameInfo {
   uint64_t estimateStackSize(const MachineFunction &MF) const;
 
   /// Return the correction for frame offsets.
-  int getOffsetAdjustment() const { return OffsetAdjustment; }
+  int64_t getOffsetAdjustment() const { return OffsetAdjustment; }
 
   /// Set the correction for frame offsets.
-  void setOffsetAdjustment(int Adj) { OffsetAdjustment = Adj; }
+  void setOffsetAdjustment(int64_t Adj) { OffsetAdjustment = Adj; }
 
   /// Return the alignment in bytes that this function must be aligned to,
   /// which is greater than the default stack alignment provided by the target.
@@ -663,7 +663,7 @@ class MachineFrameInfo {
   /// CallFrameSetup/Destroy pseudo instructions are used by the target, and
   /// then only during or after prolog/epilog code insertion.
   ///
-  unsigned getMaxCallFrameSize() const {
+  uint64_t getMaxCallFrameSize() const {
     // TODO: Enable this assert when targets are fixed.
     //assert(isMaxCallFrameSizeComputed() && "MaxCallFrameSize not computed yet");
     if (!isMaxCallFrameSizeComputed())
@@ -671,9 +671,9 @@ class MachineFrameInfo {
     return MaxCallFrameSize;
   }
   bool isMaxCallFrameSizeComputed() const {
-    return MaxCallFrameSize != ~0u;
+    return MaxCallFrameSize != ~UINT64_C(0);
   }
-  void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; }
+  void setMaxCallFrameSize(uint64_t S) { MaxCallFrameSize = S; }
 
   /// Returns how many bytes of callee-saved registers the target pushed in the
   /// prologue. Only used for debug info.
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 0b9cacecc7cbe..72978b2f746d7 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -51,7 +51,7 @@ class TargetFrameLowering {
   // Maps a callee saved register to a stack slot with a fixed offset.
   struct SpillSlot {
     unsigned Reg;
-    int Offset; // Offset relative to stack pointer on function entry.
+    int64_t Offset; // Offset relative to stack pointer on function entry.
   };
 
   struct DwarfFrameBase {
@@ -66,7 +66,7 @@ class TargetFrameLowering {
       // Used with FrameBaseKind::Register.
       unsigned Reg;
       // Used with FrameBaseKind::CFA.
-      int Offset;
+      int64_t Offset;
       struct WasmFrameBase WasmLoc;
     } Location;
   };
diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h
index 736f44686689b..d1d1814dd8b52 100644
--- a/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -225,7 +225,7 @@ class MCAsmBackend {
   virtual void handleAssemblerFlag(MCAssemblerFlag Flag) {}
 
   /// Generate the compact unwind encoding for the CFI instructions.
-  virtual uint32_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
+  virtual uint64_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
                                                  const MCContext *Ctxt) const {
     return 0;
   }
diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h
index d0e45ab59a92e..7dba67efa22fa 100644
--- a/llvm/include/llvm/MC/MCDwarf.h
+++ b/llvm/include/llvm/MC/MCDwarf.h
@@ -509,11 +509,11 @@ class MCCFIInstruction {
   union {
     struct {
       unsigned Register;
-      int Offset;
+      int64_t Offset;
     } RI;
     struct {
       unsigned Register;
-      int Offset;
+      int64_t Offset;
       unsigned AddressSpace;
     } RIA;
     struct {
@@ -527,7 +527,7 @@ class MCCFIInstruction {
   std::vector<char> Values;
   std::string Comment;
 
-  MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int O, SMLoc Loc,
+  MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int64_t O, SMLoc Loc,
                    StringRef V = "", StringRef Comment = "")
       : Label(L), Operation(Op), Loc(Loc), Values(V.begin(), V.end()),
         Comment(Comment) {
@@ -539,7 +539,7 @@ class MCCFIInstruction {
     assert(Op == OpRegister);
     U.RR = {R1, R2};
   }
-  MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int O, unsigned AS,
+  MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int64_t O, unsigned AS,
                    SMLoc Loc)
       : Label(L), Operation(Op), Loc(Loc) {
     assert(Op == OpLLVMDefAspaceCfa);
@@ -555,8 +555,8 @@ class MCCFIInstruction {
 public:
   /// .cfi_def_cfa defines a rule for computing CFA as: take address from
   /// Register and add Offset to it.
-  static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int Offset,
-                                    SMLoc Loc = {}) {
+  static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register,
+                                    int64_t Offset, SMLoc Loc = {}) {
     return MCCFIInstruction(OpDefCfa, L, Register, Offset, Loc);
   }
 
@@ -564,13 +564,13 @@ class MCCFIInstruction {
   /// on Register will be used instead of the old one. Offset remains the same.
   static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register,
                                                SMLoc Loc = {}) {
-    return MCCFIInstruction(OpDefCfaRegister, L, Register, 0, Loc);
+    return MCCFIInstruction(OpDefCfaRegister, L, Register, INT64_C(0), Loc);
   }
 
   /// .cfi_def_cfa_offset modifies a rule for computing CFA. Register
   /// remains the same, but offset is new. Note that it is the absolute offset
   /// that will be added to a defined register to the compute CFA address.
-  static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int Offset,
+  static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset,
                                           SMLoc Loc = {}) {
     return MCCFIInstruction(OpDefCfaOffset, L, 0, Offset, Loc);
   }
@@ -578,7 +578,7 @@ class MCCFIInstruction {
   /// .cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but
   /// Offset is a relative value that is added/subtracted from the previous
   /// offset.
-  static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int Adjustment,
+  static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment,
                                                 SMLoc Loc = {}) {
     return MCCFIInstruction(OpAdjustCfaOffset, L, 0, Adjustment, Loc);
   }
@@ -588,7 +588,7 @@ class MCCFIInstruction {
   /// be the result of evaluating the DWARF operation expression
   /// `DW_OP_constu AS; DW_OP_aspace_bregx R, B` as a location description.
   static MCCFIInstruction createLLVMDefAspaceCfa(MCSymbol *L, unsigned Register,
-                                                 int Offset,
+                                                 int64_t Offset,
                                                  unsigned AddressSpace,
                                                  SMLoc Loc) {
     return MCCFIInstruction(OpLLVMDefAspaceCfa, L, Register, Offset,
@@ -598,7 +598,7 @@ class MCCFIInstruction {
   /// .cfi_offset Previous value of Register is saved at offset Offset
   /// from CFA.
   static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register,
-                                       int Offset, SMLoc Loc = {}) {
+                                       int64_t Offset, SMLoc Loc = {}) {
     return MCCFIInstruction(OpOffset, L, Register, Offset, Loc);
   }
 
@@ -606,7 +606,7 @@ class MCCFIInstruction {
   /// Offset from the current CFA register. This is transformed to .cfi_offset
   /// using the known displacement of the CFA register from the CFA.
   static MCCFIInstruction createRelOffset(MCSymbol *L, unsigned Register,
-                                          int Offset, SMLoc Loc = {}) {
+                                          int64_t Offset, SMLoc Loc = {}) {
     return MCCFIInstruction(OpRelOffset, L, Register, Offset, Loc);
   }
 
@@ -619,12 +619,12 @@ class MCCFIInstruction {
 
   /// .cfi_window_save SPARC register window is saved.
   static MCCFIInstruction createWindowSave(MCSymbol *L, SMLoc Loc = {}) {
-    return MCCFIInstruction(OpWindowSave, L, 0, 0, Loc);
+    return MCCFIInstruction(OpWindowSave, L, 0, INT64_C(0), Loc);
   }
 
   /// .cfi_negate_ra_state AArch64 negate RA state.
   static MCCFIInstruction createNegateRAState(MCSymbol *L, SMLoc Loc = {}) {
-    return MCCFIInstruction(OpNegateRAState, L, 0, 0, Loc);
+    return MCCFIInstruction(OpNegateRAState, L, 0, INT64_C(0), Loc);
   }
 
   /// .cfi_restore says that the rule for Register is now the same as it
@@ -632,31 +632,31 @@ class MCCFIInstruction {
   /// by .cfi_startproc were executed.
   static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register,
                                         SMLoc Loc = {}) {
-    return MCCFIInstruction(OpRestore, L, Register, 0, Loc);
+    return MCCFIInstruction(OpRestore, L, Register, INT64_C(0), Loc);
   }
 
   /// .cfi_undefined From now on the previous value of Register can't be
   /// restored anymore.
   static MCCFIInstruction createUndefined(MCSymbol *L, unsigned Register,
                                           SMLoc Loc = {}) {
-    return MCCFIInstruction(OpUndefined, L, Register, 0, Loc);
+    return MCCFIInstruction(OpUndefined, L, Register, INT64_C(0), Loc);
   }
 
   /// .cfi_same_value Current value of Register is the same as in the
   /// previous frame. I.e., no restoration is needed.
   static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register,
                                           SMLoc Loc = {}) {
-    return MCCFIInstruction(OpSameValue, L, Register, 0, Loc);
+    return MCCFIInstruction(OpSameValue, L, Register, INT64_C(0), Loc);
   }
 
   /// .cfi_remember_state Save all current rules for all registers.
   static MCCFIInstruction createRememberState(MCSymbol *L, SMLoc Loc = {}) {
-    return MCCFIInstruction(OpRememberState, L, 0, 0, Loc);
+    return MCCFIInstruction(OpRememberState, L, 0, INT64_C(0), Loc);
   }
 
   /// .cfi_restore_state Restore the previously saved state.
   static MCCFIInstruction createRestoreState(MCSymbol *L, SMLoc Loc = {}) {
-    return MCCFIInstruction(OpRestoreState, L, 0, 0, Loc);
+    return MCCFIInstruction(OpRestoreState, L, 0, INT64_C(0), Loc);
   }
 
   /// .cfi_escape Allows the user to add arbitrary bytes to the unwind
@@ -667,7 +667,7 @@ class MCCFIInstruction {
   }
 
   /// A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE
-  static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int Size,
+  static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int64_t Size,
                                             SMLoc Loc = {}) {
     return MCCFIInstruction(OpGnuArgsSize, L, 0, Size, Loc);
   }
@@ -702,7 +702,7 @@ class MCCFIInstruction {
     return U.RIA.AddressSpace;
   }
 
-  int getOffset() const {
+  int64_t getOffset() const {
     if (Operation == OpLLVMDefAspaceCfa)
       return U.RIA.Offset;
     assert(Operation == OpDefCfa || Operation == OpOffset ||
@@ -736,7 +736,7 @@ struct MCDwarfFrameInfo {
   unsigned CurrentCfaRegister = 0;
   unsigned PersonalityEncoding = 0;
   unsigned LsdaEncoding = 0;
-  uint32_t CompactUnwindEncoding = 0;
+  uint64_t CompactUnwindEncoding = 0;
   bool IsSignalFrame = false;
   bool IsSimple = false;
   unsigned RAReg = static_cast<unsigned>(INT_MAX);
diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp
index 1ff01ad34b30e..06de92515c044 100644
--- a/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -68,9 +68,9 @@ class CFIInstrInserter : public MachineFunctionPass {
   struct MBBCFAInfo {
     MachineBasicBlock *MBB;
     /// Value of cfa offset valid at basic block entry.
-    int IncomingCFAOffset = -1;
+    int64_t IncomingCFAOffset = -1;
     /// Value of cfa offset valid at basic block exit.
-    int OutgoingCFAOffset = -1;
+    int64_t OutgoingCFAOffset = -1;
     /// Value of cfa register valid at basic block entry.
     unsigned IncomingCFARegister = 0;
     /// Value of cfa register valid at basic block exit.
@@ -120,7 +120,7 @@ class CFIInstrInserter : public MachineFunctionPass {
   /// Return the cfa offset value that should be set at the beginning of a MBB
   /// if needed. The negated value is needed when creating CFI instructions that
   /// set absolute offset.
-  int getCorrectCFAOffset(MachineBasicBlock *MBB) {
+  int64_t getCorrectCFAOffset(MachineBasicBlock *MBB) {
     return MBBVector[MBB->getNumber()].IncomingCFAOffset;
   }
 
@@ -175,7 +175,7 @@ void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) {
 
 void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
   // Outgoing cfa offset set by the block.
-  int SetOffset = MBBInfo.IncomingCFAOffset;
+  int64_t SetOffset = MBBInfo.IncomingCFAOffset;
   // Outgoing cfa register set by the block.
   unsigned SetRegister = MBBInfo.IncomingCFARegister;
   MachineFunction *MF = MBBInfo.MBB->getParent();
@@ -188,7 +188,7 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
   for (MachineInstr &MI : *MBBInfo.MBB) {
     if (MI.isCFIInstruction()) {
       std::optional<unsigned> CSRReg;
-      std::optional<int> CSROffset;
+      std::optional<int64_t> CSROffset;
       unsigned CFIIndex = MI.getOperand(0).getCFIIndex();
       const MCCFIInstruction &CFI = Instrs[CFIIndex];
       switch (CFI.getOperation()) {
diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp
index 853de4c88caeb..e4b993850f73d 100644
--- a/llvm/lib/CodeGen/MachineFrameInfo.cpp
+++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -197,7 +197,7 @@ void MachineFrameInfo::computeMaxCallFrameSize(
     for (MachineInstr &MI : MBB) {
       unsigned Opcode = MI.getOpcode();
       if (Opcode == FrameSetupOpcode || Opcode == FrameDestroyOpcode) {
-        unsigned Size = TII.getFrameSize(MI);
+        uint64_t Size = TII.getFrameSize(MI);
         MaxCallFrameSize = std::max(MaxCallFrameSize, Size);
         if (FrameSDOps != nullptr)
           FrameSDOps->push_back(&MI);
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 3db5e17615fd4..cd5d877e53d82 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -366,8 +366,8 @@ void PEI::calculateCallFrameInfo(MachineFunction &MF) {
     return;
 
   // (Re-)Compute the MaxCallFrameSize.
-  [[maybe_unused]] uint32_t MaxCFSIn =
-      MFI.isMaxCallFrameSizeComputed() ? MFI.getMaxCallFrameSize() : UINT32_MAX;
+  [[maybe_unused]] uint64_t MaxCFSIn =
+      MFI.isMaxCallFrameSizeComputed() ? MFI.getMaxCallFrameSize() : UINT64_MAX;
   std::vector<MachineBasicBlock::iterator> FrameSDOps;
   MFI.computeMaxCallFrameSize(MF, &FrameSDOps);
   assert(MFI.getMaxCallFrameSize() <= MaxCFSIn &&
diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp
index efafd555c5c5c..1297dc3828b58 100644
--- a/llvm/lib/MC/MCDwarf.cpp
+++ b/llvm/lib/MC/MCDwarf.cpp
@@ -1299,8 +1299,8 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
 namespace {
 
 class FrameEmitterImpl {
-  int CFAOffset = 0;
-  int InitialCFAOffset = 0;
+  int64_t CFAOffset = 0;
+  int64_t InitialCFAOffset = 0;
   bool IsEH;
   MCObjectStreamer &Streamer;
 
@@ -1414,7 +1414,7 @@ void FrameEmitterImpl::emitCFIInstruction(const MCCFIInstruction &Instr) {
     if (!IsEH)
       Reg = MRI->getDwarfRegNumFromDwarfEHRegNum(Reg);
 
-    int Offset = Instr.getOffset();
+    int64_t Offset = Instr.getOffset();
     if (IsRelative)
       Offset -= CFAOffset;
     Offset = Offset / dataAlignmentFactor;
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index be470c71ae8b6..be34a649e1c4b 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -599,7 +599,7 @@ class DarwinAArch64AsmBackend : public AArch64AsmBackend {
   }
 
   /// Generate the compact unwind encoding from the CFI directives.
-  uint32_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
+  uint64_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
                                          const MCContext *Ctxt) const override {
     ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
     if (Instrs.empty())
@@ -609,10 +609,10 @@ class DarwinAArch64AsmBackend : public AArch64AsmBackend {
       return CU::UNWIND_ARM64_MODE_DWARF;
 
     bool HasFP = false;
-    unsigned StackSize = 0;
+    uint64_t StackSize = 0;
 
-    uint32_t CompactUnwindEncoding = 0;
-    int CurOffset = 0;
+    uint64_t CompactUnwindEncoding = 0;
+    int64_t CurOffset = 0;
     for (size_t i = 0, e = Instrs.size(); i != e; ++i) {
       const MCCFIInstruction &Inst = Instrs[i];
 
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 62d01b9f7e90b..40354f9955989 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1166,7 +1166,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
         if (STI.splitFramePushPop(MF)) {
           unsigned DwarfReg = MRI->getDwarfRegNum(
               Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
-          unsigned Offset = MFI.getObjectOffset(FI);
+          int64_t Offset = MFI.getObjectOffset(FI);
           unsigned CFIIndex = MF.addFrameInst(
               MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
           BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -1188,7 +1188,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
       if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
           (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
         unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
-        unsigned Offset = MFI.getObjectOffset(FI);
+        int64_t Offset = MFI.getObjectOffset(FI);
         unsigned CFIIndex = MF.addFrameInst(
             MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
         BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index eb55a2b5e70b8..994b43f1abb49 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -1146,7 +1146,7 @@ enum CompactUnwindEncodings {
 /// instructions. If the CFI instructions describe a frame that cannot be
 /// encoded in compact unwind, the method returns UNWIND_ARM_MODE_DWARF which
 /// tells the runtime to fallback and unwind using dwarf.
-uint32_t ARMAsmBackendDarwin::generateCompactUnwindEncoding(
+uint64_t ARMAsmBackendDarwin::generateCompactUnwindEncoding(
     const MCDwarfFrameInfo *FI, const MCContext *Ctxt) const {
   DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs() << "generateCU()\n");
   // Only armv7k uses CFI based unwinding.
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index ac0c9b101cae1..9c958003ca756 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -34,7 +34,7 @@ class ARMAsmBackendDarwin : public ARMAsmBackend {
         /*Is64Bit=*/false, cantFail(MachO::getCPUType(TT)), Subtype);
   }
 
-  uint32_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
+  uint64_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
                                          const MCContext *Ctxt) const override;
 };
 } // end namespace llvm
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 6ca18528591af..05357de40e3a9 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -1659,7 +1659,7 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
   using SpillSlot = TargetFrameLowering::SpillSlot;
 
   unsigned NumFixed;
-  int MinOffset = 0;  // CS offsets are negative.
+  int64_t MinOffset = 0; // CS offsets are negative.
   const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed);
   for (const SpillSlot *S = FixedSlots; S != FixedSlots+NumFixed; ++S) {
     if (!SRegs[S->Reg])
@@ -1678,7 +1678,7 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
     Register R = x;
     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R);
     unsigned Size = TRI->getSpillSize(*RC);
-    int Off = MinOffset - Size;
+    int64_t Off = MinOffset - Size;
     Align Alignment = std::min(TRI->getSpillAlign(*RC), getStackAlign());
     Off &= -Alignment.value();
     int FI = MFI.CreateFixedSpillStackObject(Size, Off);
diff --git a/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
index f4d703ebeeab2..d0dc6dd146efd 100644
--- a/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -293,7 +293,7 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
 
   if (!hasFP(MF)) {
     MBBI = FirstCSPop;
-    int64_t Offset = -CSSize - 2;
+    int64_t Offset = -(int64_t)CSSize - 2;
     // Mark callee-saved pop instruction.
     // Define the current CFA rule to use the provided offset.
     while (MBBI != MBB.end()) {
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index a3ef11b2cab45..fcc61d0a5e2f6 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -1312,7 +1312,7 @@ class DarwinX86AsmBackend : public X86AsmBackend {
 
   /// Implementation of algorithm to generate the compact unwind encoding
   /// for the CFI instructions.
-  uint32_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
+  uint64_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
                                          const MCContext *Ctxt) const override {
     ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
     if (Instrs.empty()) return 0;
@@ -1327,13 +1327,13 @@ class DarwinX86AsmBackend : public X86AsmBackend {
     bool HasFP = false;
 
     // Encode that we are using EBP/RBP as the frame pointer.
-    uint32_t CompactUnwindEncoding = 0;
+    uint64_t CompactUnwindEncoding = 0;
 
     unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
     unsigned InstrOffset = 0;
     unsigned StackAdjust = 0;
-    unsigned StackSize = 0;
-    int MinAbsOffset = std::numeric_limits<int>::max();
+    uint64_t StackSize = 0;
+    int64_t MinAbsOffset = std::numeric_limits<int64_t>::max();
 
     for (const MCCFIInstruction &Inst : Instrs) {
       switch (Inst.getOperation()) {
@@ -1360,7 +1360,7 @@ class DarwinX86AsmBackend : public X86AsmBackend {
         memset(SavedRegs, 0, sizeof(SavedRegs));
         StackAdjust = 0;
         SavedRegIdx = 0;
-        MinAbsOffset = std::numeric_limits<int>::max();
+        MinAbsOffset = std::numeric_limits<int64_t>::max();
         InstrOffset += MoveInstrSize;
         break;
       }
@@ -1403,7 +1403,7 @@ class DarwinX86AsmBackend : public X86AsmBackend {
         unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
         SavedRegs[SavedRegIdx++] = Reg;
         StackAdjust += OffsetSize;
-        MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
+        MinAbsOffset = std::min(MinAbsOffset, std::abs(Inst.getOffset()));
         InstrOffset += PushInstrSize(Reg);
         break;
       }
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 0ff50d8ef678e..bdc9a0d29670a 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -473,7 +473,7 @@ void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
                                : FramePtr;
   unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
   // Offset = space for return address + size of the frame pointer itself.
-  unsigned Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
+  int64_t Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
   BuildCFI(MBB, MBBI, DebugLoc{},
            MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
   emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
@@ -2553,7 +2553,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
 
   if (!HasFP && NeedsDwarfCFI) {
     MBBI = FirstCSPop;
-    int64_t Offset = -CSSize - SlotSize;
+    int64_t Offset = -(int64_t)CSSize - SlotSize;
     // Mark callee-saved pop instruction.
     // Define the current CFA rule to use the provided offset.
     while (MBBI != MBB.end()) {
diff --git a/llvm/test/CodeGen/PowerPC/huge-frame-size.ll b/llvm/test/CodeGen/PowerPC/huge-frame-size.ll
index f1039df6f549a..78bdac021ac8a 100644
--- a/llvm/test/CodeGen/PowerPC/huge-frame-size.ll
+++ b/llvm/test/CodeGen/PowerPC/huge-frame-size.ll
@@ -18,7 +18,7 @@ define void @foo(i8 %x) {
 ; CHECK-LE-NEXT:    oris 0, 0, 65535
 ; CHECK-LE-NEXT:    ori 0, 0, 65504
 ; CHECK-LE-NEXT:    stdux 1, 1, 0
-; CHECK-LE-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-LE-NEXT:    .cfi_def_cfa_offset 4294967328
 ; CHECK-LE-NEXT:    li 4, 1
 ; CHECK-LE-NEXT:    addi 5, 1, 32
 ; CHECK-LE-NEXT:    stb 3, 32(1)
diff --git a/llvm/test/CodeGen/RISCV/pr88365.ll b/llvm/test/CodeGen/RISCV/pr88365.ll
index 73010fdf40447..4e4dead98ee69 100644
--- a/llvm/test/CodeGen/RISCV/pr88365.ll
+++ b/llvm/test/CodeGen/RISCV/pr88365.ll
@@ -10,7 +10,7 @@ define void @foo() {
 ; CHECK-NEXT:    .cfi_offset ra, -4
 ; CHECK-NEXT:    li a0, -2048
 ; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa_offset -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 4294967280
 ; CHECK-NEXT:    addi a0, sp, 4
 ; CHECK-NEXT:    call use
 ; CHECK-NEXT:    li a0, -2048
diff --git a/llvm/test/CodeGen/X86/huge-stack.ll b/llvm/test/CodeGen/X86/huge-stack.ll
index a7ceb4a4ee6fe..920033ba1182c 100644
--- a/llvm/test/CodeGen/X86/huge-stack.ll
+++ b/llvm/test/CodeGen/X86/huge-stack.ll
@@ -7,7 +7,7 @@ define void @foo() unnamed_addr #0 {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movabsq $8589934462, %rax # imm = 0x1FFFFFF7E
 ; CHECK-NEXT:    subq %rax, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset -122
+; CHECK-NEXT:    .cfi_def_cfa_offset 8589934470
 ; CHECK-NEXT:    movb $42, -129(%rsp)
 ; CHECK-NEXT:    movb $43, -128(%rsp)
 ; CHECK-NEXT:    movabsq $8589934462, %rax # imm = 0x1FFFFFF7E

From 07e9f01f8935c0006bdaf209acdce29cda7685d3 Mon Sep 17 00:00:00 2001
From: cor3ntin <corentinjabot@gmail.com>
Date: Wed, 24 Jul 2024 17:28:44 +0200
Subject: [PATCH 026/427] [Clang] Fix an assertion failure introduced by #93430
 (#100313)

The PR #93430 introduced an assertion that did not make any sense. and
caused a regression. The fix is to simply remove the assertion.

No changelog. the intent is to backport this fix to clang 19.

(cherry picked from commit dd82a84e0eeafb017c7220c4a9fbd0a8a407f8a9)
---
 clang/lib/Sema/SemaExpr.cpp                | 1 -
 clang/test/SemaCXX/cxx2b-deducing-this.cpp | 6 ++++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 9207bf7a41349..206194930f3b4 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -5730,7 +5730,6 @@ static bool isParenthetizedAndQualifiedAddressOfExpr(Expr *Fn) {
   if (!UO || UO->getOpcode() != clang::UO_AddrOf)
     return false;
   if (auto *DRE = dyn_cast<DeclRefExpr>(UO->getSubExpr()->IgnoreParens())) {
-    assert(isa<FunctionDecl>(DRE->getDecl()) && "expected a function");
     return DRE->hasQualifier();
   }
   if (auto *OVL = dyn_cast<OverloadExpr>(UO->getSubExpr()->IgnoreParens()))
diff --git a/clang/test/SemaCXX/cxx2b-deducing-this.cpp b/clang/test/SemaCXX/cxx2b-deducing-this.cpp
index 5cbc1f735383b..4811b6052254c 100644
--- a/clang/test/SemaCXX/cxx2b-deducing-this.cpp
+++ b/clang/test/SemaCXX/cxx2b-deducing-this.cpp
@@ -895,6 +895,10 @@ void g() {
 }
 
 namespace P2797 {
+
+int bar(void) { return 55; }
+int (&fref)(void) = bar;
+
 struct C {
   void c(this const C&);    // #first
   void c() &;               // #second
@@ -915,6 +919,8 @@ struct C {
     (&C::c)(C{});
     (&C::c)(*this);     // expected-error {{call to non-static member function without an object argument}}
     (&C::c)();
+
+    (&fref)();
   }
 };
 }

From 12a11dc676fd36d790b705b918597877fb34772a Mon Sep 17 00:00:00 2001
From: cor3ntin <corentinjabot@gmail.com>
Date: Wed, 24 Jul 2024 17:27:58 +0200
Subject: [PATCH 027/427] [Clang][NFC] Simplify initialization of
 `OverloadCandidate` objects. (#100318)

Initialize some fields of OverloadCandidate in its constructor. The goal
here is try to fix read of uninitialized variable (which I was not able
to reproduce)
https://github.com/llvm/llvm-project/pull/93430#issuecomment-2187544278

We should certainly try to improve the construction of
`OverloadCandidate` further as it can be quite britle.

(cherry picked from commit 7d787df5b932b73aae6532d1e981152f103f9244)
---
 clang/include/clang/Sema/Overload.h |  4 +++-
 clang/lib/Sema/SemaOverload.cpp     | 20 +-------------------
 2 files changed, 4 insertions(+), 20 deletions(-)

diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h
index 9d8b797af6663..26ffe057c74a2 100644
--- a/clang/include/clang/Sema/Overload.h
+++ b/clang/include/clang/Sema/Overload.h
@@ -998,7 +998,9 @@ class Sema;
   private:
     friend class OverloadCandidateSet;
     OverloadCandidate()
-        : IsSurrogate(false), IsADLCandidate(CallExpr::NotADL), RewriteKind(CRK_None) {}
+        : IsSurrogate(false), IgnoreObjectArgument(false),
+          TookAddressOfOverload(false), IsADLCandidate(CallExpr::NotADL),
+          RewriteKind(CRK_None) {}
   };
 
   /// OverloadCandidateSet - A set of overload candidates, used in C++
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index a8d250fbabfed..554a2df14bea6 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -6857,10 +6857,7 @@ void Sema::AddOverloadCandidate(
   Candidate.Viable = true;
   Candidate.RewriteKind =
       CandidateSet.getRewriteInfo().getRewriteKind(Function, PO);
-  Candidate.IsSurrogate = false;
   Candidate.IsADLCandidate = IsADLCandidate;
-  Candidate.IgnoreObjectArgument = false;
-  Candidate.TookAddressOfOverload = false;
   Candidate.ExplicitCallArguments = Args.size();
 
   // Explicit functions are not actually candidates at all if we're not
@@ -7422,8 +7419,6 @@ Sema::AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl,
   Candidate.Function = Method;
   Candidate.RewriteKind =
       CandidateSet.getRewriteInfo().getRewriteKind(Method, PO);
-  Candidate.IsSurrogate = false;
-  Candidate.IgnoreObjectArgument = false;
   Candidate.TookAddressOfOverload =
       CandidateSet.getKind() == OverloadCandidateSet::CSK_AddressOfOverloadSet;
   Candidate.ExplicitCallArguments = Args.size();
@@ -7617,7 +7612,6 @@ void Sema::AddMethodTemplateCandidate(
     Candidate.IgnoreObjectArgument =
         cast<CXXMethodDecl>(Candidate.Function)->isStatic() ||
         ObjectType.isNull();
-    Candidate.TookAddressOfOverload = false;
     Candidate.ExplicitCallArguments = Args.size();
     if (Result == TemplateDeductionResult::NonDependentConversionFailure)
       Candidate.FailureKind = ovl_fail_bad_conversion;
@@ -7705,7 +7699,6 @@ void Sema::AddTemplateOverloadCandidate(
     Candidate.IgnoreObjectArgument =
         isa<CXXMethodDecl>(Candidate.Function) &&
         !isa<CXXConstructorDecl>(Candidate.Function);
-    Candidate.TookAddressOfOverload = false;
     Candidate.ExplicitCallArguments = Args.size();
     if (Result == TemplateDeductionResult::NonDependentConversionFailure)
       Candidate.FailureKind = ovl_fail_bad_conversion;
@@ -7886,9 +7879,6 @@ void Sema::AddConversionCandidate(
   OverloadCandidate &Candidate = CandidateSet.addCandidate(1);
   Candidate.FoundDecl = FoundDecl;
   Candidate.Function = Conversion;
-  Candidate.IsSurrogate = false;
-  Candidate.IgnoreObjectArgument = false;
-  Candidate.TookAddressOfOverload = false;
   Candidate.FinalConversion.setAsIdentityConversion();
   Candidate.FinalConversion.setFromType(ConvType);
   Candidate.FinalConversion.setAllToTypes(ToType);
@@ -8084,9 +8074,6 @@ void Sema::AddTemplateConversionCandidate(
     Candidate.Function = FunctionTemplate->getTemplatedDecl();
     Candidate.Viable = false;
     Candidate.FailureKind = ovl_fail_bad_deduction;
-    Candidate.IsSurrogate = false;
-    Candidate.IgnoreObjectArgument = false;
-    Candidate.TookAddressOfOverload = false;
     Candidate.ExplicitCallArguments = 1;
     Candidate.DeductionFailure = MakeDeductionFailureInfo(Context, Result,
                                                           Info);
@@ -8119,10 +8106,8 @@ void Sema::AddSurrogateCandidate(CXXConversionDecl *Conversion,
   Candidate.FoundDecl = FoundDecl;
   Candidate.Function = nullptr;
   Candidate.Surrogate = Conversion;
-  Candidate.Viable = true;
   Candidate.IsSurrogate = true;
-  Candidate.IgnoreObjectArgument = false;
-  Candidate.TookAddressOfOverload = false;
+  Candidate.Viable = true;
   Candidate.ExplicitCallArguments = Args.size();
 
   // Determine the implicit conversion sequence for the implicit
@@ -8328,9 +8313,6 @@ void Sema::AddBuiltinCandidate(QualType *ParamTys, ArrayRef<Expr *> Args,
   OverloadCandidate &Candidate = CandidateSet.addCandidate(Args.size());
   Candidate.FoundDecl = DeclAccessPair::make(nullptr, AS_none);
   Candidate.Function = nullptr;
-  Candidate.IsSurrogate = false;
-  Candidate.IgnoreObjectArgument = false;
-  Candidate.TookAddressOfOverload = false;
   std::copy(ParamTys, ParamTys + Args.size(), Candidate.BuiltinParamTypes);
 
   // Determine the implicit conversion sequences for each of the

From 5c03c4fc269039dad8db96c298aaacf819b32125 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Wed, 24 Jul 2024 11:03:52 -0500
Subject: [PATCH 028/427] [libc++] Improve behavior when using relative path
 for LIBCXX_ASSERTION_HANDLER_FILE (#100157)

Fixes #80696

(cherry picked from commit 046a17717d9c5b5385ecd914621b48bdd91524d0)
---
 libcxx/CMakeLists.txt          | 8 ++++++--
 libcxx/docs/BuildingLibcxx.rst | 3 ++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index 332816b15260a..674082c7d1787 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -71,12 +71,16 @@ if (NOT "${LIBCXX_HARDENING_MODE}" IN_LIST LIBCXX_SUPPORTED_HARDENING_MODES)
     "Unsupported hardening mode: '${LIBCXX_HARDENING_MODE}'. Supported values are ${LIBCXX_SUPPORTED_HARDENING_MODES}.")
 endif()
 set(LIBCXX_ASSERTION_HANDLER_FILE
-  "${CMAKE_CURRENT_SOURCE_DIR}/vendor/llvm/default_assertion_handler.in"
+  "vendor/llvm/default_assertion_handler.in"
   CACHE STRING
   "Specify the path to a header that contains a custom implementation of the
    assertion handler that gets invoked when a hardening assertion fails. If
    provided, this header will be included by the library, replacing the
-   default assertion handler.")
+   default assertion handler. If this is specified as a relative path, it
+   is assumed to be relative to '<monorepo>/libcxx'.")
+if (NOT IS_ABSOLUTE "${LIBCXX_ASSERTION_HANDLER_FILE}")
+  set(LIBCXX_ASSERTION_HANDLER_FILE "${CMAKE_CURRENT_SOURCE_DIR}/${LIBCXX_ASSERTION_HANDLER_FILE}")
+endif()
 option(LIBCXX_ENABLE_RANDOM_DEVICE
   "Whether to include support for std::random_device in the library. Disabling
    this can be useful when building the library for platforms that don't have
diff --git a/libcxx/docs/BuildingLibcxx.rst b/libcxx/docs/BuildingLibcxx.rst
index 66bb19bb5b2cd..5c224689e0f9f 100644
--- a/libcxx/docs/BuildingLibcxx.rst
+++ b/libcxx/docs/BuildingLibcxx.rst
@@ -406,7 +406,8 @@ libc++ Feature Options
   Specify the path to a header that contains a custom implementation of the
   assertion handler that gets invoked when a hardening assertion fails. If
   provided, this header will be included by the library, replacing the
-  default assertion handler.
+  default assertion handler. If this is specified as a relative path, it
+  is assumed to be relative to ``<monorepo>/libcxx``.
 
 
 libc++ ABI Feature Options

From 05446fb31a97eb768c494ad7175809faf7d2000a Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Wed, 24 Jul 2024 19:42:48 +0200
Subject: [PATCH 029/427] [libc++][spaceship] Implements X::iterator container
 requirements. (#99343)

This implements the requirements for the container iterator requirements
for array, deque, vector, and `vector<bool>`.

Implements:
- LWG3352 strong_equality isn't a thing

Implements parts of:
- P1614R2 The Mothership has Landed

Fixes: https://github.com/llvm/llvm-project/issues/62486
---
 libcxx/docs/Status/Cxx20Issues.csv            |  2 +-
 libcxx/docs/Status/SpaceshipProjects.csv      |  2 +-
 libcxx/include/__bit_reference                | 14 +++
 libcxx/include/__iterator/bounded_iter.h      | 24 +++++
 libcxx/include/__iterator/wrap_iter.h         | 23 +++++
 libcxx/include/deque                          | 29 +++++-
 .../bounded_iter/comparison.pass.cpp          | 14 ++-
 .../sequences/array/iterators.pass.cpp        | 18 ++++
 .../sequences/deque/iterators.pass.cpp        | 29 ++++++
 .../sequences/vector.bool/iterators.pass.cpp  | 37 ++++++++
 .../sequences/vector/iterators.pass.cpp       | 49 +++++++++-
 .../span.iterators/iterator.pass.cpp          | 92 +++++++++++++++++++
 .../string.view.iterators/iterators.pass.cpp  | 85 +++++++++++++++++
 libcxx/test/support/test_iterators.h          |  2 +
 14 files changed, 413 insertions(+), 7 deletions(-)
 create mode 100644 libcxx/test/std/containers/views/views.span/span.iterators/iterator.pass.cpp
 create mode 100644 libcxx/test/std/strings/string.view/string.view.iterators/iterators.pass.cpp

diff --git a/libcxx/docs/Status/Cxx20Issues.csv b/libcxx/docs/Status/Cxx20Issues.csv
index 1a40a4472a405..8a431c922a2d9 100644
--- a/libcxx/docs/Status/Cxx20Issues.csv
+++ b/libcxx/docs/Status/Cxx20Issues.csv
@@ -264,7 +264,7 @@
 "`3349 <https://wg21.link/LWG3349>`__","Missing ``__cpp_lib_constexpr_complex``\  for P0415R1","Prague","|Complete|","16.0"
 "`3350 <https://wg21.link/LWG3350>`__","Simplify return type of ``lexicographical_compare_three_way``\ ","Prague","|Complete|","17.0","|spaceship|"
 "`3351 <https://wg21.link/LWG3351>`__","``ranges::enable_safe_range``\  should not be constrained","Prague","|Complete|","15.0","|ranges|"
-"`3352 <https://wg21.link/LWG3352>`__","``strong_equality``\  isn't a thing","Prague","|Nothing To Do|","","|spaceship|"
+"`3352 <https://wg21.link/LWG3352>`__","``strong_equality``\  isn't a thing","Prague","|Complete|","19.0","|spaceship|"
 "`3354 <https://wg21.link/LWG3354>`__","``has_strong_structural_equality``\  has a meaningless definition","Prague","|Nothing To Do|","","|spaceship|"
 "`3355 <https://wg21.link/LWG3355>`__","The memory algorithms should support move-only input iterators introduced by P1207","Prague","|Complete|","15.0","|ranges|"
 "`3356 <https://wg21.link/LWG3356>`__","``__cpp_lib_nothrow_convertible``\  should be ``__cpp_lib_is_nothrow_convertible``\ ","Prague","|Complete|","12.0"
diff --git a/libcxx/docs/Status/SpaceshipProjects.csv b/libcxx/docs/Status/SpaceshipProjects.csv
index e1cf2044cfd78..4dc43cdbbd08f 100644
--- a/libcxx/docs/Status/SpaceshipProjects.csv
+++ b/libcxx/docs/Status/SpaceshipProjects.csv
@@ -83,7 +83,7 @@ Section,Description,Dependencies,Assignee,Complete
 "| `[string.view.synop] <https://wg21.link/string.view.synop>`_
 | `[string.view.comparison] <https://wg21.link/string.view.comparison>`_",| `basic_string_view <https://reviews.llvm.org/D130295>`_,None,Mark de Wever,|Complete|
 - `5.7 Clause 22: Containers library <https://wg21.link/p1614r2#clause-22-containers-library>`_,,,,
-| `[container.requirements.general] <https://wg21.link/container.requirements.general>`_,|,None,Unassigned,|Not Started|
+| `[container.requirements.general] <https://wg21.link/container.requirements.general>`_,|,None,Mark de Wever,|Complete|
 | `[array.syn] <https://wg21.link/array.syn>`_ (`general <https://wg21.link/container.opt.reqmts>`_),| `array <https://reviews.llvm.org/D132265>`_,[expos.only.func],"| Adrian Vogelsgesang
 | Hristo Hristov",|Complete|
 | `[deque.syn] <https://wg21.link/deque.syn>`_ (`general <https://wg21.link/container.opt.reqmts>`_),| `deque <https://reviews.llvm.org/D144821>`_,[expos.only.func],Hristo Hristov,|Complete|
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index 606069d98be72..22637d4397412 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -16,6 +16,7 @@
 #include <__bit/countr.h>
 #include <__bit/invert_if.h>
 #include <__bit/popcount.h>
+#include <__compare/ordering.h>
 #include <__config>
 #include <__fwd/bit_reference.h>
 #include <__iterator/iterator_traits.h>
@@ -913,6 +914,7 @@ public:
     return __x.__seg_ == __y.__seg_ && __x.__ctz_ == __y.__ctz_;
   }
 
+#if _LIBCPP_STD_VER <= 17
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
   operator!=(const __bit_iterator& __x, const __bit_iterator& __y) {
     return !(__x == __y);
@@ -937,6 +939,18 @@ public:
   operator>=(const __bit_iterator& __x, const __bit_iterator& __y) {
     return !(__x < __y);
   }
+#else  // _LIBCPP_STD_VER <= 17
+  _LIBCPP_HIDE_FROM_ABI constexpr friend strong_ordering
+  operator<=>(const __bit_iterator& __x, const __bit_iterator& __y) {
+    if (__x.__seg_ < __y.__seg_)
+      return strong_ordering::less;
+
+    if (__x.__seg_ == __y.__seg_)
+      return __x.__ctz_ <=> __y.__ctz_;
+
+    return strong_ordering::greater;
+  }
+#endif // _LIBCPP_STD_VER <= 17
 
 private:
   _LIBCPP_HIDE_FROM_ABI
diff --git a/libcxx/include/__iterator/bounded_iter.h b/libcxx/include/__iterator/bounded_iter.h
index ce0823b8c97e4..8a81c9ffbfc3f 100644
--- a/libcxx/include/__iterator/bounded_iter.h
+++ b/libcxx/include/__iterator/bounded_iter.h
@@ -11,6 +11,8 @@
 #define _LIBCPP___ITERATOR_BOUNDED_ITER_H
 
 #include <__assert>
+#include <__compare/ordering.h>
+#include <__compare/three_way_comparable.h>
 #include <__config>
 #include <__iterator/iterator_traits.h>
 #include <__memory/pointer_traits.h>
@@ -201,10 +203,15 @@ struct __bounded_iter {
   operator==(__bounded_iter const& __x, __bounded_iter const& __y) _NOEXCEPT {
     return __x.__current_ == __y.__current_;
   }
+
+#if _LIBCPP_STD_VER <= 17
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR friend bool
   operator!=(__bounded_iter const& __x, __bounded_iter const& __y) _NOEXCEPT {
     return __x.__current_ != __y.__current_;
   }
+#endif
+
+  // TODO(mordante) disable these overloads in the LLVM 20 release.
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR friend bool
   operator<(__bounded_iter const& __x, __bounded_iter const& __y) _NOEXCEPT {
     return __x.__current_ < __y.__current_;
@@ -222,6 +229,23 @@ struct __bounded_iter {
     return __x.__current_ >= __y.__current_;
   }
 
+#if _LIBCPP_STD_VER >= 20
+  _LIBCPP_HIDE_FROM_ABI constexpr friend strong_ordering
+  operator<=>(__bounded_iter const& __x, __bounded_iter const& __y) noexcept {
+    if constexpr (three_way_comparable<_Iterator, strong_ordering>) {
+      return __x.__current_ <=> __y.__current_;
+    } else {
+      if (__x.__current_ < __y.__current_)
+        return strong_ordering::less;
+
+      if (__x.__current_ == __y.__current_)
+        return strong_ordering::equal;
+
+      return strong_ordering::greater;
+    }
+  }
+#endif // _LIBCPP_STD_VER >= 20
+
 private:
   template <class>
   friend struct pointer_traits;
diff --git a/libcxx/include/__iterator/wrap_iter.h b/libcxx/include/__iterator/wrap_iter.h
index 252d13b26c9e2..56183c0ee794d 100644
--- a/libcxx/include/__iterator/wrap_iter.h
+++ b/libcxx/include/__iterator/wrap_iter.h
@@ -10,6 +10,8 @@
 #ifndef _LIBCPP___ITERATOR_WRAP_ITER_H
 #define _LIBCPP___ITERATOR_WRAP_ITER_H
 
+#include <__compare/ordering.h>
+#include <__compare/three_way_comparable.h>
 #include <__config>
 #include <__iterator/iterator_traits.h>
 #include <__memory/addressof.h>
@@ -131,6 +133,7 @@ operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXC
   return __x.base() < __y.base();
 }
 
+#if _LIBCPP_STD_VER <= 17
 template <class _Iter1>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool
 operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT {
@@ -142,7 +145,9 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool
 operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT {
   return !(__x == __y);
 }
+#endif
 
+// TODO(mordante) disable these overloads in the LLVM 20 release.
 template <class _Iter1>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool
 operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT {
@@ -179,6 +184,24 @@ operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEX
   return !(__y < __x);
 }
 
+#if _LIBCPP_STD_VER >= 20
+template <class _Iter1, class _Iter2>
+_LIBCPP_HIDE_FROM_ABI constexpr strong_ordering
+operator<=>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) noexcept {
+  if constexpr (three_way_comparable_with<_Iter1, _Iter2, strong_ordering>) {
+    return __x.base() <=> __y.base();
+  } else {
+    if (__x.base() < __y.base())
+      return strong_ordering::less;
+
+    if (__x.base() == __y.base())
+      return strong_ordering::equal;
+
+    return strong_ordering::greater;
+  }
+}
+#endif // _LIBCPP_STD_VER >= 20
+
 template <class _Iter1, class _Iter2>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
 #ifndef _LIBCPP_CXX03_LANG
diff --git a/libcxx/include/deque b/libcxx/include/deque
index 4fc994a6e229b..e73135a8647b9 100644
--- a/libcxx/include/deque
+++ b/libcxx/include/deque
@@ -376,10 +376,13 @@ public:
     return __x.__ptr_ == __y.__ptr_;
   }
 
+#if _LIBCPP_STD_VER <= 17
   _LIBCPP_HIDE_FROM_ABI friend bool operator!=(const __deque_iterator& __x, const __deque_iterator& __y) {
     return !(__x == __y);
   }
+#endif
 
+  // TODO(mordante) disable these overloads in the LLVM 20 release.
   _LIBCPP_HIDE_FROM_ABI friend bool operator<(const __deque_iterator& __x, const __deque_iterator& __y) {
     return __x.__m_iter_ < __y.__m_iter_ || (__x.__m_iter_ == __y.__m_iter_ && __x.__ptr_ < __y.__ptr_);
   }
@@ -396,6 +399,29 @@ public:
     return !(__x < __y);
   }
 
+#if _LIBCPP_STD_VER >= 20
+  _LIBCPP_HIDE_FROM_ABI friend strong_ordering operator<=>(const __deque_iterator& __x, const __deque_iterator& __y) {
+    if (__x.__m_iter_ < __y.__m_iter_)
+      return strong_ordering::less;
+
+    if (__x.__m_iter_ == __y.__m_iter_) {
+      if constexpr (three_way_comparable<pointer, strong_ordering>) {
+        return __x.__ptr_ <=> __y.__ptr_;
+      } else {
+        if (__x.__ptr_ < __y.__ptr_)
+          return strong_ordering::less;
+
+        if (__x.__ptr_ == __y.__ptr_)
+          return strong_ordering::equal;
+
+        return strong_ordering::greater;
+      }
+    }
+
+    return strong_ordering::greater;
+  }
+#endif // _LIBCPP_STD_VER >= 20
+
 private:
   _LIBCPP_HIDE_FROM_ABI explicit __deque_iterator(__map_iterator __m, pointer __p) _NOEXCEPT
       : __m_iter_(__m),
@@ -2530,8 +2556,7 @@ inline _LIBCPP_HIDE_FROM_ABI bool operator<=(const deque<_Tp, _Allocator>& __x,
 template <class _Tp, class _Allocator>
 _LIBCPP_HIDE_FROM_ABI __synth_three_way_result<_Tp>
 operator<=>(const deque<_Tp, _Allocator>& __x, const deque<_Tp, _Allocator>& __y) {
-  return std::lexicographical_compare_three_way(
-      __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way);
+  return std::lexicographical_compare_three_way(__x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way);
 }
 
 #endif // _LIBCPP_STD_VER <= 17
diff --git a/libcxx/test/libcxx/iterators/bounded_iter/comparison.pass.cpp b/libcxx/test/libcxx/iterators/bounded_iter/comparison.pass.cpp
index 9c5df5da55b9c..cef2157469c8f 100644
--- a/libcxx/test/libcxx/iterators/bounded_iter/comparison.pass.cpp
+++ b/libcxx/test/libcxx/iterators/bounded_iter/comparison.pass.cpp
@@ -11,6 +11,7 @@
 //
 // Comparison operators
 
+#include <concepts>
 #include <__iterator/bounded_iter.h>
 
 #include "test_iterators.h"
@@ -59,6 +60,12 @@ TEST_CONSTEXPR_CXX14 bool tests() {
     assert(iter1 >= iter1);
   }
 
+#if TEST_STD_VER >= 20
+  // P1614
+  std::same_as<std::strong_ordering> decltype(auto) r1 = iter1 <=> iter2;
+  assert(r1 == std::strong_ordering::less);
+#endif
+
   return true;
 }
 
@@ -69,8 +76,11 @@ int main(int, char**) {
 #endif
 
 #if TEST_STD_VER > 17
-  tests<contiguous_iterator<int*> >();
-  static_assert(tests<contiguous_iterator<int*> >(), "");
+  tests<contiguous_iterator<int*>>();
+  static_assert(tests<contiguous_iterator<int*>>());
+
+  tests<three_way_contiguous_iterator<int*>>();
+  static_assert(tests<three_way_contiguous_iterator<int*>>());
 #endif
 
   return 0;
diff --git a/libcxx/test/std/containers/sequences/array/iterators.pass.cpp b/libcxx/test/std/containers/sequences/array/iterators.pass.cpp
index 106bc45c70998..710994c68295e 100644
--- a/libcxx/test/std/containers/sequences/array/iterators.pass.cpp
+++ b/libcxx/test/std/containers/sequences/array/iterators.pass.cpp
@@ -148,6 +148,15 @@ TEST_CONSTEXPR_CXX17 bool tests()
             assert(std::rbegin(c)  != std::rend(c));
             assert(std::cbegin(c)  != std::cend(c));
             assert(std::crbegin(c) != std::crend(c));
+
+#  if TEST_STD_VER >= 20
+            // P1614 + LWG3352
+            std::same_as<std::strong_ordering> decltype(auto) r1 = ii1 <=> ii2;
+            assert(r1 == std::strong_ordering::equal);
+
+            std::same_as<std::strong_ordering> decltype(auto) r2 = cii <=> ii2;
+            assert(r2 == std::strong_ordering::equal);
+#  endif
         }
         {
             typedef std::array<int, 0> C;
@@ -189,6 +198,15 @@ TEST_CONSTEXPR_CXX17 bool tests()
             assert(std::rbegin(c)  == std::rend(c));
             assert(std::cbegin(c)  == std::cend(c));
             assert(std::crbegin(c) == std::crend(c));
+
+#  if TEST_STD_VER >= 20
+            // P1614 + LWG3352
+            std::same_as<std::strong_ordering> decltype(auto) r1 = ii1 <=> ii2;
+            assert(r1 == std::strong_ordering::equal);
+
+            std::same_as<std::strong_ordering> decltype(auto) r2 = cii <=> ii2;
+            assert(r2 == std::strong_ordering::equal);
+#  endif
         }
     }
 #endif
diff --git a/libcxx/test/std/containers/sequences/deque/iterators.pass.cpp b/libcxx/test/std/containers/sequences/deque/iterators.pass.cpp
index 1f06ffde41ac2..484a2961fdb0c 100644
--- a/libcxx/test/std/containers/sequences/deque/iterators.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/iterators.pass.cpp
@@ -41,7 +41,27 @@ int main(int, char**)
     i = c.begin();
     C::const_iterator j;
     j = c.cbegin();
+
     assert(i == j);
+    assert(!(i != j));
+
+    assert(!(i < j));
+    assert((i <= j));
+
+    assert(!(i > j));
+    assert((i >= j));
+
+#  if TEST_STD_VER >= 20
+    // P1614 + LWG3352
+    // When the allocator does not have operator<=> then the iterator uses a
+    // fallback to provide operator<=>.
+    // Make sure to test with an allocator that does not have operator<=>.
+    static_assert(!std::three_way_comparable<min_allocator<int>, std::strong_ordering>);
+    static_assert(std::three_way_comparable<typename C::iterator, std::strong_ordering>);
+
+    std::same_as<std::strong_ordering> decltype(auto) r1 = i <=> j;
+    assert(r1 == std::strong_ordering::equal);
+#  endif
     }
 #endif
 #if TEST_STD_VER > 11
@@ -74,6 +94,15 @@ int main(int, char**)
 //         assert ( cii != c.begin());
 //         assert ( cii != c.cend());
 //         assert ( ii1 != c.end());
+
+#  if TEST_STD_VER >= 20
+        // P1614 + LWG3352
+        std::same_as<std::strong_ordering> decltype(auto) r1 = ii1 <=> ii2;
+        assert(r1 == std::strong_ordering::equal);
+
+        std::same_as<std::strong_ordering> decltype(auto) r2 = cii <=> ii2;
+        assert(r2 == std::strong_ordering::equal);
+#  endif // TEST_STD_VER > 20
     }
 #endif
 
diff --git a/libcxx/test/std/containers/sequences/vector.bool/iterators.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/iterators.pass.cpp
index 9aaaac7a5557f..1e4877e8d2443 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/iterators.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/iterators.pass.cpp
@@ -77,7 +77,21 @@ TEST_CONSTEXPR_CXX20 bool tests()
         C::iterator i = c.begin();
         C::iterator j = c.end();
         assert(std::distance(i, j) == 0);
+
         assert(i == j);
+        assert(!(i != j));
+
+        assert(!(i < j));
+        assert((i <= j));
+
+        assert(!(i > j));
+        assert((i >= j));
+
+#  if TEST_STD_VER >= 20
+        // P1614 + LWG3352
+        std::same_as<std::strong_ordering> decltype(auto) r = i <=> j;
+        assert(r == std::strong_ordering::equal);
+#  endif
     }
     {
         typedef bool T;
@@ -86,7 +100,21 @@ TEST_CONSTEXPR_CXX20 bool tests()
         C::const_iterator i = c.begin();
         C::const_iterator j = c.end();
         assert(std::distance(i, j) == 0);
+
         assert(i == j);
+        assert(!(i != j));
+
+        assert(!(i < j));
+        assert((i <= j));
+
+        assert(!(i > j));
+        assert((i >= j));
+
+#  if TEST_STD_VER >= 20
+        // P1614 + LWG3352
+        std::same_as<std::strong_ordering> decltype(auto) r = i <=> j;
+        assert(r == std::strong_ordering::equal);
+#  endif
     }
     {
         typedef bool T;
@@ -131,6 +159,15 @@ TEST_CONSTEXPR_CXX20 bool tests()
         assert ( (cii >= ii1 ));
         assert (cii - ii1 == 0);
         assert (ii1 - cii == 0);
+
+#  if TEST_STD_VER >= 20
+        // P1614 + LWG3352
+        std::same_as<std::strong_ordering> decltype(auto) r1 = ii1 <=> ii2;
+        assert(r1 == std::strong_ordering::equal);
+
+        std::same_as<std::strong_ordering> decltype(auto) r2 = cii <=> ii2;
+        assert(r2 == std::strong_ordering::equal);
+#  endif // TEST_STD_VER > 20
     }
 #endif
 
diff --git a/libcxx/test/std/containers/sequences/vector/iterators.pass.cpp b/libcxx/test/std/containers/sequences/vector/iterators.pass.cpp
index 70e0e35767e09..0aa7ad0d42ed7 100644
--- a/libcxx/test/std/containers/sequences/vector/iterators.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/iterators.pass.cpp
@@ -87,7 +87,27 @@ TEST_CONSTEXPR_CXX20 bool tests()
         C::iterator i = c.begin();
         C::iterator j = c.end();
         assert(std::distance(i, j) == 0);
+
         assert(i == j);
+        assert(!(i != j));
+
+        assert(!(i < j));
+        assert((i <= j));
+
+        assert(!(i > j));
+        assert((i >= j));
+
+#  if TEST_STD_VER >= 20
+        // P1614 + LWG3352
+        // When the allocator does not have operator<=> then the iterator uses a
+        // fallback to provide operator<=>.
+        // Make sure to test with an allocator that does not have operator<=>.
+        static_assert(!std::three_way_comparable<min_allocator<int>, std::strong_ordering>);
+        static_assert(std::three_way_comparable<typename C::iterator, std::strong_ordering>);
+
+        std::same_as<std::strong_ordering> decltype(auto) r1 = i <=> j;
+        assert(r1 == std::strong_ordering::equal);
+#  endif
     }
     {
         typedef int T;
@@ -96,7 +116,26 @@ TEST_CONSTEXPR_CXX20 bool tests()
         C::const_iterator i = c.begin();
         C::const_iterator j = c.end();
         assert(std::distance(i, j) == 0);
+
         assert(i == j);
+        assert(!(i != j));
+
+        assert(!(i < j));
+        assert((i <= j));
+
+        assert(!(i > j));
+        assert((i >= j));
+
+#  if TEST_STD_VER >= 20
+        // When the allocator does not have operator<=> then the iterator uses a
+        // fallback to provide operator<=>.
+        // Make sure to test with an allocator that does not have operator<=>.
+        static_assert(!std::three_way_comparable<min_allocator<int>, std::strong_ordering>);
+        static_assert(std::three_way_comparable<typename C::iterator, std::strong_ordering>);
+
+        std::same_as<std::strong_ordering> decltype(auto) r1 = i <=> j;
+        assert(r1 == std::strong_ordering::equal);
+#  endif
     }
     {
         typedef int T;
@@ -164,8 +203,16 @@ TEST_CONSTEXPR_CXX20 bool tests()
         assert ( (cii >= ii1 ));
         assert (cii - ii1 == 0);
         assert (ii1 - cii == 0);
+#  if TEST_STD_VER >= 20
+        // P1614 + LWG3352
+        std::same_as<std::strong_ordering> decltype(auto) r1 = ii1 <=> ii2;
+        assert(r1 == std::strong_ordering::equal);
+
+        std::same_as<std::strong_ordering> decltype(auto) r2 = cii <=> ii2;
+        assert(r2 == std::strong_ordering::equal);
+#  endif // TEST_STD_VER > 20
     }
-#endif
+#endif // TEST_STD_VER > 11
 
     return true;
 }
diff --git a/libcxx/test/std/containers/views/views.span/span.iterators/iterator.pass.cpp b/libcxx/test/std/containers/views/views.span/span.iterators/iterator.pass.cpp
new file mode 100644
index 0000000000000..13a7628e6043d
--- /dev/null
+++ b/libcxx/test/std/containers/views/views.span/span.iterators/iterator.pass.cpp
@@ -0,0 +1,92 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+// <span>
+
+// class iterator
+
+#include <cassert>
+#include <concepts>
+#include <iterator>
+#include <span>
+#include <string>
+#include <version> // __cpp_lib_ranges_as_const is not defined in span.
+
+#include "test_macros.h"
+
+template <class T>
+constexpr void test_type() {
+  using C = std::span<T>;
+  typename C::iterator ii1{}, ii2{};
+  typename C::iterator ii4 = ii1;
+  // TODO Test against C++23 after implementing
+  //  P2278R4 cbegin should always return a constant iterator
+  // The means adjusting the #ifdef to guard against C++23.
+#ifdef __cpp_lib_ranges_as_const
+  typename C::const_iterator cii{};
+#endif
+  assert(ii1 == ii2);
+  assert(ii1 == ii4);
+#ifdef __cpp_lib_ranges_as_const
+  assert(ii1 == cii);
+#endif
+
+  assert(!(ii1 != ii2));
+#ifdef __cpp_lib_ranges_as_const
+  assert(!(ii1 != cii));
+#endif
+
+  T v;
+  C c{&v, 1};
+  assert(c.begin() == std::begin(c));
+  assert(c.rbegin() == std::rbegin(c));
+#ifdef __cpp_lib_ranges_as_const
+  assert(c.cbegin() == std::cbegin(c));
+  assert(c.crbegin() == std::crbegin(c));
+#endif
+
+  assert(c.end() == std::end(c));
+  assert(c.rend() == std::rend(c));
+#ifdef __cpp_lib_ranges_as_const
+  assert(c.cend() == std::cend(c));
+  assert(c.crend() == std::crend(c));
+#endif
+
+  assert(std::begin(c) != std::end(c));
+  assert(std::rbegin(c) != std::rend(c));
+#ifdef __cpp_lib_ranges_as_const
+  assert(std::cbegin(c) != std::cend(c));
+  assert(std::crbegin(c) != std::crend(c));
+#endif
+
+  // P1614 + LWG3352
+  std::same_as<std::strong_ordering> decltype(auto) r1 = ii1 <=> ii2;
+  assert(r1 == std::strong_ordering::equal);
+
+#ifdef __cpp_lib_ranges_as_const
+  std::same_as<std::strong_ordering> decltype(auto) r2 = cii <=> ii2;
+  assert(r2 == std::strong_ordering::equal);
+#endif
+}
+
+constexpr bool test() {
+  test_type<char>();
+  test_type<int>();
+  test_type<std::string>();
+
+  return true;
+}
+
+int main(int, char**) {
+  test();
+  static_assert(test(), "");
+
+  return 0;
+}
diff --git a/libcxx/test/std/strings/string.view/string.view.iterators/iterators.pass.cpp b/libcxx/test/std/strings/string.view/string.view.iterators/iterators.pass.cpp
new file mode 100644
index 0000000000000..75d492bf7b3c6
--- /dev/null
+++ b/libcxx/test/std/strings/string.view/string.view.iterators/iterators.pass.cpp
@@ -0,0 +1,85 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: !stdlib=libc++ && (c++03 || c++11 || c++14)
+
+// <string_view>
+
+// class iterator
+
+#include <cassert>
+#include <concepts>
+#include <iterator>
+#include <string_view>
+
+#include "test_macros.h"
+#include "make_string.h"
+
+template <class CharT>
+TEST_CONSTEXPR_CXX14 void test_type() {
+  using C                  = std::basic_string_view<CharT>;
+  typename C::iterator ii1 = typename C::iterator(), ii2 = typename C::iterator();
+  typename C::iterator ii4       = ii1;
+  typename C::const_iterator cii = typename C::const_iterator();
+  assert(ii1 == ii2);
+  assert(ii1 == ii4);
+  assert(ii1 == cii);
+
+  assert(!(ii1 != ii2));
+  assert(!(ii1 != cii));
+
+#if TEST_STD_VER >= 17
+  C c = MAKE_STRING_VIEW(CharT, "abc");
+  assert(c.begin() == std::begin(c));
+  assert(c.rbegin() == std::rbegin(c));
+  assert(c.cbegin() == std::cbegin(c));
+  assert(c.crbegin() == std::crbegin(c));
+
+  assert(c.end() == std::end(c));
+  assert(c.rend() == std::rend(c));
+  assert(c.cend() == std::cend(c));
+  assert(c.crend() == std::crend(c));
+
+  assert(std::begin(c) != std::end(c));
+  assert(std::rbegin(c) != std::rend(c));
+  assert(std::cbegin(c) != std::cend(c));
+  assert(std::crbegin(c) != std::crend(c));
+#endif
+
+#if TEST_STD_VER >= 20
+  // P1614 + LWG3352
+  std::same_as<std::strong_ordering> decltype(auto) r1 = ii1 <=> ii2;
+  assert(r1 == std::strong_ordering::equal);
+
+  std::same_as<std::strong_ordering> decltype(auto) r2 = ii1 <=> ii2;
+  assert(r2 == std::strong_ordering::equal);
+#endif
+}
+
+TEST_CONSTEXPR_CXX14 bool test() {
+  test_type<char>();
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  test_type<wchar_t>();
+#endif
+#ifndef TEST_HAS_NO_CHAR8_T
+  test_type<char8_t>();
+#endif
+  test_type<char16_t>();
+  test_type<char32_t>();
+
+  return true;
+}
+
+int main(int, char**) {
+  test();
+#if TEST_STD_VER >= 14
+  static_assert(test(), "");
+#endif
+
+  return 0;
+}
diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h
index 31564a3977317..95d1b7df0007c 100644
--- a/libcxx/test/support/test_iterators.h
+++ b/libcxx/test/support/test_iterators.h
@@ -389,6 +389,8 @@ class contiguous_iterator
     friend TEST_CONSTEXPR bool operator> (const contiguous_iterator& x, const contiguous_iterator& y) {return x.it_ >  y.it_;}
     friend TEST_CONSTEXPR bool operator>=(const contiguous_iterator& x, const contiguous_iterator& y) {return x.it_ >= y.it_;}
 
+    // Note no operator<=>, use three_way_contiguous_iterator for testing operator<=>
+
     friend TEST_CONSTEXPR It base(const contiguous_iterator& i) { return i.it_; }
 
     template <class T>

From 577f886f2ef32dc19d7534a0d4873269cd4d1efe Mon Sep 17 00:00:00 2001
From: Eli Friedman <efriedma@quicinc.com>
Date: Wed, 24 Jul 2024 12:36:08 -0700
Subject: [PATCH 030/427] [ExprConstant] Handle shift overflow the same way as
 other kinds of overflow (#99579)

We have a mechanism to allow folding expressions that aren't ICEs as an
extension; use it more consistently.

This ends up causing bad effects on diagnostics in a few cases, but
that's not specific to shifts; it's a general issue with the way those
uses handle overflow diagnostics.

(cherry picked from commit 20eff684203287828d6722fc860b9d3621429542)
---
 clang/docs/ReleaseNotes.rst              |  2 ++
 clang/lib/AST/ExprConstant.cpp           | 26 ++++++++++++++++--------
 clang/lib/AST/Interp/Interp.h            | 22 ++++++++++++--------
 clang/lib/Sema/SemaExpr.cpp              |  3 ++-
 clang/test/CXX/basic/basic.types/p10.cpp |  2 +-
 clang/test/Sema/constant-builtins-2.c    | 12 ++++-------
 clang/test/SemaCXX/class.cpp             |  9 +++++++-
 clang/test/SemaCXX/enum.cpp              | 24 +++++++++++++++-------
 8 files changed, 65 insertions(+), 35 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5b6ee9830b507..549da6812740f 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -765,6 +765,8 @@ Improvements to Clang's diagnostics
 
      UsingWithAttr<int> objUsingWA; // warning: 'UsingWithAttr' is deprecated
 
+- Clang now diagnoses undefined behavior in constant expressions more consistently. This includes invalid shifts, and signed overflow in arithmetic.
+
 Improvements to Clang's time-trace
 ----------------------------------
 
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 03a606102a77e..5e57b5e8bc8f1 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -2839,6 +2839,8 @@ static bool handleIntIntBinOp(EvalInfo &Info, const BinaryOperator *E,
       // During constant-folding, a negative shift is an opposite shift. Such
       // a shift is not a constant expression.
       Info.CCEDiag(E, diag::note_constexpr_negative_shift) << RHS;
+      if (!Info.noteUndefinedBehavior())
+        return false;
       RHS = -RHS;
       goto shift_right;
     }
@@ -2849,19 +2851,23 @@ static bool handleIntIntBinOp(EvalInfo &Info, const BinaryOperator *E,
     if (SA != RHS) {
       Info.CCEDiag(E, diag::note_constexpr_large_shift)
         << RHS << E->getType() << LHS.getBitWidth();
+      if (!Info.noteUndefinedBehavior())
+        return false;
     } else if (LHS.isSigned() && !Info.getLangOpts().CPlusPlus20) {
       // C++11 [expr.shift]p2: A signed left shift must have a non-negative
       // operand, and must not overflow the corresponding unsigned type.
       // C++2a [expr.shift]p2: E1 << E2 is the unique value congruent to
       // E1 x 2^E2 module 2^N.
-      if (LHS.isNegative())
+      if (LHS.isNegative()) {
         Info.CCEDiag(E, diag::note_constexpr_lshift_of_negative) << LHS;
-      else if (LHS.countl_zero() < SA)
+        if (!Info.noteUndefinedBehavior())
+          return false;
+      } else if (LHS.countl_zero() < SA) {
         Info.CCEDiag(E, diag::note_constexpr_lshift_discards);
+        if (!Info.noteUndefinedBehavior())
+          return false;
+      }
     }
-    if (Info.EvalStatus.Diag && !Info.EvalStatus.Diag->empty() &&
-        Info.getLangOpts().CPlusPlus11)
-      return false;
     Result = LHS << SA;
     return true;
   }
@@ -2875,6 +2881,8 @@ static bool handleIntIntBinOp(EvalInfo &Info, const BinaryOperator *E,
       // During constant-folding, a negative shift is an opposite shift. Such a
       // shift is not a constant expression.
       Info.CCEDiag(E, diag::note_constexpr_negative_shift) << RHS;
+      if (!Info.noteUndefinedBehavior())
+        return false;
       RHS = -RHS;
       goto shift_left;
     }
@@ -2882,13 +2890,13 @@ static bool handleIntIntBinOp(EvalInfo &Info, const BinaryOperator *E,
     // C++11 [expr.shift]p1: Shift width must be less than the bit width of the
     // shifted type.
     unsigned SA = (unsigned) RHS.getLimitedValue(LHS.getBitWidth()-1);
-    if (SA != RHS)
+    if (SA != RHS) {
       Info.CCEDiag(E, diag::note_constexpr_large_shift)
         << RHS << E->getType() << LHS.getBitWidth();
+      if (!Info.noteUndefinedBehavior())
+        return false;
+    }
 
-    if (Info.EvalStatus.Diag && !Info.EvalStatus.Diag->empty() &&
-        Info.getLangOpts().CPlusPlus11)
-      return false;
     Result = LHS >> SA;
     return true;
   }
diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h
index 8e96f78d90568..253a433e7340a 100644
--- a/clang/lib/AST/Interp/Interp.h
+++ b/clang/lib/AST/Interp/Interp.h
@@ -153,7 +153,8 @@ bool CheckShift(InterpState &S, CodePtr OpPC, const LT &LHS, const RT &RHS,
   if (RHS.isNegative()) {
     const SourceInfo &Loc = S.Current->getSource(OpPC);
     S.CCEDiag(Loc, diag::note_constexpr_negative_shift) << RHS.toAPSInt();
-    return false;
+    if (!S.noteUndefinedBehavior())
+      return false;
   }
 
   // C++11 [expr.shift]p1: Shift width must be less than the bit width of
@@ -163,17 +164,24 @@ bool CheckShift(InterpState &S, CodePtr OpPC, const LT &LHS, const RT &RHS,
     const APSInt Val = RHS.toAPSInt();
     QualType Ty = E->getType();
     S.CCEDiag(E, diag::note_constexpr_large_shift) << Val << Ty << Bits;
-    return !(S.getEvalStatus().Diag && !S.getEvalStatus().Diag->empty() && S.getLangOpts().CPlusPlus11);
+    if (!S.noteUndefinedBehavior())
+      return false;
   }
 
   if (LHS.isSigned() && !S.getLangOpts().CPlusPlus20) {
     const Expr *E = S.Current->getExpr(OpPC);
     // C++11 [expr.shift]p2: A signed left shift must have a non-negative
     // operand, and must not overflow the corresponding unsigned type.
-    if (LHS.isNegative())
+    if (LHS.isNegative()) {
       S.CCEDiag(E, diag::note_constexpr_lshift_of_negative) << LHS.toAPSInt();
-    else if (LHS.toUnsigned().countLeadingZeros() < static_cast<unsigned>(RHS))
+      if (!S.noteUndefinedBehavior())
+        return false;
+    } else if (LHS.toUnsigned().countLeadingZeros() <
+               static_cast<unsigned>(RHS)) {
       S.CCEDiag(E, diag::note_constexpr_lshift_discards);
+      if (!S.noteUndefinedBehavior())
+        return false;
+    }
   }
 
   // C++2a [expr.shift]p2: [P0907R4]:
@@ -2269,8 +2277,7 @@ inline bool DoShift(InterpState &S, CodePtr OpPC, LT &LHS, RT &RHS) {
     // shift is not a constant expression.
     const SourceInfo &Loc = S.Current->getSource(OpPC);
     S.CCEDiag(Loc, diag::note_constexpr_negative_shift) << RHS.toAPSInt();
-    if (S.getLangOpts().CPlusPlus11 && S.getEvalStatus().Diag &&
-        !S.getEvalStatus().Diag->empty())
+    if (!S.noteUndefinedBehavior())
       return false;
     RHS = -RHS;
     return DoShift < LT, RT,
@@ -2286,8 +2293,7 @@ inline bool DoShift(InterpState &S, CodePtr OpPC, LT &LHS, RT &RHS) {
       // E1 x 2^E2 module 2^N.
       const SourceInfo &Loc = S.Current->getSource(OpPC);
       S.CCEDiag(Loc, diag::note_constexpr_lshift_of_negative) << LHS.toAPSInt();
-      if (S.getLangOpts().CPlusPlus11 && S.getEvalStatus().Diag &&
-          !S.getEvalStatus().Diag->empty())
+      if (!S.noteUndefinedBehavior())
         return false;
     }
   }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 206194930f3b4..74c0e01705905 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -17045,7 +17045,8 @@ Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result,
   // not a constant expression as a side-effect.
   bool Folded =
       E->EvaluateAsRValue(EvalResult, Context, /*isConstantContext*/ true) &&
-      EvalResult.Val.isInt() && !EvalResult.HasSideEffects;
+      EvalResult.Val.isInt() && !EvalResult.HasSideEffects &&
+      (!getLangOpts().CPlusPlus || !EvalResult.HasUndefinedBehavior);
 
   if (!isa<ConstantExpr>(E))
     E = ConstantExpr::Create(Context, E, EvalResult.Val);
diff --git a/clang/test/CXX/basic/basic.types/p10.cpp b/clang/test/CXX/basic/basic.types/p10.cpp
index a543f248e5371..92d6da0035ea5 100644
--- a/clang/test/CXX/basic/basic.types/p10.cpp
+++ b/clang/test/CXX/basic/basic.types/p10.cpp
@@ -142,7 +142,7 @@ constexpr int arb(int n) { // expected-note {{declared here}}
                expected-note {{function parameter 'n' with unknown value cannot be used in a constant expression}}
 }
 constexpr long Overflow[(1 << 30) << 2]{}; // expected-warning {{requires 34 bits to represent}} \
-                                              expected-warning {{variable length array folded to constant array as an extension}} \
+                                              expected-error {{variable length array declaration not allowed at file scope}} \
                                               expected-warning {{variable length arrays in C++ are a Clang extension}} \
                                               expected-note {{signed left shift discards bits}}
 
diff --git a/clang/test/Sema/constant-builtins-2.c b/clang/test/Sema/constant-builtins-2.c
index 00767267cd6c2..37b63cf4f6b32 100644
--- a/clang/test/Sema/constant-builtins-2.c
+++ b/clang/test/Sema/constant-builtins-2.c
@@ -265,10 +265,8 @@ char clz52[__builtin_clzg((unsigned __int128)0x1) == BITSIZE(__int128) - 1 ? 1 :
 char clz53[__builtin_clzg((unsigned __int128)0x1, 42) == BITSIZE(__int128) - 1 ? 1 : -1];
 char clz54[__builtin_clzg((unsigned __int128)0xf) == BITSIZE(__int128) - 4 ? 1 : -1];
 char clz55[__builtin_clzg((unsigned __int128)0xf, 42) == BITSIZE(__int128) - 4 ? 1 : -1];
-char clz56[__builtin_clzg((unsigned __int128)(1 << (BITSIZE(__int128) - 1))) == 0 ? 1 : -1]; // expected-warning {{variable length array folded to constant array as an extension}}
-                                                                                             // expected-note@-1 {{shift count 127 >= width of type 'int' (32 bits)}}
-char clz57[__builtin_clzg((unsigned __int128)(1 << (BITSIZE(__int128) - 1)), 42) == 0 ? 1 : -1]; // expected-warning {{variable length array folded to constant array as an extension}}
-                                                                                                 // expected-note@-1 {{shift count 127 >= width of type 'int' (32 bits)}}
+char clz56[__builtin_clzg((unsigned __int128)(1 << (BITSIZE(__int128) - 1))) == 0 ? 1 : -1]; // expected-error {{variable length array declaration not allowed at file scope}}
+char clz57[__builtin_clzg((unsigned __int128)(1 << (BITSIZE(__int128) - 1)), 42) == 0 ? 1 : -1]; // expected-error {{variable length array declaration not allowed at file scope}}
 #endif
 int clz58 = __builtin_clzg((unsigned _BitInt(128))0); // expected-error {{not a compile-time constant}}
 char clz59[__builtin_clzg((unsigned _BitInt(128))0, 42) == 42 ? 1 : -1];
@@ -276,10 +274,8 @@ char clz60[__builtin_clzg((unsigned _BitInt(128))0x1) == BITSIZE(_BitInt(128)) -
 char clz61[__builtin_clzg((unsigned _BitInt(128))0x1, 42) == BITSIZE(_BitInt(128)) - 1 ? 1 : -1];
 char clz62[__builtin_clzg((unsigned _BitInt(128))0xf) == BITSIZE(_BitInt(128)) - 4 ? 1 : -1];
 char clz63[__builtin_clzg((unsigned _BitInt(128))0xf, 42) == BITSIZE(_BitInt(128)) - 4 ? 1 : -1];
-char clz64[__builtin_clzg((unsigned _BitInt(128))(1 << (BITSIZE(_BitInt(128)) - 1))) == 0 ? 1 : -1]; // expected-warning {{variable length array folded to constant array as an extension}}
-                                                                                                     // expected-note@-1 {{shift count 127 >= width of type 'int' (32 bits)}}
-char clz65[__builtin_clzg((unsigned _BitInt(128))(1 << (BITSIZE(_BitInt(128)) - 1)), 42) == 0 ? 1 : -1]; // expected-warning {{variable length array folded to constant array as an extension}}
-                                                                                                         // expected-note@-1 {{shift count 127 >= width of type 'int' (32 bits)}}
+char clz64[__builtin_clzg((unsigned _BitInt(128))(1 << (BITSIZE(_BitInt(128)) - 1))) == 0 ? 1 : -1]; // expected-error {{variable length array declaration not allowed at file scope}}
+char clz65[__builtin_clzg((unsigned _BitInt(128))(1 << (BITSIZE(_BitInt(128)) - 1)), 42) == 0 ? 1 : -1]; // expected-error {{variable length array declaration not allowed at file scope}}
 
 char ctz1[__builtin_ctz(1) == 0 ? 1 : -1];
 char ctz2[__builtin_ctz(8) == 3 ? 1 : -1];
diff --git a/clang/test/SemaCXX/class.cpp b/clang/test/SemaCXX/class.cpp
index f874b7be2b70e..2f59544e7f36c 100644
--- a/clang/test/SemaCXX/class.cpp
+++ b/clang/test/SemaCXX/class.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -Wc++11-compat %s 
+// RUN: %clang_cc1 -fsyntax-only -verify=expected,cxx11 -Wc++11-compat %s
 // RUN: %clang_cc1 -fsyntax-only -verify -Wc++11-compat %s -std=c++98
 class C {
 public:
@@ -55,6 +55,13 @@ class C {
   // expected-error@-2 {{static const volatile data member must be initialized out of line}}
 #endif
   static const E evi = 0;
+  static const int overflow = 1000000*1000000; // cxx11-error {{in-class initializer for static data member is not a constant expression}}
+                                               // expected-warning@-1 {{overflow in expression}}
+  static const int overflow_shift = 1<<32; // cxx11-error {{in-class initializer for static data member is not a constant expression}}
+  static const int overflow_shift2 = 1>>32; // cxx11-error {{in-class initializer for static data member is not a constant expression}}
+  static const int overflow_shift3 = 1<<-1; // cxx11-error {{in-class initializer for static data member is not a constant expression}}
+  static const int overflow_shift4 = 1<<-1; // cxx11-error {{in-class initializer for static data member is not a constant expression}}
+  static const int overflow_shift5 = -1<<1; // cxx11-error {{in-class initializer for static data member is not a constant expression}}
 
   void m() {
     sx = 0;
diff --git a/clang/test/SemaCXX/enum.cpp b/clang/test/SemaCXX/enum.cpp
index 739d35ec4a06b..9c398cc8da886 100644
--- a/clang/test/SemaCXX/enum.cpp
+++ b/clang/test/SemaCXX/enum.cpp
@@ -103,14 +103,14 @@ void PR8089() {
 // This is accepted as a GNU extension. In C++98, there was no provision for
 // expressions with UB to be non-constant.
 enum { overflow = 123456 * 234567 };
-#if __cplusplus >= 201103L
-// expected-warning@-2 {{expression is not an integral constant expression; folding it to a constant is a GNU extension}}
-// expected-note@-3 {{value 28958703552 is outside the range of representable values of type 'int'}}
-#else
-// expected-error@-5 {{expression is not an integral constant expression}}
-// expected-note@-6 {{value 28958703552 is outside the range of representable values of type 'int'}}
-// expected-warning@-7 {{overflow in expression; result is -1'106'067'520 with type 'int'}}
+// expected-error@-1 {{expression is not an integral constant expression}}
+// expected-note@-2 {{value 28958703552 is outside the range of representable values of type 'int'}}
+#if __cplusplus < 201103L
+// expected-warning@-4 {{overflow in expression; result is -1'106'067'520 with type 'int'}}
 #endif
+enum { overflow_shift = 1 << 32 };
+// expected-error@-1 {{expression is not an integral constant expression}}
+// expected-note@-2 {{shift count 32 >= width of type 'int' (32 bits)}}
 
 // FIXME: This is not consistent with the above case.
 enum NoFold : int { overflow2 = 123456 * 234567 };
@@ -123,6 +123,16 @@ enum NoFold : int { overflow2 = 123456 * 234567 };
 // expected-error@-7 {{expression is not an integral constant expression}}
 // expected-note@-8 {{value 28958703552 is outside the range of representable values of type 'int'}}
 #endif
+enum : int { overflow2_shift = 1 << 32 };
+#if __cplusplus >= 201103L
+// expected-error@-2 {{enumerator value is not a constant expression}}
+// expected-note@-3 {{shift count 32 >= width of type 'int' (32 bits)}}
+#else
+// expected-error@-5 {{expression is not an integral constant expression}}
+// expected-note@-6 {{shift count 32 >= width of type 'int' (32 bits)}}
+// expected-warning@-7 {{enumeration types with a fixed underlying type are a C++11 extension}}
+#endif
+
 
 // PR28903
 struct PR28903 {

From 01bd0394c9d5809be2d125ae1dfd3faef8bf0942 Mon Sep 17 00:00:00 2001
From: Carlos Seo <carlos.seo@linaro.org>
Date: Wed, 24 Jul 2024 11:18:08 -0300
Subject: [PATCH 031/427] [AArch64] Implement INIT/ADJUST_TRAMPOLINE (#70267)

Add support for llvm.init.trampoline and llvm.adjust.trampoline
intrinsics for AArch64.

Fixes https://github.com/llvm/llvm-project/issues/65573
Fixes https://github.com/llvm/llvm-project/issues/76927
Fixes https://github.com/llvm/llvm-project/issues/83555
Updates https://github.com/llvm/llvm-project/pull/66157

(cherry picked from commit c4b66bf4d065d3bbc2e2fac8512a6df8e013c704)
---
 compiler-rt/lib/builtins/README.txt           |  5 ++
 compiler-rt/lib/builtins/trampoline_setup.c   | 42 ++++++++++++++
 .../builtins/Unit/trampoline_setup_test.c     |  2 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    | 58 +++++++++++++++++++
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |  2 +
 llvm/test/CodeGen/AArch64/trampoline.ll       | 19 ++++++
 6 files changed, 127 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/trampoline.ll

diff --git a/compiler-rt/lib/builtins/README.txt b/compiler-rt/lib/builtins/README.txt
index 2d213d95f333a..19f26c92a0f94 100644
--- a/compiler-rt/lib/builtins/README.txt
+++ b/compiler-rt/lib/builtins/README.txt
@@ -272,6 +272,11 @@ switch32
 switch8
 switchu8
 
+// This function generates a custom trampoline function with the specific
+// realFunc and localsPtr values.
+void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
+                        const void* realFunc, void* localsPtr);
+
 // There is no C interface to the *_vfp_d8_d15_regs functions.  There are
 // called in the prolog and epilog of Thumb1 functions.  When the C++ ABI use
 // SJLJ for exceptions, each function with a catch clause or destructors needs
diff --git a/compiler-rt/lib/builtins/trampoline_setup.c b/compiler-rt/lib/builtins/trampoline_setup.c
index 844eb27944142..830e25e4c0303 100644
--- a/compiler-rt/lib/builtins/trampoline_setup.c
+++ b/compiler-rt/lib/builtins/trampoline_setup.c
@@ -41,3 +41,45 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
   __clear_cache(trampOnStack, &trampOnStack[10]);
 }
 #endif // __powerpc__ && !defined(__powerpc64__)
+
+// The AArch64 compiler generates calls to __trampoline_setup() when creating
+// trampoline functions on the stack for use with nested functions.
+// This function creates a custom 36-byte trampoline function on the stack
+// which loads x18 with a pointer to the outer function's locals
+// and then jumps to the target nested function.
+// Note: x18 is a reserved platform register on Windows and macOS.
+
+#if defined(__aarch64__) && defined(__ELF__)
+COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
+                                        int trampSizeAllocated,
+                                        const void *realFunc, void *localsPtr) {
+  // This should never happen, but if compiler did not allocate
+  // enough space on stack for the trampoline, abort.
+  if (trampSizeAllocated < 36)
+    compilerrt_abort();
+
+  // create trampoline
+  // Load realFunc into x17. mov/movk 16 bits at a time.
+  trampOnStack[0] =
+      0xd2800000u | ((((uint64_t)realFunc >> 0) & 0xffffu) << 5) | 0x11;
+  trampOnStack[1] =
+      0xf2a00000u | ((((uint64_t)realFunc >> 16) & 0xffffu) << 5) | 0x11;
+  trampOnStack[2] =
+      0xf2c00000u | ((((uint64_t)realFunc >> 32) & 0xffffu) << 5) | 0x11;
+  trampOnStack[3] =
+      0xf2e00000u | ((((uint64_t)realFunc >> 48) & 0xffffu) << 5) | 0x11;
+  // Load localsPtr into x18
+  trampOnStack[4] =
+      0xd2800000u | ((((uint64_t)localsPtr >> 0) & 0xffffu) << 5) | 0x12;
+  trampOnStack[5] =
+      0xf2a00000u | ((((uint64_t)localsPtr >> 16) & 0xffffu) << 5) | 0x12;
+  trampOnStack[6] =
+      0xf2c00000u | ((((uint64_t)localsPtr >> 32) & 0xffffu) << 5) | 0x12;
+  trampOnStack[7] =
+      0xf2e00000u | ((((uint64_t)localsPtr >> 48) & 0xffffu) << 5) | 0x12;
+  trampOnStack[8] = 0xd61f0220; // br x17
+
+  // Clear instruction cache.
+  __clear_cache(trampOnStack, &trampOnStack[9]);
+}
+#endif // defined(__aarch64__) && !defined(__APPLE__) && !defined(_WIN64)
diff --git a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
index da115fe764271..d51d35acaa02f 100644
--- a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
+++ b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
@@ -7,7 +7,7 @@
 
 /*
  * Tests nested functions
- * The ppc compiler generates a call to __trampoline_setup
+ * The ppc and aarch64 compilers generates a call to __trampoline_setup
  * The i386 and x86_64 compilers generate a call to ___enable_execute_stack
  */
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 87e7750768d2d..6d413a09407a9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1080,6 +1080,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   // Try to create BICs for vector ANDs.
   setTargetDAGCombine(ISD::AND);
 
+  // llvm.init.trampoline and llvm.adjust.trampoline
+  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
+
   // Vector add and sub nodes may conceal a high-half opportunity.
   // Also, try to fold ADD into CSINC/CSINV..
   setTargetDAGCombine({ISD::ADD, ISD::ABS, ISD::SUB, ISD::XOR, ISD::SINT_TO_FP,
@@ -6688,6 +6692,56 @@ static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) {
   return Final;
 }
 
+SDValue AArch64TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+                                                      SelectionDAG &DAG) const {
+  // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
+  if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
+    report_fatal_error(
+        "ADJUST_TRAMPOLINE operation is only supported on Linux.");
+
+  return Op.getOperand(0);
+}
+
+SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+
+  // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
+  if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
+    report_fatal_error("INIT_TRAMPOLINE operation is only supported on Linux.");
+
+  SDValue Chain = Op.getOperand(0);
+  SDValue Trmp = Op.getOperand(1); // trampoline
+  SDValue FPtr = Op.getOperand(2); // nested function
+  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+  SDLoc dl(Op);
+
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+
+  Entry.Ty = IntPtrTy;
+  Entry.Node = Trmp;
+  Args.push_back(Entry);
+  Entry.Node = DAG.getConstant(20, dl, MVT::i64);
+  Args.push_back(Entry);
+
+  Entry.Node = FPtr;
+  Args.push_back(Entry);
+  Entry.Node = Nest;
+  Args.push_back(Entry);
+
+  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
+      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
+
+  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+  return CallResult.second;
+}
+
 SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
                                               SelectionDAG &DAG) const {
   LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -6705,6 +6759,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerGlobalTLSAddress(Op, DAG);
   case ISD::PtrAuthGlobalAddress:
     return LowerPtrAuthGlobalAddress(Op, DAG);
+  case ISD::ADJUST_TRAMPOLINE:
+    return LowerADJUST_TRAMPOLINE(Op, DAG);
+  case ISD::INIT_TRAMPOLINE:
+    return LowerINIT_TRAMPOLINE(Op, DAG);
   case ISD::SETCC:
   case ISD::STRICT_FSETCC:
   case ISD::STRICT_FSETCCS:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index ef45e4f01ecd3..81e15185f985d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1143,6 +1143,8 @@ class AArch64TargetLowering : public TargetLowering {
   SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                          SDValue TVal, SDValue FVal, const SDLoc &dl,
                          SelectionDAG &DAG) const;
+  SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/trampoline.ll b/llvm/test/CodeGen/AArch64/trampoline.ll
new file mode 100644
index 0000000000000..293e538a7459d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/trampoline.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s
+
+declare void @llvm.init.trampoline(ptr, ptr, ptr);
+declare ptr @llvm.adjust.trampoline(ptr);
+
+define i64 @f(ptr nest %c, i64 %x, i64 %y) {
+  %sum = add i64 %x, %y
+  ret i64 %sum
+}
+
+define i64 @main() {
+  %val = alloca i64
+  %nval = bitcast ptr %val to ptr
+  %tramp = alloca [36 x i8], align 8
+  ; CHECK:	bl	__trampoline_setup
+  call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %nval)
+  %fp = call ptr @llvm.adjust.trampoline(ptr %tramp)
+  ret i64 0
+}

From 98b2bc5b08802ab0ee79b28e10ed3ea531588d67 Mon Sep 17 00:00:00 2001
From: Carlos Seo <carlos.seo@linaro.org>
Date: Wed, 24 Jul 2024 18:14:05 -0300
Subject: [PATCH 032/427] [Flang][Docs] Update information about AArch64
 trampolines (#100391)

Commits c4b66bf and 7647174 add support for AArch64 trampolines. Updated
documentation to reflect the changes.

(cherry picked from commit c6e69b041a7e6d18463f6cf684b10fd46a62c496)
---
 flang/docs/InternalProcedureTrampolines.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/docs/InternalProcedureTrampolines.md b/flang/docs/InternalProcedureTrampolines.md
index ef02f1d737c82..41f6155332a47 100644
--- a/flang/docs/InternalProcedureTrampolines.md
+++ b/flang/docs/InternalProcedureTrampolines.md
@@ -239,7 +239,7 @@ automatically deallocated at the end of `host()` invocation.
 Unfortunately, this requires the program stack to be writeable and executable
 at the same time, which might be a security concern.
 
-> NOTE: LLVM's AArch64 backend supports `nest` attribute, but it does not seem to support trampoline intrinsics.
+> NOTE: LLVM's AArch64 backend supports `nest` attribute, but it requires the compiler-rt runtime selected via the `-rtlib=compiler-rt` flag.
 
 ## Alternative implementation(s)
 

From 14fa8cd47eddf6f5837759872d99836b50eb55be Mon Sep 17 00:00:00 2001
From: Daniil Kovalev <dkovalev@accesssoftek.com>
Date: Thu, 25 Jul 2024 02:13:30 +0300
Subject: [PATCH 033/427] [PAC][clang] Enable `-fptrauth-indirect-gotos` as
 part of pauthtest ABI (#100480)

(cherry picked from commit 3f6eb13abf643afec17a73448ede380606531226)
---
 clang/lib/Driver/ToolChains/Clang.cpp | 4 ++++
 clang/test/Driver/aarch64-ptrauth.c   | 6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 78936fd634f33..5de29f1eca614 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1516,6 +1516,10 @@ static void handlePAuthABI(const ArgList &DriverArgs, ArgStringList &CC1Args) {
           options::OPT_fno_ptrauth_vtable_pointer_type_discrimination))
     CC1Args.push_back("-fptrauth-vtable-pointer-type-discrimination");
 
+  if (!DriverArgs.hasArg(options::OPT_fptrauth_indirect_gotos,
+                         options::OPT_fno_ptrauth_indirect_gotos))
+    CC1Args.push_back("-fptrauth-indirect-gotos");
+
   if (!DriverArgs.hasArg(options::OPT_fptrauth_init_fini,
                          options::OPT_fno_ptrauth_init_fini))
     CC1Args.push_back("-fptrauth-init-fini");
diff --git a/clang/test/Driver/aarch64-ptrauth.c b/clang/test/Driver/aarch64-ptrauth.c
index d13930e8f4b37..eeb9500792d75 100644
--- a/clang/test/Driver/aarch64-ptrauth.c
+++ b/clang/test/Driver/aarch64-ptrauth.c
@@ -19,16 +19,16 @@
 // RUN: %clang -### -c --target=aarch64-linux-pauthtest %s 2>&1 | FileCheck %s --check-prefix=PAUTHABI1
 // PAUTHABI1:      "-cc1"{{.*}} "-triple" "aarch64-unknown-linux-pauthtest"
 // PAUTHABI1-SAME: "-target-abi" "pauthtest"
-// PAUTHABI1-SAME: "-fptrauth-intrinsics" "-fptrauth-calls" "-fptrauth-returns" "-fptrauth-auth-traps" "-fptrauth-vtable-pointer-address-discrimination" "-fptrauth-vtable-pointer-type-discrimination" "-fptrauth-init-fini"
+// PAUTHABI1-SAME: "-fptrauth-intrinsics" "-fptrauth-calls" "-fptrauth-returns" "-fptrauth-auth-traps" "-fptrauth-vtable-pointer-address-discrimination" "-fptrauth-vtable-pointer-type-discrimination" "-fptrauth-indirect-gotos" "-fptrauth-init-fini"
 
 // RUN: %clang -### -c --target=aarch64 -mabi=pauthtest -fno-ptrauth-intrinsics \
 // RUN:   -fno-ptrauth-calls -fno-ptrauth-returns -fno-ptrauth-auth-traps \
 // RUN:   -fno-ptrauth-vtable-pointer-address-discrimination -fno-ptrauth-vtable-pointer-type-discrimination \
-// RUN:   -fno-ptrauth-init-fini %s 2>&1 | FileCheck %s --check-prefix=PAUTHABI2
+// RUN:   -fno-ptrauth-indirect-gotos -fno-ptrauth-init-fini %s 2>&1 | FileCheck %s --check-prefix=PAUTHABI2
 // RUN: %clang -### -c --target=aarch64-pauthtest -fno-ptrauth-intrinsics \
 // RUN:   -fno-ptrauth-calls -fno-ptrauth-returns -fno-ptrauth-auth-traps \
 // RUN:   -fno-ptrauth-vtable-pointer-address-discrimination -fno-ptrauth-vtable-pointer-type-discrimination \
-// RUN:   -fno-ptrauth-init-fini %s 2>&1 | FileCheck %s --check-prefix=PAUTHABI2
+// RUN:   -fno-ptrauth-indirect-gotos -fno-ptrauth-init-fini %s 2>&1 | FileCheck %s --check-prefix=PAUTHABI2
 // PAUTHABI2:     "-cc1"
 // PAUTHABI2-NOT: "-fptrauth-
 

From bdeb078aa24734ce6d2e573701f63e6111d7bab4 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn@outlook.com>
Date: Wed, 24 Jul 2024 21:06:19 -0500
Subject: [PATCH 034/427] [libc] Only add '-fno-builtin-*' on the entrypoints
 that use them (#100481)

Summary:
The GPU build needs to be able to inline stuff in LTO. Builtin
transformations cause problems on the functions that the optimizer does
heavy libcall recognition on. Previously we moved to using
`-fno-builtin-*` to allow us to only disable the problematic ones.
However, this still didn't allow inlining because each function had the
attribute that told the inliner not to inlining a nobuiltin function
into a non-nobuiltin function

This patch fixes that by only applying these attributes to the
entrypoints that define them. That is enough to prevent recursive calls
within the definitoins themselves.

(cherry picked from commit 8e43acbfedf53ded43ec693ddaaf518cb7416c1c)
---
 libc/cmake/modules/LLVMLibCCompileOptionRules.cmake | 11 +----------
 libc/cmake/modules/LLVMLibCObjectRules.cmake        | 11 +++++++++++
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index 97d1c7262d24d..7a1c45a814eb6 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -104,16 +104,7 @@ function(_get_common_compile_options output_var flags)
       list(APPEND compile_options "-ffixed-point")
     endif()
 
-    # Builtin recognition causes issues when trying to implement the builtin
-    # functions themselves. The GPU backends do not use libcalls so we disable
-    # the known problematic ones. This allows inlining during LTO linking.
-    if(LIBC_TARGET_OS_IS_GPU)
-      set(libc_builtins bcmp strlen memmem bzero memcmp memcpy memmem memmove
-                        memset strcmp strstr)
-      foreach(builtin ${libc_builtins})
-        list(APPEND compile_options "-fno-builtin-${builtin}")
-      endforeach()
-    else()
+    if(NOT LIBC_TARGET_OS_IS_GPU)
       list(APPEND compile_options "-fno-builtin")
     endif()
 
diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake
index 2d3db38ecd8a3..68b5ed1ed51c0 100644
--- a/libc/cmake/modules/LLVMLibCObjectRules.cmake
+++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake
@@ -279,6 +279,17 @@ function(create_entrypoint_object fq_target_name)
   add_dependencies(${fq_target_name} ${full_deps_list})
   target_link_libraries(${fq_target_name} ${full_deps_list})
 
+  # Builtin recognition causes issues when trying to implement the builtin
+  # functions themselves. The GPU backends do not use libcalls so we disable the
+  # known problematic ones on the entrypoints that implement them.
+  if(LIBC_TARGET_OS_IS_GPU)
+    set(libc_builtins bcmp strlen memmem bzero memcmp memcpy memmem memmove
+                      memset strcmp strstr)
+    if(${ADD_ENTRYPOINT_OBJ_NAME} IN_LIST libc_builtins)
+      target_compile_options(${fq_target_name} PRIVATE -fno-builtin-${ADD_ENTRYPOINT_OBJ_NAME})
+    endif()
+  endif()
+
   set_target_properties(
     ${fq_target_name}
     PROPERTIES

From 86e7adaa1b77089c7d8e39f13b8365a7fa92dde6 Mon Sep 17 00:00:00 2001
From: Kiran Chandramohan <kiran.chandramohan@arm.com>
Date: Wed, 24 Jul 2024 16:28:24 +0100
Subject: [PATCH 035/427] [Flang][Driver] Enable config file options (#100343)

Config files provide a facility to invoke the compiler with a predefined
set of options. The patch only enables these options in the flang
driver. Functionality was always there.

(cherry picked from commit 8a77961280536b680c404a49002a00b988ca45fc)
---
 clang/include/clang/Driver/Options.td         | 10 +--
 flang/test/Driver/Inputs/config-1.cfg         |  1 +
 flang/test/Driver/Inputs/config-2.cfg         |  1 +
 flang/test/Driver/Inputs/config-2a.cfg        |  1 +
 flang/test/Driver/Inputs/config-6.cfg         |  1 +
 flang/test/Driver/Inputs/config/config-4.cfg  |  1 +
 flang/test/Driver/Inputs/config2/config-4.cfg |  1 +
 flang/test/Driver/config-file.f90             | 63 +++++++++++++++++++
 8 files changed, 74 insertions(+), 5 deletions(-)
 create mode 100644 flang/test/Driver/Inputs/config-1.cfg
 create mode 100644 flang/test/Driver/Inputs/config-2.cfg
 create mode 100644 flang/test/Driver/Inputs/config-2a.cfg
 create mode 100644 flang/test/Driver/Inputs/config-6.cfg
 create mode 100644 flang/test/Driver/Inputs/config/config-4.cfg
 create mode 100644 flang/test/Driver/Inputs/config2/config-4.cfg
 create mode 100644 flang/test/Driver/config-file.f90

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 69269cf7537b0..359a698ea87dd 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1165,19 +1165,19 @@ def client__name : JoinedOrSeparate<["-"], "client_name">;
 def combine : Flag<["-", "--"], "combine">, Flags<[NoXarchOption, Unsupported]>;
 def compatibility__version : JoinedOrSeparate<["-"], "compatibility_version">;
 def config : Joined<["--"], "config=">, Flags<[NoXarchOption]>,
-  Visibility<[ClangOption, CLOption, DXCOption]>, MetaVarName<"<file>">,
+  Visibility<[ClangOption, CLOption, DXCOption, FlangOption]>, MetaVarName<"<file>">,
   HelpText<"Specify configuration file">;
-def : Separate<["--"], "config">, Alias<config>;
+def : Separate<["--"], "config">, Visibility<[ClangOption, CLOption, DXCOption, FlangOption]>, Alias<config>;
 def no_default_config : Flag<["--"], "no-default-config">,
-  Flags<[NoXarchOption]>, Visibility<[ClangOption, CLOption, DXCOption]>,
+  Flags<[NoXarchOption]>, Visibility<[ClangOption, CLOption, DXCOption, FlangOption]>,
   HelpText<"Disable loading default configuration files">;
 def config_system_dir_EQ : Joined<["--"], "config-system-dir=">,
   Flags<[NoXarchOption, HelpHidden]>,
-  Visibility<[ClangOption, CLOption, DXCOption]>,
+  Visibility<[ClangOption, CLOption, DXCOption, FlangOption]>,
   HelpText<"System directory for configuration files">;
 def config_user_dir_EQ : Joined<["--"], "config-user-dir=">,
   Flags<[NoXarchOption, HelpHidden]>,
-  Visibility<[ClangOption, CLOption, DXCOption]>,
+  Visibility<[ClangOption, CLOption, DXCOption, FlangOption]>,
   HelpText<"User directory for configuration files">;
 def coverage : Flag<["-", "--"], "coverage">, Group<Link_Group>,
   Visibility<[ClangOption, CLOption]>;
diff --git a/flang/test/Driver/Inputs/config-1.cfg b/flang/test/Driver/Inputs/config-1.cfg
new file mode 100644
index 0000000000000..824e128a42b63
--- /dev/null
+++ b/flang/test/Driver/Inputs/config-1.cfg
@@ -0,0 +1 @@
+-flto
diff --git a/flang/test/Driver/Inputs/config-2.cfg b/flang/test/Driver/Inputs/config-2.cfg
new file mode 100644
index 0000000000000..4e8d01b668e83
--- /dev/null
+++ b/flang/test/Driver/Inputs/config-2.cfg
@@ -0,0 +1 @@
+-fno-signed-zeros
diff --git a/flang/test/Driver/Inputs/config-2a.cfg b/flang/test/Driver/Inputs/config-2a.cfg
new file mode 100644
index 0000000000000..cd2916c98afe2
--- /dev/null
+++ b/flang/test/Driver/Inputs/config-2a.cfg
@@ -0,0 +1 @@
+-fopenmp
diff --git a/flang/test/Driver/Inputs/config-6.cfg b/flang/test/Driver/Inputs/config-6.cfg
new file mode 100644
index 0000000000000..81e9830f63be4
--- /dev/null
+++ b/flang/test/Driver/Inputs/config-6.cfg
@@ -0,0 +1 @@
+-fstack-arrays
diff --git a/flang/test/Driver/Inputs/config/config-4.cfg b/flang/test/Driver/Inputs/config/config-4.cfg
new file mode 100644
index 0000000000000..d15a7108d4e21
--- /dev/null
+++ b/flang/test/Driver/Inputs/config/config-4.cfg
@@ -0,0 +1 @@
+-O3
diff --git a/flang/test/Driver/Inputs/config2/config-4.cfg b/flang/test/Driver/Inputs/config2/config-4.cfg
new file mode 100644
index 0000000000000..9d1c3e38c8680
--- /dev/null
+++ b/flang/test/Driver/Inputs/config2/config-4.cfg
@@ -0,0 +1 @@
+-ffp-contract=fast
diff --git a/flang/test/Driver/config-file.f90 b/flang/test/Driver/config-file.f90
new file mode 100644
index 0000000000000..70316dd971f36
--- /dev/null
+++ b/flang/test/Driver/config-file.f90
@@ -0,0 +1,63 @@
+!--- Config file (full path) in output of -###
+!
+! RUN: %flang --config-system-dir=%S/Inputs/config --config-user-dir=%S/Inputs/config2 -o /dev/null -v 2>&1 | FileCheck %s -check-prefix CHECK-DIRS
+! CHECK-DIRS: System configuration file directory: {{.*}}/Inputs/config
+! CHECK-DIRS: User configuration file directory: {{.*}}/Inputs/config2
+!
+!--- Config file (full path) in output of -###
+!
+! RUN: %flang --config %S/Inputs/config-1.cfg -S %s -### 2>&1 | FileCheck %s -check-prefix CHECK-HHH
+! RUN: %flang --config=%S/Inputs/config-1.cfg -S %s -### 2>&1 | FileCheck %s -check-prefix CHECK-HHH
+! CHECK-HHH: Configuration file: {{.*}}Inputs{{.}}config-1.cfg
+! CHECK-HHH: -flto
+!
+!
+!--- Config file (full path) in output of -v
+!
+! RUN: %flang --config %S/Inputs/config-1.cfg -S %s -o /dev/null -v 2>&1 | FileCheck %s -check-prefix CHECK-V
+! CHECK-V: Configuration file: {{.*}}Inputs{{.}}config-1.cfg
+! CHECK-V: -flto
+!
+!--- Config file in output of -###
+!
+! RUN: %flang --config-system-dir=%S/Inputs --config-user-dir= --config config-1.cfg -S %s -### 2>&1 | FileCheck %s -check-prefix CHECK-HHH2
+! CHECK-HHH2: Configuration file: {{.*}}Inputs{{.}}config-1.cfg
+! CHECK-HHH2: -flto
+!
+!--- Config file in output of -v
+!
+! RUN: %flang --config-system-dir=%S/Inputs --config-user-dir= --config config-1.cfg -S %s -o /dev/null -v 2>&1 | FileCheck %s -check-prefix CHECK-V2
+! CHECK-V2: Configuration file: {{.*}}Inputs{{.}}config-1.cfg
+! CHECK-V2: -flto
+!
+!--- Nested config files
+!
+! RUN: %flang --config-system-dir=%S/Inputs --config-user-dir= --config config-2.cfg -S %s -### 2>&1 | FileCheck %s -check-prefix CHECK-NESTED
+! CHECK-NESTED: Configuration file: {{.*}}Inputs{{.}}config-2.cfg
+! CHECK-NESTED: -fno-signed-zeros
+!
+! RUN: %flang --config-system-dir=%S/Inputs --config-user-dir=%S/Inputs/config --config config-6.cfg -S %s -### 2>&1 | FileCheck %s -check-prefix CHECK-NESTED2
+! CHECK-NESTED2: Configuration file: {{.*}}Inputs{{.}}config-6.cfg
+! CHECK-NESTED2: -fstack-arrays
+!
+!
+! RUN: %flang --config %S/Inputs/config-2a.cfg -S %s -### 2>&1 | FileCheck %s -check-prefix CHECK-NESTEDa
+! CHECK-NESTEDa: Configuration file: {{.*}}Inputs{{.}}config-2a.cfg
+! CHECK-NESTEDa: -fopenmp
+!
+! RUN: %flang --config-system-dir=%S/Inputs --config-user-dir= --config config-2a.cfg -S %s -### 2>&1 | FileCheck %s -check-prefix CHECK-NESTED2a
+! CHECK-NESTED2a: Configuration file: {{.*}}Inputs{{.}}config-2a.cfg
+! CHECK-NESTED2a: -fopenmp
+!
+!--- User directory is searched first.
+!
+! RUN: %flang --config-system-dir=%S/Inputs/config --config-user-dir=%S/Inputs/config2 --config config-4.cfg -S %s -o /dev/null -v 2>&1 | FileCheck %s -check-prefix CHECK-PRECEDENCE
+! CHECK-PRECEDENCE: Configuration file: {{.*}}Inputs{{.}}config2{{.}}config-4.cfg
+! CHECK-PRECEDENCE: -ffp-contract=fast
+!
+!--- Multiple configuration files can be specified.
+! RUN: %flang --config-system-dir=%S/Inputs/config --config-user-dir= --config config-4.cfg --config %S/Inputs/config2/config-4.cfg -S %s -o /dev/null -v 2>&1 | FileCheck %s -check-prefix CHECK-TWO-CONFIGS
+! CHECK-TWO-CONFIGS: Configuration file: {{.*}}Inputs{{.}}config{{.}}config-4.cfg
+! CHECK-TWO-CONFIGS-NEXT: Configuration file: {{.*}}Inputs{{.}}config2{{.}}config-4.cfg
+! CHECK-TWO-CONFIGS: -ffp-contract=fast
+! CHECK-TWO-CONFIGS: -O3

From 3e4abe985a584f3067448b5169c05509d3b571d2 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin@arm.com>
Date: Wed, 24 Jul 2024 14:30:25 +0100
Subject: [PATCH 036/427] [AArch64][SME] Rewrite __arm_get_current_vg to
 preserve required registers (#100143)

The documentation for the __arm_get_current_vg support routine specifies
that the following registers are call-preserved:
 - X1-X15, X19-X29 and SP
 - Z0-Z31
 - P0-P15

This patch rewrites the implementation of this routine in compiler-rt,
as the current version does not guarantee that these registers will be
preserved.

(cherry picked from commit 6da6772bf0a33131aa8540c9d4f60d5db75c32b5)
---
 compiler-rt/lib/builtins/aarch64/sme-abi-vg.c | 28 ------------
 compiler-rt/lib/builtins/aarch64/sme-abi.S    | 44 +++++++++++++++++++
 2 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c b/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
index 062cf80fc6848..20061012e16c6 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
@@ -10,15 +10,6 @@ struct FEATURES {
 
 extern struct FEATURES __aarch64_cpu_features;
 
-struct SME_STATE {
-  long PSTATE;
-  long TPIDR2_EL0;
-};
-
-extern struct SME_STATE __arm_sme_state(void) __arm_streaming_compatible;
-
-extern bool __aarch64_has_sme_and_tpidr2_el0;
-
 #if __GNUC__ >= 9
 #pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
 #endif
@@ -28,22 +19,3 @@ __attribute__((constructor(90))) static void get_aarch64_cpu_features(void) {
 
   __init_cpu_features();
 }
-
-__attribute__((target("sve"))) long
-__arm_get_current_vg(void) __arm_streaming_compatible {
-  struct SME_STATE State = __arm_sme_state();
-  unsigned long long features =
-      __atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED);
-  bool HasSVE = features & (1ULL << FEAT_SVE);
-
-  if (!HasSVE && !__aarch64_has_sme_and_tpidr2_el0)
-    return 0;
-
-  if (HasSVE || (State.PSTATE & 1)) {
-    long vl;
-    __asm__ __volatile__("cntd %0" : "=r"(vl));
-    return vl;
-  }
-
-  return 0;
-}
diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S
index 4c0ff66931db7..cd8153f60670f 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S
@@ -12,11 +12,15 @@
 #if !defined(__APPLE__)
 #define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
 #define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
+#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)
+#define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features)
 #else
 // MachO requires @page/@pageoff directives because the global is defined
 // in a different file. Otherwise this file may fail to build.
 #define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page
 #define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff
+#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page
+#define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff
 #endif
 
 .arch armv9-a+sme
@@ -180,6 +184,46 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
   ret
 END_COMPILERRT_OUTLINE_FUNCTION(__arm_za_disable)
 
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg)
+  .variant_pcs __arm_get_current_vg
+  BTI_C
+
+  stp     x29, x30, [sp, #-16]!
+  .cfi_def_cfa_offset 16
+  mov     x29, sp
+  .cfi_def_cfa w29, 16
+  .cfi_offset w30, -8
+  .cfi_offset w29, -16
+  adrp    x17, CPU_FEATS_SYMBOL
+  ldr     w17, [x17, CPU_FEATS_SYMBOL_OFFSET]
+  tbnz    w17, #30, 0f
+  adrp    x16, TPIDR2_SYMBOL
+  ldrb    w16, [x16, TPIDR2_SYMBOL_OFFSET]
+  cbz     w16, 1f
+0:
+  mov     x18, x1
+  bl      __arm_sme_state
+  mov     x1, x18
+  and     x17, x17, #0x40000000
+  bfxil   x17, x0, #0, #1
+  cbz     x17, 1f
+  cntd    x0
+  .cfi_def_cfa wsp, 16
+  ldp     x29, x30, [sp], #16
+  .cfi_def_cfa_offset 0
+  .cfi_restore w30
+  .cfi_restore w29
+  ret
+1:
+  mov     x0, xzr
+  .cfi_def_cfa wsp, 16
+  ldp     x29, x30, [sp], #16
+  .cfi_def_cfa_offset 0
+  .cfi_restore w30
+  .cfi_restore w29
+  ret
+END_COMPILERRT_OUTLINE_FUNCTION(__arm_get_current_vg)
+
 NO_EXEC_STACK_DIRECTIVE
 
 // GNU property note for BTI and PAC

From d767c52c26d7f9a143b23934917645efe0763364 Mon Sep 17 00:00:00 2001
From: Vlad Serebrennikov <serebrennikov.vladislav@gmail.com>
Date: Thu, 25 Jul 2024 20:15:14 +0400
Subject: [PATCH 037/427] [clang] Remove `__is_layout_compatible` from
 revertible type traits list (#100572)

`__is_layout_compatible` was added in Clang 19 (#81506), and at that
time it wasn't entirely clear whether it should be a revertible type
trait or not. We decided to follow the example of other type traits.
Since then #95969 happened, and now we know that we don't want new
revertible type traits.

This patch removes `__is_layout_compatible` from revertible type traits
list, and leaves a comment what revertible type traits are, and that new
type traits should not be added there.

The intention is to also cherry-pick this to 19 branch.

(cherry picked from commit 3295d377f37a60597321f502d164b5d6b1948e28)
---
 clang/lib/Parse/ParseExpr.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 0a017ae79de75..e82b565272831 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -763,6 +763,9 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback {
 bool Parser::isRevertibleTypeTrait(const IdentifierInfo *II,
                                    tok::TokenKind *Kind) {
   if (RevertibleTypeTraits.empty()) {
+// Revertible type trait is a feature for backwards compatibility with older
+// standard libraries that declare their own structs with the same name as
+// the builtins listed below. New builtins should NOT be added to this list.
 #define RTT_JOIN(X, Y) X##Y
 #define REVERTIBLE_TYPE_TRAIT(Name)                                            \
   RevertibleTypeTraits[PP.getIdentifierInfo(#Name)] = RTT_JOIN(tok::kw_, Name)
@@ -790,7 +793,6 @@ bool Parser::isRevertibleTypeTrait(const IdentifierInfo *II,
     REVERTIBLE_TYPE_TRAIT(__is_fundamental);
     REVERTIBLE_TYPE_TRAIT(__is_integral);
     REVERTIBLE_TYPE_TRAIT(__is_interface_class);
-    REVERTIBLE_TYPE_TRAIT(__is_layout_compatible);
     REVERTIBLE_TYPE_TRAIT(__is_literal);
     REVERTIBLE_TYPE_TRAIT(__is_lvalue_expr);
     REVERTIBLE_TYPE_TRAIT(__is_lvalue_reference);

From ec17a7a75911547b4567bb16fca72042abd105ff Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Thu, 25 Jul 2024 12:16:48 -0500
Subject: [PATCH 038/427] [libc++] Add missing xlocale.h include on Apple and
 FreeBSD (#99689)

The `<locale>` header uses `strtoll_l` and friends which are defined in
`<xlocale.h>` on these platforms. While this works via transitive
includes when modules are disabled, this doesn't work anymore if the
platforms are modularized properly.

(cherry picked from commit a55df237375e98cfc2520d5eb1a23b302ef02ba0)
---
 libcxx/include/locale | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libcxx/include/locale b/libcxx/include/locale
index dbec23a2c936d..573910a85bef5 100644
--- a/libcxx/include/locale
+++ b/libcxx/include/locale
@@ -232,6 +232,10 @@ template <class charT> class messages_byname;
 #    include <__locale_dir/locale_base_api/bsd_locale_fallbacks.h>
 #  endif
 
+#  if defined(__APPLE__) || defined(__FreeBSD__)
+#    include <xlocale.h>
+#  endif
+
 #  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #    pragma GCC system_header
 #  endif

From 511b8b094dd89f826cc95b52a68804e68d854a10 Mon Sep 17 00:00:00 2001
From: Anton Korobeynikov <anton@korobeynikov.info>
Date: Thu, 25 Jul 2024 11:57:46 -0700
Subject: [PATCH 039/427] Normalize ptrauth handling in sanitizer runtime
 (#100483)

1. Include `ptrauth.h` if `ptrauth_intrinsics` language feature is specified (per ptrauth spec, this is what enables `ptrauh.h` usage and functions like `ptrauth_strip`)
 2. For PAC-RET fallback implement two changes:
    1. Switch to macro, so we can ignore key argument
    2. Ensure the unsigned value is erased from LR, so the possibility of gadget reuse is reduced.

Fixes #100467

(cherry picked from commit cc4f98979b079b517edd8a71f56a8975f436e63d)
---
 .../lib/sanitizer_common/sanitizer_ptrauth.h  | 46 ++++++++++---------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h b/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h
index 5200354694851..b5215c0d49c06 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h
@@ -9,31 +9,33 @@
 #ifndef SANITIZER_PTRAUTH_H
 #define SANITIZER_PTRAUTH_H
 
-#if __has_feature(ptrauth_calls)
-#include <ptrauth.h>
+#if __has_feature(ptrauth_intrinsics)
+#  include <ptrauth.h>
 #elif defined(__ARM_FEATURE_PAC_DEFAULT) && !defined(__APPLE__)
-inline unsigned long ptrauth_strip(void* __value, unsigned int __key) {
-  // On the stack the link register is protected with Pointer
-  // Authentication Code when compiled with -mbranch-protection.
-  // Let's stripping the PAC unconditionally because xpaclri is in
-  // the NOP space so will do nothing when it is not enabled or not available.
-  unsigned long ret;
-  asm volatile(
-      "mov x30, %1\n\t"
-      "hint #7\n\t"  // xpaclri
-      "mov %0, x30\n\t"
-      : "=r"(ret)
-      : "r"(__value)
-      : "x30");
-  return ret;
-}
-#define ptrauth_auth_data(__value, __old_key, __old_data) __value
-#define ptrauth_string_discriminator(__string) ((int)0)
+// On the stack the link register is protected with Pointer
+// Authentication Code when compiled with -mbranch-protection.
+// Let's stripping the PAC unconditionally because xpaclri is in
+// the NOP space so will do nothing when it is not enabled or not available.
+#  define ptrauth_strip(__value, __key) \
+    ({                                  \
+      unsigned long ret;                \
+      asm volatile(                     \
+          "mov x30, %1\n\t"             \
+          "hint #7\n\t"                 \
+          "mov %0, x30\n\t"             \
+          "mov x30, xzr\n\t"            \
+          : "=r"(ret)                   \
+          : "r"(__value)                \
+          : "x30");                     \
+      ret;                              \
+    })
+#  define ptrauth_auth_data(__value, __old_key, __old_data) __value
+#  define ptrauth_string_discriminator(__string) ((int)0)
 #else
 // Copied from <ptrauth.h>
-#define ptrauth_strip(__value, __key) __value
-#define ptrauth_auth_data(__value, __old_key, __old_data) __value
-#define ptrauth_string_discriminator(__string) ((int)0)
+#  define ptrauth_strip(__value, __key) __value
+#  define ptrauth_auth_data(__value, __old_key, __old_data) __value
+#  define ptrauth_string_discriminator(__string) ((int)0)
 #endif
 
 #define STRIP_PAC_PC(pc) ((uptr)ptrauth_strip(pc, 0))

From debf818b93b5fe5fbe3a8e2434f90d0a830b865b Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles@arm.com>
Date: Thu, 25 Jul 2024 16:53:27 +0100
Subject: [PATCH 040/427] [flang][OpenMP] Initialize privatised derived type
 variables (#100417)

Fixes #91928

(cherry picked from commit 98e733eaf2af1a5c1d9392e279d21182ffdf560d)
---
 flang/include/flang/Lower/ConvertVariable.h   |  8 ++++
 flang/lib/Lower/ConvertVariable.cpp           | 23 ++++-----
 .../lib/Lower/OpenMP/DataSharingProcessor.cpp |  6 +++
 .../Lower/OpenMP/private-derived-type.f90     | 47 +++++++++++++++++++
 4 files changed, 73 insertions(+), 11 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/private-derived-type.f90

diff --git a/flang/include/flang/Lower/ConvertVariable.h b/flang/include/flang/Lower/ConvertVariable.h
index 515f4695951b4..de394a39e112e 100644
--- a/flang/include/flang/Lower/ConvertVariable.h
+++ b/flang/include/flang/Lower/ConvertVariable.h
@@ -62,6 +62,14 @@ using AggregateStoreMap = llvm::DenseMap<AggregateStoreKey, mlir::Value>;
 void instantiateVariable(AbstractConverter &, const pft::Variable &var,
                          SymMap &symMap, AggregateStoreMap &storeMap);
 
+/// Does this variable have a default initialization?
+bool hasDefaultInitialization(const Fortran::semantics::Symbol &sym);
+
+/// Call default initialization runtime routine to initialize \p var.
+void defaultInitializeAtRuntime(Fortran::lower::AbstractConverter &converter,
+                                const Fortran::semantics::Symbol &sym,
+                                Fortran::lower::SymMap &symMap);
+
 /// Create a fir::GlobalOp given a module variable definition. This is intended
 /// to be used when lowering a module definition, not when lowering variables
 /// used from a module. For used variables instantiateVariable must directly be
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 47ad48fb322cc..4fcfa0b126e04 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -72,7 +72,8 @@ static mlir::Value genScalarValue(Fortran::lower::AbstractConverter &converter,
 }
 
 /// Does this variable have a default initialization?
-static bool hasDefaultInitialization(const Fortran::semantics::Symbol &sym) {
+bool Fortran::lower::hasDefaultInitialization(
+    const Fortran::semantics::Symbol &sym) {
   if (sym.has<Fortran::semantics::ObjectEntityDetails>() && sym.size())
     if (!Fortran::semantics::IsAllocatableOrPointer(sym))
       if (const Fortran::semantics::DeclTypeSpec *declTypeSpec = sym.GetType())
@@ -353,7 +354,7 @@ static mlir::Value genComponentDefaultInit(
       // global constructor since this has no runtime cost.
       componentValue = fir::factory::createUnallocatedBox(
           builder, loc, componentTy, std::nullopt);
-    } else if (hasDefaultInitialization(component)) {
+    } else if (Fortran::lower::hasDefaultInitialization(component)) {
       // Component type has default initialization.
       componentValue = genDefaultInitializerValue(converter, loc, component,
                                                   componentTy, stmtCtx);
@@ -556,7 +557,7 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter,
                 builder.createConvert(loc, symTy, fir::getBase(initVal));
             builder.create<fir::HasValueOp>(loc, castTo);
           });
-    } else if (hasDefaultInitialization(sym)) {
+    } else if (Fortran::lower::hasDefaultInitialization(sym)) {
       Fortran::lower::createGlobalInitialization(
           builder, global, [&](fir::FirOpBuilder &builder) {
             Fortran::lower::StatementContext stmtCtx(
@@ -752,17 +753,15 @@ mustBeDefaultInitializedAtRuntime(const Fortran::lower::pft::Variable &var) {
     return true;
   // Local variables (including function results), and intent(out) dummies must
   // be default initialized at runtime if their type has default initialization.
-  return hasDefaultInitialization(sym);
+  return Fortran::lower::hasDefaultInitialization(sym);
 }
 
 /// Call default initialization runtime routine to initialize \p var.
-static void
-defaultInitializeAtRuntime(Fortran::lower::AbstractConverter &converter,
-                           const Fortran::lower::pft::Variable &var,
-                           Fortran::lower::SymMap &symMap) {
+void Fortran::lower::defaultInitializeAtRuntime(
+    Fortran::lower::AbstractConverter &converter,
+    const Fortran::semantics::Symbol &sym, Fortran::lower::SymMap &symMap) {
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
   mlir::Location loc = converter.getCurrentLocation();
-  const Fortran::semantics::Symbol &sym = var.getSymbol();
   fir::ExtendedValue exv = converter.getSymbolExtendedValue(sym, &symMap);
   if (Fortran::semantics::IsOptional(sym)) {
     // 15.5.2.12 point 3, absent optional dummies are not initialized.
@@ -927,7 +926,8 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter,
   if (needDummyIntentoutFinalization(var))
     finalizeAtRuntime(converter, var, symMap);
   if (mustBeDefaultInitializedAtRuntime(var))
-    defaultInitializeAtRuntime(converter, var, symMap);
+    Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(),
+                                               symMap);
   if (Fortran::semantics::NeedCUDAAlloc(var.getSymbol())) {
     auto *builder = &converter.getFirOpBuilder();
     mlir::Location loc = converter.getCurrentLocation();
@@ -1168,7 +1168,8 @@ static void instantiateAlias(Fortran::lower::AbstractConverter &converter,
   // do not try optimizing this to single default initializations of
   // the equivalenced storages. Keep lowering simple.
   if (mustBeDefaultInitializedAtRuntime(var))
-    defaultInitializeAtRuntime(converter, var, symMap);
+    Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(),
+                                               symMap);
 }
 
 //===--------------------------------------------------------------===//
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 7e76a81e0df92..a340b62eb7b66 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -13,6 +13,7 @@
 #include "DataSharingProcessor.h"
 
 #include "Utils.h"
+#include "flang/Lower/ConvertVariable.h"
 #include "flang/Lower/PFTBuilder.h"
 #include "flang/Lower/SymbolMap.h"
 #include "flang/Optimizer/Builder/HLFIRTools.h"
@@ -117,6 +118,11 @@ void DataSharingProcessor::cloneSymbol(const semantics::Symbol *sym) {
   bool success = converter.createHostAssociateVarClone(*sym);
   (void)success;
   assert(success && "Privatization failed due to existing binding");
+
+  bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate);
+  if (!isFirstPrivate &&
+      Fortran::lower::hasDefaultInitialization(sym->GetUltimate()))
+    Fortran::lower::defaultInitializeAtRuntime(converter, *sym, *symTable);
 }
 
 void DataSharingProcessor::copyFirstPrivateSymbol(
diff --git a/flang/test/Lower/OpenMP/private-derived-type.f90 b/flang/test/Lower/OpenMP/private-derived-type.f90
new file mode 100644
index 0000000000000..230484f20c11d
--- /dev/null
+++ b/flang/test/Lower/OpenMP/private-derived-type.f90
@@ -0,0 +1,47 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s
+
+subroutine s4
+  type y3
+    integer,allocatable::x
+  end type y3
+  type(y3)::v
+  !$omp parallel
+  !$omp do private(v)
+  do i=1,10
+    v%x=1
+  end do
+  !$omp end do
+  !$omp end parallel
+end subroutine s4
+
+
+! CHECK-LABEL:   func.func @_QPs4() {
+!                  Example of how the lowering for regular derived type variables:
+! CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.type<_QFs4Ty3{x:!fir.box<!fir.heap<i32>>}> {bindc_name = "v", uniq_name = "_QFs4Ev"}
+! CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFs4Ev"} : (!fir.ref<!fir.type<_QFs4Ty3{x:!fir.box<!fir.heap<i32>>}>>) -> (!fir.ref<!fir.type<_QFs4Ty3{x:!fir.box<!fir.heap<i32>>}>>, !fir.ref<!fir.type<_QFs4Ty3{x:!fir.box<!fir.heap<i32>>}>>)
+! CHECK:           %[[VAL_10:.*]] = fir.embox %[[VAL_9]]#1 : (!fir.ref<!fir.type<_QFs4Ty3{x:!fir.box<!fir.heap<i32>>}>>) -> !fir.box<!fir.type<_QFs4Ty3{x:!fir.box<!fir.heap<i32>>}>>
+! CHECK:           %[[VAL_11:.*]] = fir.address_of
+! CHECK:           %[[VAL_12:.*]] = arith.constant 4 : i32
+! CHECK:           %[[VAL_13:.*]] = fir.convert %[[VAL_10]] : (!fir.box<!fir.type<_QFs4Ty3{x:!fir.box<!fir.heap<i32>>}>>) -> !fir.box<none>
+! CHECK:           %[[VAL_14:.*]] = fir.convert %[[VAL_11]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
+! CHECK:           %[[VAL_15:.*]] = fir.call @_FortranAInitialize(%[[VAL_13]], %[[VAL_14]], %[[VAL_12]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32) -> none
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_23:.*]] = fir.alloca !fir.type<_QFs4Ty3{x:!fir.box<!fir.heap<i32>>}> {bindc_name = "v", pinned, uniq_name = "_QFs4Ev"}
+! CHECK:             %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFs4Ev"} : (!fir.ref<!fir.type<_QFs4Ty3{x:!fir.box<!fir.heap<i32>>}>>) -> (!fir.ref<!fir.type<_QFs4Ty3{x:!fir.box<!fir.heap<i32>>}>>, !fir.ref<!fir.type<_QFs4Ty3{x:!fir.box<!fir.heap<i32>>}>>)
+! CHECK:             %[[VAL_25:.*]] = fir.embox %[[VAL_24]]#1 : (!fir.ref<!fir.type<_QFs4Ty3{x:!fir.box<!fir.heap<i32>>}>>) -> !fir.box<!fir.type<_QFs4Ty3{x:!fir.box<!fir.heap<i32>>}>>
+! CHECK:             %[[VAL_26:.*]] = fir.address_of
+! CHECK:             %[[VAL_27:.*]] = arith.constant 4 : i32
+! CHECK:             %[[VAL_28:.*]] = fir.convert %[[VAL_25]] : (!fir.box<!fir.type<_QFs4Ty3{x:!fir.box<!fir.heap<i32>>}>>) -> !fir.box<none>
+! CHECK:             %[[VAL_29:.*]] = fir.convert %[[VAL_26]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8>
+!                    Check we do call FortranAInitialize on the derived type
+! CHECK:             %[[VAL_30:.*]] = fir.call @_FortranAInitialize(%[[VAL_28]], %[[VAL_29]], %[[VAL_27]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32) -> none
+! CHECK:             omp.wsloop {
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           %[[VAL_39:.*]] = fir.embox %[[VAL_9]]#1 : (!fir.ref<!fir.type<_QFs4Ty3{x:!fir.box<!fir.heap<i32>>}>>) -> !fir.box<!fir.type<_QFs4Ty3{x:!fir.box<!fir.heap<i32>>}>>
+! CHECK:           %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (!fir.box<!fir.type<_QFs4Ty3{x:!fir.box<!fir.heap<i32>>}>>) -> !fir.box<none>
+!                  Check the derived type is destroyed
+! CHECK:           %[[VAL_41:.*]] = fir.call @_FortranADestroy(%[[VAL_40]]) fastmath<contract> : (!fir.box<none>) -> none
+! CHECK:           return
+! CHECK:         }

From 50b9db3a2df2a69c490b50fcd2021c882ae34b85 Mon Sep 17 00:00:00 2001
From: Alan Zhao <ayzhao@google.com>
Date: Thu, 25 Jul 2024 17:38:44 -0700
Subject: [PATCH 041/427] [compiler-rt][ubsan][nfc-ish] Fix a type conversion
 bug (#100665)

If the inline asm version of `ptrauth_strip` is used instead of the
builtin, the inline asm implementation currently returns an unsigned
long, causing an incompatible pointer conversion issue. The spec for
`ptrauth_sign` is that the result has the same type as the original
value, so we add a cast to the result of the inline asm.

(cherry picked from commit 25f9415713f9f57760a5322876906dc11385ef8e)
---
 compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h b/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h
index b5215c0d49c06..265a9925a15a0 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h
@@ -18,7 +18,7 @@
 // the NOP space so will do nothing when it is not enabled or not available.
 #  define ptrauth_strip(__value, __key) \
     ({                                  \
-      unsigned long ret;                \
+      __typeof(__value) ret;            \
       asm volatile(                     \
           "mov x30, %1\n\t"             \
           "hint #7\n\t"                 \

From 61b0a2fdf2c1d7432f54cfd0cd2e75556bdc0d33 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Thu, 25 Jul 2024 12:25:19 +0200
Subject: [PATCH 042/427] [BasicAA] Fix handling of indirect assumption based
 results (#100130)

If a result is potentially based on a not yet proven assumption,
BasicAA will remember it inside AssumptionBasedResults and remove
the cache entry if an assumption higher up is later disproved.
However, we currently miss the case where another cache entry ends
up depending on such an AssumptionBased result.

Fix this by introducing an additional AssumptionBased state for
cache entries. If such a result is used, we'll still increment
AAQI.NumAssumptionUses, which means that the using entry will
also become AssumptionBased and be cleared if the assumption is
disproved.

At the end of the root query, convert remaining AssumptionBased
results into definitive results.

Fixes https://github.com/llvm/llvm-project/issues/98978.

(cherry picked from commit 91073380ac5a0dceebdd09f360a1dc194d7ee93f)
---
 llvm/include/llvm/Analysis/AliasAnalysis.h    |  17 ++-
 llvm/lib/Analysis/BasicAliasAnalysis.cpp      |  28 ++++-
 .../Transforms/SLPVectorizer/X86/pr98978.ll   | 106 ++++++++++++++++++
 3 files changed, 144 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/pr98978.ll

diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
index 812b5a9f72a3a..4140387a1f341 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -244,12 +244,23 @@ class AAQueryInfo {
 public:
   using LocPair = std::pair<AACacheLoc, AACacheLoc>;
   struct CacheEntry {
+    /// Cache entry is neither an assumption nor does it use a (non-definitive)
+    /// assumption.
+    static constexpr int Definitive = -2;
+    /// Cache entry is not an assumption itself, but may be using an assumption
+    /// from higher up the stack.
+    static constexpr int AssumptionBased = -1;
+
     AliasResult Result;
-    /// Number of times a NoAlias assumption has been used.
-    /// 0 for assumptions that have not been used, -1 for definitive results.
+    /// Number of times a NoAlias assumption has been used, 0 for assumptions
+    /// that have not been used. Can also take one of the Definitive or
+    /// AssumptionBased values documented above.
     int NumAssumptionUses;
+
     /// Whether this is a definitive (non-assumption) result.
-    bool isDefinitive() const { return NumAssumptionUses < 0; }
+    bool isDefinitive() const { return NumAssumptionUses == Definitive; }
+    /// Whether this is an assumption that has not been proven yet.
+    bool isAssumption() const { return NumAssumptionUses >= 0; }
   };
 
   // Alias analysis result aggregration using which this query is performed.
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 161a3034e4829..e474899fb548e 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1692,9 +1692,12 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
   if (!Pair.second) {
     auto &Entry = Pair.first->second;
     if (!Entry.isDefinitive()) {
-      // Remember that we used an assumption.
-      ++Entry.NumAssumptionUses;
+      // Remember that we used an assumption. This may either be a direct use
+      // of an assumption, or a use of an entry that may itself be based on an
+      // assumption.
       ++AAQI.NumAssumptionUses;
+      if (Entry.isAssumption())
+        ++Entry.NumAssumptionUses;
     }
     // Cache contains sorted {V1,V2} pairs but we should return original order.
     auto Result = Entry.Result;
@@ -1722,7 +1725,6 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
   Entry.Result = Result;
   // Cache contains sorted {V1,V2} pairs.
   Entry.Result.swap(Swapped);
-  Entry.NumAssumptionUses = -1;
 
   // If the assumption has been disproven, remove any results that may have
   // been based on this assumption. Do this after the Entry updates above to
@@ -1734,8 +1736,26 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
   // The result may still be based on assumptions higher up in the chain.
   // Remember it, so it can be purged from the cache later.
   if (OrigNumAssumptionUses != AAQI.NumAssumptionUses &&
-      Result != AliasResult::MayAlias)
+      Result != AliasResult::MayAlias) {
     AAQI.AssumptionBasedResults.push_back(Locs);
+    Entry.NumAssumptionUses = AAQueryInfo::CacheEntry::AssumptionBased;
+  } else {
+    Entry.NumAssumptionUses = AAQueryInfo::CacheEntry::Definitive;
+  }
+
+  // Depth is incremented before this function is called, so Depth==1 indicates
+  // a root query.
+  if (AAQI.Depth == 1) {
+    // Any remaining assumption based results must be based on proven
+    // assumptions, so convert them to definitive results.
+    for (const auto &Loc : AAQI.AssumptionBasedResults) {
+      auto It = AAQI.AliasCache.find(Loc);
+      if (It != AAQI.AliasCache.end())
+        It->second.NumAssumptionUses = AAQueryInfo::CacheEntry::Definitive;
+    }
+    AAQI.AssumptionBasedResults.clear();
+    AAQI.NumAssumptionUses = 0;
+  }
   return Result;
 }
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr98978.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr98978.ll
new file mode 100644
index 0000000000000..429bf13b2b87a
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr98978.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=slp-vectorizer < %s | FileCheck %s
+
+target triple = "x86_64-redhat-linux-gnu"
+
+; The load+store sequence inside bb10 should not get vectorized. Previously,
+; we incorrectly determined that the pointers do not alias, because a cache
+; entry based indirectly on a disproven NoAlias assumption was not cleared
+; from the BatchAA cache.
+define void @test(ptr %p1, i64 %arg1, i64 %arg2) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[P1:%.*]], i64 [[ARG1:%.*]], i64 [[ARG2:%.*]]) {
+; CHECK-NEXT:  [[_PREHEADER48_PREHEADER_1:.*]]:
+; CHECK-NEXT:    br label %[[_LOOPEXIT49_1:.*]]
+; CHECK:       [[_LOOPEXIT49_1]]:
+; CHECK-NEXT:    [[I:%.*]] = phi ptr [ [[I21:%.*]], %[[BB20:.*]] ], [ [[P1]], %[[_PREHEADER48_PREHEADER_1]] ]
+; CHECK-NEXT:    br i1 false, label %[[BB22:.*]], label %[[DOTPREHEADER48_PREHEADER_1:.*]]
+; CHECK:       [[DEAD:.*]]:
+; CHECK-NEXT:    br label %[[DOTPREHEADER48_PREHEADER_1]]
+; CHECK:       [[_PREHEADER48_PREHEADER_2:.*:]]
+; CHECK-NEXT:    [[I5:%.*]] = phi ptr [ [[I]], %[[DEAD]] ], [ [[I]], %[[_LOOPEXIT49_1]] ]
+; CHECK-NEXT:    br label %[[DOTLOOPEXIT49_1:.*]]
+; CHECK:       [[DEAD1:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[DOTLOOPEXIT49_1]], label %[[BB20]]
+; CHECK:       [[_LOOPEXIT49_2:.*:]]
+; CHECK-NEXT:    [[I6:%.*]] = phi ptr [ [[I5]], %[[DEAD1]] ], [ [[I5]], %[[DOTPREHEADER48_PREHEADER_1]] ]
+; CHECK-NEXT:    [[I7:%.*]] = getelementptr i8, ptr [[I6]], i64 [[ARG1]]
+; CHECK-NEXT:    br label %[[BB10:.*]]
+; CHECK:       [[DEAD2:.*]]:
+; CHECK-NEXT:    br label %[[BB10]]
+; CHECK:       [[BB10]]:
+; CHECK-NEXT:    [[I11:%.*]] = phi ptr [ [[I7]], %[[DOTLOOPEXIT49_1]] ], [ null, %[[DEAD2]] ]
+; CHECK-NEXT:    [[I16:%.*]] = getelementptr i8, ptr [[I11]], i64 8
+; CHECK-NEXT:    [[I17:%.*]] = load i64, ptr [[I16]], align 1
+; CHECK-NEXT:    store i64 [[I17]], ptr [[I6]], align 1
+; CHECK-NEXT:    [[I18:%.*]] = getelementptr i8, ptr [[I6]], i64 8
+; CHECK-NEXT:    [[I19:%.*]] = load i64, ptr [[I11]], align 1
+; CHECK-NEXT:    store i64 [[I19]], ptr [[I18]], align 1
+; CHECK-NEXT:    br label %[[BB20]]
+; CHECK:       [[BB20]]:
+; CHECK-NEXT:    [[I21]] = phi ptr [ [[I5]], %[[DEAD1]] ], [ [[I6]], %[[BB10]] ]
+; CHECK-NEXT:    br label %[[_LOOPEXIT49_1]]
+; CHECK:       [[BB22]]:
+; CHECK-NEXT:    [[I23:%.*]] = getelementptr i8, ptr [[I]], i64 [[ARG2]]
+; CHECK-NEXT:    [[I25:%.*]] = getelementptr i8, ptr [[I23]], i64 8
+; CHECK-NEXT:    br label %[[BB26:.*]]
+; CHECK:       [[BB26]]:
+; CHECK-NEXT:    [[I27:%.*]] = phi ptr [ null, %[[BB26]] ], [ [[I25]], %[[BB22]] ]
+; CHECK-NEXT:    store i64 0, ptr [[I27]], align 1
+; CHECK-NEXT:    [[I28:%.*]] = getelementptr i8, ptr [[I27]], i64 8
+; CHECK-NEXT:    [[I29:%.*]] = load i64, ptr [[I23]], align 1
+; CHECK-NEXT:    store i64 0, ptr [[I28]], align 1
+; CHECK-NEXT:    br label %[[BB26]]
+;
+entry:
+  br label %loop1
+
+loop1:                                            ; preds = %bb20, %entry
+  %i = phi ptr [ %i21, %bb20 ], [ %p1, %entry ]
+  br i1 false, label %bb22, label %.preheader48.preheader.1
+
+dead:                                             ; No predecessors!
+  br label %.preheader48.preheader.1
+
+.preheader48.preheader.1:                         ; preds = %dead, %loop1
+  %i5 = phi ptr [ %i, %dead ], [ %i, %loop1 ]
+  br label %.loopexit49.1
+
+dead1:                                    ; No predecessors!
+  br i1 false, label %.loopexit49.1, label %bb20
+
+.loopexit49.1:                                    ; preds = %dead1, %.preheader48.preheader.1
+  %i6 = phi ptr [ %i5, %dead1 ], [ %i5, %.preheader48.preheader.1 ]
+  %i7 = getelementptr i8, ptr %i6, i64 %arg1
+  br label %bb10
+
+dead2:                                            ; No predecessors!
+  br label %bb10
+
+bb10:                                             ; preds = %dead2, %.loopexit49.1
+  %i11 = phi ptr [ %i7, %.loopexit49.1 ], [ null, %dead2 ]
+  %i16 = getelementptr i8, ptr %i11, i64 8
+  %i17 = load i64, ptr %i16, align 1
+  store i64 %i17, ptr %i6, align 1
+  %i18 = getelementptr i8, ptr %i6, i64 8
+  %i19 = load i64, ptr %i11, align 1
+  store i64 %i19, ptr %i18, align 1
+  br label %bb20
+
+bb20:                                             ; preds = %bb10, %dead1
+  %i21 = phi ptr [ %i5, %dead1 ], [ %i6, %bb10 ]
+  br label %loop1
+
+bb22:                                             ; preds = %loop1
+  %i23 = getelementptr i8, ptr %i, i64 %arg2
+  %i25 = getelementptr i8, ptr %i23, i64 8
+  br label %bb26
+
+bb26:                                             ; preds = %bb26, %bb22
+  %i27 = phi ptr [ null, %bb26 ], [ %i25, %bb22 ]
+  store i64 0, ptr %i27, align 1
+  %i28 = getelementptr i8, ptr %i27, i64 8
+  %i29 = load i64, ptr %i23, align 1
+  store i64 0, ptr %i28, align 1
+  br label %bb26
+}

From 58f851dfb66dcd0af89d0e2da483a358c3643114 Mon Sep 17 00:00:00 2001
From: Daniil Kovalev <dkovalev@accesssoftek.com>
Date: Thu, 25 Jul 2024 22:21:03 +0300
Subject: [PATCH 043/427] [PAC] Sign LR with B key for non-leaf functions with
 ptrauth-returns attr (#100552)

For pauthtest ABI, there is a bunch of ptrauth-* options, including
ptrauth-returns. Use "ptrauth-returns" function attribute to indicate
need for LR signing with B key for non-leaf function to avoid using
"sign-return-address" and "sign-return-address-key" which were
originally designed for pac-ret.

Co-authored-by: Ahmed Bougacha <ahmed@bougacha.org>
Co-authored-by: Anatoly Trosinenko <atrosinenko@accesssoftek.com>
(cherry picked from commit 56fd2472d887392855ad85c53df5782a2c3f8ddb)
---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  |   7 +-
 .../AArch64/AArch64MachineFunctionInfo.cpp    |   4 +
 .../lib/Target/AArch64/AArch64PointerAuth.cpp |   3 +-
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp  |   9 +-
 llvm/lib/Target/AArch64/AArch64Subtarget.h    |   3 +-
 llvm/test/CodeGen/AArch64/ptrauth-ret-trap.ll |  98 ++++++++
 llvm/test/CodeGen/AArch64/ptrauth-ret.ll      | 225 ++++++++++++++++++
 7 files changed, 344 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/ptrauth-ret-trap.ll
 create mode 100644 llvm/test/CodeGen/AArch64/ptrauth-ret.ll

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 1b301a4a05fc5..377bcd5868fb6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -8339,7 +8339,8 @@ AArch64InstrInfo::getOutliningCandidateInfo(
     NumBytesToCreateFrame += 8;
 
     // PAuth is enabled - set extra tail call cost, if any.
-    auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod();
+    auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
+        *RepeatedSequenceLocs[0].getMF());
     NumBytesToCheckLRInTCEpilogue =
         AArch64PAuth::getCheckerSizeInBytes(LRCheckMethod);
     // Checking the authenticated LR value may significantly impact
@@ -8700,6 +8701,10 @@ void AArch64InstrInfo::mergeOutliningCandidateAttributes(
   // behaviour of one of them
   const auto &CFn = Candidates.front().getMF()->getFunction();
 
+  if (CFn.hasFnAttribute("ptrauth-returns"))
+    F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
+  if (CFn.hasFnAttribute("ptrauth-auth-traps"))
+    F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
   // Since all candidates belong to the same module, just copy the
   // function-level attributes of an arbitrary function.
   if (CFn.hasFnAttribute("sign-return-address"))
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
index 201e8047b3686..e96c5a953ff2b 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -38,6 +38,8 @@ void AArch64FunctionInfo::initializeBaseYamlFields(
 }
 
 static std::pair<bool, bool> GetSignReturnAddress(const Function &F) {
+  if (F.hasFnAttribute("ptrauth-returns"))
+    return {true, false}; // non-leaf
   // The function should be signed in the following situations:
   // - sign-return-address=all
   // - sign-return-address=non-leaf and the functions spills the LR
@@ -56,6 +58,8 @@ static std::pair<bool, bool> GetSignReturnAddress(const Function &F) {
 }
 
 static bool ShouldSignWithBKey(const Function &F, const AArch64Subtarget &STI) {
+  if (F.hasFnAttribute("ptrauth-returns"))
+    return true;
   if (!F.hasFnAttribute("sign-return-address-key")) {
     if (STI.getTargetTriple().isOSWindows())
       return true;
diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
index 465e689d4a7a5..92ab4b5c3d251 100644
--- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
@@ -341,7 +341,8 @@ bool AArch64PointerAuth::checkAuthenticatedLR(
   AArch64PACKey::ID KeyId =
       MFnI->shouldSignWithBKey() ? AArch64PACKey::IB : AArch64PACKey::IA;
 
-  AuthCheckMethod Method = Subtarget->getAuthenticatedLRCheckMethod();
+  AuthCheckMethod Method =
+      Subtarget->getAuthenticatedLRCheckMethod(*TI->getMF());
 
   if (Method == AuthCheckMethod::None)
     return false;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 32a355fe38f1c..642006e706c13 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -565,8 +565,13 @@ bool AArch64Subtarget::useAA() const { return UseAA; }
 // exception on its own. Later, if the callee spills the signed LR value and
 // neither FEAT_PAuth2 nor FEAT_EPAC are implemented, the valid PAC replaces
 // the higher bits of LR thus hiding the authentication failure.
-AArch64PAuth::AuthCheckMethod
-AArch64Subtarget::getAuthenticatedLRCheckMethod() const {
+AArch64PAuth::AuthCheckMethod AArch64Subtarget::getAuthenticatedLRCheckMethod(
+    const MachineFunction &MF) const {
+  // TODO: Check subtarget for the scheme. Present variant is a default for
+  // pauthtest ABI.
+  if (MF.getFunction().hasFnAttribute("ptrauth-returns") &&
+      MF.getFunction().hasFnAttribute("ptrauth-auth-traps"))
+    return AArch64PAuth::AuthCheckMethod::HighBitsNoTBI;
   if (AuthenticatedLRCheckMethod.getNumOccurrences())
     return AuthenticatedLRCheckMethod;
 
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index e585aad2f7a68..0f3a637f98fbe 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -413,7 +413,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   }
 
   /// Choose a method of checking LR before performing a tail call.
-  AArch64PAuth::AuthCheckMethod getAuthenticatedLRCheckMethod() const;
+  AArch64PAuth::AuthCheckMethod
+  getAuthenticatedLRCheckMethod(const MachineFunction &MF) const;
 
   /// Compute the integer discriminator for a given BlockAddress constant, if
   /// blockaddress signing is enabled, or std::nullopt otherwise.
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-ret-trap.ll b/llvm/test/CodeGen/AArch64/ptrauth-ret-trap.ll
new file mode 100644
index 0000000000000..42a3050eda112
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ptrauth-ret-trap.ll
@@ -0,0 +1,98 @@
+; RUN: llc -mtriple aarch64-linux-gnu -mattr=+pauth -asm-verbose=false -disable-post-ra -o - %s | FileCheck %s
+
+; CHECK-LABEL:  test_tailcall:
+; CHECK-NEXT:   pacibsp
+; CHECK-NEXT:   str x30, [sp, #-16]!
+; CHECK-NEXT:   bl bar
+; CHECK-NEXT:   ldr x30, [sp], #16
+; CHECK-NEXT:   autibsp
+; CHECK-NEXT:   eor x16, x30, x30, lsl #1
+; CHECK-NEXT:   tbnz x16, #62, [[BAD:.L.*]]
+; CHECK-NEXT:   b bar
+; CHECK-NEXT:   [[BAD]]:
+; CHECK-NEXT:   brk #0xc471
+define i32 @test_tailcall() #0 {
+  call i32 @bar()
+  %c = tail call i32 @bar()
+  ret i32 %c
+}
+
+; CHECK-LABEL: test_tailcall_noframe:
+; CHECK-NEXT:  b bar
+define i32 @test_tailcall_noframe() #0 {
+  %c = tail call i32 @bar()
+  ret i32 %c
+}
+
+; CHECK-LABEL: test_tailcall_indirect:
+; CHECK:         autibsp
+; CHECK:         eor     x16, x30, x30, lsl #1
+; CHECK:         tbnz    x16, #62, [[BAD:.L.*]]
+; CHECK:         br      x0
+; CHECK: [[BAD]]:
+; CHECK:         brk     #0xc471
+define void @test_tailcall_indirect(ptr %fptr) #0 {
+  call i32 @test_tailcall()
+  tail call void %fptr()
+  ret void
+}
+
+; CHECK-LABEL: test_tailcall_indirect_in_x9:
+; CHECK:         autibsp
+; CHECK:         eor     x16, x30, x30, lsl #1
+; CHECK:         tbnz    x16, #62, [[BAD:.L.*]]
+; CHECK:         br      x9
+; CHECK: [[BAD]]:
+; CHECK:         brk     #0xc471
+define void @test_tailcall_indirect_in_x9(ptr sret(i64) %ret, [8 x i64] %in, ptr %fptr) #0 {
+  %ptr = alloca i8, i32 16
+  call i32 @test_tailcall()
+  tail call void %fptr(ptr sret(i64) %ret, [8 x i64] %in)
+  ret void
+}
+
+; CHECK-LABEL: test_auth_tailcall_indirect:
+; CHECK:         autibsp
+; CHECK:         eor     x16, x30, x30, lsl #1
+; CHECK:         tbnz    x16, #62, [[BAD:.L.*]]
+; CHECK:         mov x16, #42
+; CHECK:         braa      x0, x16
+; CHECK: [[BAD]]:
+; CHECK:         brk     #0xc471
+define void @test_auth_tailcall_indirect(ptr %fptr) #0 {
+  call i32 @test_tailcall()
+  tail call void %fptr() [ "ptrauth"(i32 0, i64 42) ]
+  ret void
+}
+
+; CHECK-LABEL: test_auth_tailcall_indirect_in_x9:
+; CHECK:         autibsp
+; CHECK:         eor     x16, x30, x30, lsl #1
+; CHECK:         tbnz    x16, #62, [[BAD:.L.*]]
+; CHECK:         brabz      x9
+; CHECK: [[BAD]]:
+; CHECK:         brk     #0xc471
+define void @test_auth_tailcall_indirect_in_x9(ptr sret(i64) %ret, [8 x i64] %in, ptr %fptr) #0 {
+  %ptr = alloca i8, i32 16
+  call i32 @test_tailcall()
+  tail call void %fptr(ptr sret(i64) %ret, [8 x i64] %in) [ "ptrauth"(i32 1, i64 0) ]
+  ret void
+}
+
+; CHECK-LABEL: test_auth_tailcall_indirect_bti:
+; CHECK:         autibsp
+; CHECK:         eor     x17, x30, x30, lsl #1
+; CHECK:         tbnz    x17, #62, [[BAD:.L.*]]
+; CHECK:         brabz      x16
+; CHECK: [[BAD]]:
+; CHECK:         brk     #0xc471
+define void @test_auth_tailcall_indirect_bti(ptr sret(i64) %ret, [8 x i64] %in, ptr %fptr) #0 "branch-target-enforcement"="true" {
+  %ptr = alloca i8, i32 16
+  call i32 @test_tailcall()
+  tail call void %fptr(ptr sret(i64) %ret, [8 x i64] %in) [ "ptrauth"(i32 1, i64 0) ]
+  ret void
+}
+
+declare i32 @bar()
+
+attributes #0 = { nounwind "ptrauth-returns" "ptrauth-auth-traps" }
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-ret.ll b/llvm/test/CodeGen/AArch64/ptrauth-ret.ll
new file mode 100644
index 0000000000000..61f5f6d9d23b7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ptrauth-ret.ll
@@ -0,0 +1,225 @@
+; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -verify-machineinstrs -disable-post-ra \
+; RUN:   -global-isel=0 -o - %s | FileCheck %s
+; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -verify-machineinstrs -disable-post-ra \
+; RUN:   -global-isel=1 -global-isel-abort=1 -o - %s | FileCheck %s
+
+define i32 @test() #0 {
+; CHECK-LABEL: test:
+; CHECK:       %bb.0:
+; CHECK-NEXT:    str x19, [sp, #-16]!
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    ldr x19, [sp], #16
+; CHECK-NEXT:    ret
+  call void asm sideeffect "", "~{x19}"()
+  ret i32 0
+}
+
+define i32 @test_alloca() #0 {
+; CHECK-LABEL: test_alloca:
+; CHECK:       %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    ret
+  %p = alloca i8, i32 32
+  call void asm sideeffect "", "r"(ptr %p)
+  ret i32 0
+}
+
+define i32 @test_realign_alloca() #0 {
+; CHECK-LABEL: test_realign_alloca:
+; CHECK:       %bb.0:
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]!
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    sub x9, sp, #112
+; CHECK-NEXT:    and sp, x9, #0xffffffffffffff80
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    mov sp, x29
+; CHECK-NEXT:    ldp x29, x30, [sp], #16
+; CHECK-NEXT:    retab
+  %p = alloca i8, i32 32, align 128
+  call void asm sideeffect "", "r"(ptr %p)
+  ret i32 0
+}
+
+define i32 @test_big_alloca() #0 {
+; CHECK-LABEL: test_big_alloca:
+; CHECK:       %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]!
+; CHECK-NEXT:    sub sp, sp, #1024
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    add sp, sp, #1024
+; CHECK-NEXT:    ldr x29, [sp], #16
+; CHECK-NEXT:    ret
+  %p = alloca i8, i32 1024
+  call void asm sideeffect "", "r"(ptr %p)
+  ret i32 0
+}
+
+define i32 @test_var_alloca(i32 %s) #0 {
+  %p = alloca i8, i32 %s
+  call void asm sideeffect "", "r"(ptr %p)
+  ret i32 0
+}
+
+define i32 @test_noframe_saved(ptr %p) #0 {
+; CHECK-LABEL: test_noframe_saved:
+; CHECK:       %bb.0:
+
+
+; CHECK-NEXT:  str     x29, [sp, #-96]!
+; CHECK-NEXT:  stp     x28, x27, [sp, #16]
+; CHECK-NEXT:  stp     x26, x25, [sp, #32]
+; CHECK-NEXT:  stp     x24, x23, [sp, #48]
+; CHECK-NEXT:  stp     x22, x21, [sp, #64]
+; CHECK-NEXT:  stp     x20, x19, [sp, #80]
+; CHECK-NEXT:  ldr     w29, [x0]
+; CHECK-NEXT:  //APP
+; CHECK-NEXT:  //NO_APP
+; CHECK-NEXT:  mov     w0, w29
+; CHECK-NEXT:  ldp     x20, x19, [sp, #80]
+; CHECK-NEXT:  ldp     x22, x21, [sp, #64]
+; CHECK-NEXT:  ldp     x24, x23, [sp, #48]
+; CHECK-NEXT:  ldp     x26, x25, [sp, #32]
+; CHECK-NEXT:  ldp     x28, x27, [sp, #16]
+; CHECK-NEXT:  ldr     x29, [sp], #96
+; CHECK-NEXT:  ret
+  %v = load i32, ptr %p
+  call void asm sideeffect "", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28}"()
+  ret i32 %v
+}
+
+define void @test_noframe() #0 {
+; CHECK-LABEL: test_noframe:
+; CHECK:       %bb.0:
+; CHECK-NEXT:    ret
+  ret void
+}
+
+; FIXME: Inefficient lowering of @llvm.returnaddress
+define ptr @test_returnaddress_0() #0 {
+; CHECK-LABEL: test_returnaddress_0:
+; CHECK:       %bb.0:
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    str x30, [sp, #-16]!
+; CHECK-NEXT:    xpaci x30
+; CHECK-NEXT:    mov x0, x30
+; CHECK-NEXT:    ldr x30, [sp], #16
+; CHECK-NEXT:    retab
+  %r = call ptr @llvm.returnaddress(i32 0)
+  ret ptr %r
+}
+
+define ptr @test_returnaddress_1() #0 {
+; CHECK-LABEL: test_returnaddress_1:
+; CHECK:       %bb.0:
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]!
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    ldr x8, [x29]
+; CHECK-NEXT:    ldr x0, [x8, #8]
+; CHECK-NEXT:    xpaci x0
+; CHECK-NEXT:    ldp x29, x30, [sp], #16
+; CHECK-NEXT:    retab
+  %r = call ptr @llvm.returnaddress(i32 1)
+  ret ptr %r
+}
+
+define void @test_noframe_alloca() #0 {
+; CHECK-LABEL: test_noframe_alloca:
+; CHECK:       %bb.0:
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    add x8, sp, #12
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
+  %p = alloca i8, i32 1
+  call void asm sideeffect "", "r"(ptr %p)
+  ret void
+}
+
+define void @test_call() #0 {
+; CHECK-LABEL: test_call:
+; CHECK:       %bb.0:
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    str x30, [sp, #-16]!
+; CHECK-NEXT:    bl bar
+; CHECK-NEXT:    ldr x30, [sp], #16
+; CHECK-NEXT:    retab
+  call i32 @bar()
+  ret void
+}
+
+define void @test_call_alloca() #0 {
+; CHECK-LABEL: test_call_alloca:
+; CHECK:       %bb.0:
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    str x30, [sp, #-16]
+; CHECK-NEXT:    bl bar
+; CHECK-NEXT:    ldr x30, [sp], #16
+; CHECK-NEXT:    retab
+  alloca i8
+  call i32 @bar()
+  ret void
+}
+
+define void @test_call_shrinkwrapping(i1 %c) #0 {
+; CHECK-LABEL: test_call_shrinkwrapping:
+; CHECK:       %bb.0:
+; CHECK-NEXT:    tbz w0, #0, .LBB12_2
+; CHECK-NEXT:  %bb.1:
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    str x30, [sp, #-16]!
+; CHECK-NEXT:    bl bar
+; CHECK-NEXT:    ldr x30, [sp], #16
+; CHECK-NEXT:    autibsp
+; CHECK-NEXT:  LBB12_2:
+; CHECK-NEXT:    ret
+  br i1 %c, label %tbb, label %fbb
+tbb:
+  call i32 @bar()
+  br label %fbb
+fbb:
+  ret void
+}
+
+define i32 @test_tailcall() #0 {
+; CHECK-LABEL: test_tailcall:
+; CHECK:       %bb.0:
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    str x30, [sp, #-16]!
+; CHECK-NEXT:    bl bar
+; CHECK-NEXT:    ldr x30, [sp], #16
+; CHECK-NEXT:    autibsp
+; CHECK-NEXT:    b bar
+  call i32 @bar()
+  %c = tail call i32 @bar()
+  ret i32 %c
+}
+
+define i32 @test_tailcall_noframe() #0 {
+; CHECK-LABEL: test_tailcall_noframe:
+; CHECK:       %bb.0:
+; CHECK-NEXT:    b bar
+  %c = tail call i32 @bar()
+  ret i32 %c
+}
+
+declare i32 @bar()
+
+declare ptr @llvm.returnaddress(i32)
+
+attributes #0 = { nounwind "ptrauth-returns" }

From 2a986c55d135d1da7268c73ede08789497b7f992 Mon Sep 17 00:00:00 2001
From: Abid Qadeer <haqadeer@amd.com>
Date: Thu, 25 Jul 2024 13:52:50 +0100
Subject: [PATCH 044/427] [flang][debug] Set scope of internal functions
 correctly. (#99531)

Summary:
The functions internal to subroutine should have the scope set to the
parent function. This allows a user to evaluate local variables of
parent function when control is stopped in the child.

Fixes #96314

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

Differential Revision: https://phabricator.intern.facebook.com/D60250527

(cherry picked from commit 626022bfd18f335ef62a461992a05dfed4e6d715)
---
 .../lib/Optimizer/Transforms/AddDebugInfo.cpp | 181 ++++++++++--------
 flang/test/Transforms/debug-96314.fir         |  26 +++
 2 files changed, 132 insertions(+), 75 deletions(-)
 create mode 100644 flang/test/Transforms/debug-96314.fir

diff --git a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp
index 685a8645fa2fc..8751a3b2c322f 100644
--- a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp
+++ b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp
@@ -66,6 +66,9 @@ class AddDebugInfoPass : public fir::impl::AddDebugInfoBase<AddDebugInfoPass> {
   void handleGlobalOp(fir::GlobalOp glocalOp, mlir::LLVM::DIFileAttr fileAttr,
                       mlir::LLVM::DIScopeAttr scope,
                       mlir::SymbolTable *symbolTable);
+  void handleFuncOp(mlir::func::FuncOp funcOp, mlir::LLVM::DIFileAttr fileAttr,
+                    mlir::LLVM::DICompileUnitAttr cuAttr,
+                    mlir::SymbolTable *symbolTable);
 };
 
 static uint32_t getLineFromLoc(mlir::Location loc) {
@@ -204,11 +207,112 @@ void AddDebugInfoPass::handleGlobalOp(fir::GlobalOp globalOp,
   globalOp->setLoc(builder.getFusedLoc({globalOp->getLoc()}, gvAttr));
 }
 
+void AddDebugInfoPass::handleFuncOp(mlir::func::FuncOp funcOp,
+                                    mlir::LLVM::DIFileAttr fileAttr,
+                                    mlir::LLVM::DICompileUnitAttr cuAttr,
+                                    mlir::SymbolTable *symbolTable) {
+  mlir::Location l = funcOp->getLoc();
+  // If fused location has already been created then nothing to do
+  // Otherwise, create a fused location.
+  if (debugInfoIsAlreadySet(l))
+    return;
+
+  mlir::ModuleOp module = getOperation();
+  mlir::MLIRContext *context = &getContext();
+  mlir::OpBuilder builder(context);
+  llvm::StringRef fileName(fileAttr.getName());
+  llvm::StringRef filePath(fileAttr.getDirectory());
+  unsigned int CC = (funcOp.getName() == fir::NameUniquer::doProgramEntry())
+                        ? llvm::dwarf::getCallingConvention("DW_CC_program")
+                        : llvm::dwarf::getCallingConvention("DW_CC_normal");
+
+  if (auto funcLoc = mlir::dyn_cast<mlir::FileLineColLoc>(l)) {
+    fileName = llvm::sys::path::filename(funcLoc.getFilename().getValue());
+    filePath = llvm::sys::path::parent_path(funcLoc.getFilename().getValue());
+  }
+
+  mlir::StringAttr fullName = mlir::StringAttr::get(context, funcOp.getName());
+  mlir::Attribute attr = funcOp->getAttr(fir::getInternalFuncNameAttrName());
+  mlir::StringAttr funcName =
+      (attr) ? mlir::cast<mlir::StringAttr>(attr)
+             : mlir::StringAttr::get(context, funcOp.getName());
+
+  auto result = fir::NameUniquer::deconstruct(funcName);
+  funcName = mlir::StringAttr::get(context, result.second.name);
+
+  llvm::SmallVector<mlir::LLVM::DITypeAttr> types;
+  fir::DebugTypeGenerator typeGen(module);
+  for (auto resTy : funcOp.getResultTypes()) {
+    auto tyAttr = typeGen.convertType(resTy, fileAttr, cuAttr, funcOp.getLoc());
+    types.push_back(tyAttr);
+  }
+  for (auto inTy : funcOp.getArgumentTypes()) {
+    auto tyAttr = typeGen.convertType(fir::unwrapRefType(inTy), fileAttr,
+                                      cuAttr, funcOp.getLoc());
+    types.push_back(tyAttr);
+  }
+
+  mlir::LLVM::DISubroutineTypeAttr subTypeAttr =
+      mlir::LLVM::DISubroutineTypeAttr::get(context, CC, types);
+  mlir::LLVM::DIFileAttr funcFileAttr =
+      mlir::LLVM::DIFileAttr::get(context, fileName, filePath);
+
+  // Only definitions need a distinct identifier and a compilation unit.
+  mlir::DistinctAttr id;
+  mlir::LLVM::DIScopeAttr Scope = fileAttr;
+  mlir::LLVM::DICompileUnitAttr compilationUnit;
+  mlir::LLVM::DISubprogramFlags subprogramFlags =
+      mlir::LLVM::DISubprogramFlags{};
+  if (isOptimized)
+    subprogramFlags = mlir::LLVM::DISubprogramFlags::Optimized;
+  if (!funcOp.isExternal()) {
+    id = mlir::DistinctAttr::create(mlir::UnitAttr::get(context));
+    compilationUnit = cuAttr;
+    subprogramFlags =
+        subprogramFlags | mlir::LLVM::DISubprogramFlags::Definition;
+  }
+  unsigned line = getLineFromLoc(l);
+  if (fir::isInternalProcedure(funcOp)) {
+    // For contained functions, the scope is the parent subroutine.
+    mlir::SymbolRefAttr sym = mlir::cast<mlir::SymbolRefAttr>(
+        funcOp->getAttr(fir::getHostSymbolAttrName()));
+    if (sym) {
+      if (auto func =
+              symbolTable->lookup<mlir::func::FuncOp>(sym.getLeafReference())) {
+        // Make sure that parent is processed.
+        handleFuncOp(func, fileAttr, cuAttr, symbolTable);
+        if (auto fusedLoc =
+                mlir::dyn_cast_if_present<mlir::FusedLoc>(func.getLoc())) {
+          if (auto spAttr =
+                  mlir::dyn_cast_if_present<mlir::LLVM::DISubprogramAttr>(
+                      fusedLoc.getMetadata()))
+            Scope = spAttr;
+        }
+      }
+    }
+  } else if (!result.second.modules.empty()) {
+    Scope = getOrCreateModuleAttr(result.second.modules[0], fileAttr, cuAttr,
+                                  line - 1, false);
+  }
+
+  auto spAttr = mlir::LLVM::DISubprogramAttr::get(
+      context, id, compilationUnit, Scope, funcName, fullName, funcFileAttr,
+      line, line, subprogramFlags, subTypeAttr);
+  funcOp->setLoc(builder.getFusedLoc({funcOp->getLoc()}, spAttr));
+
+  // Don't process variables if user asked for line tables only.
+  if (debugLevel == mlir::LLVM::DIEmissionKind::LineTablesOnly)
+    return;
+
+  funcOp.walk([&](fir::cg::XDeclareOp declOp) {
+    handleDeclareOp(declOp, fileAttr, spAttr, typeGen, symbolTable);
+  });
+}
+
 void AddDebugInfoPass::runOnOperation() {
   mlir::ModuleOp module = getOperation();
   mlir::MLIRContext *context = &getContext();
   mlir::SymbolTable symbolTable(module);
-  mlir::OpBuilder builder(context);
   llvm::StringRef fileName;
   std::string filePath;
   // We need 2 type of file paths here.
@@ -245,80 +349,7 @@ void AddDebugInfoPass::runOnOperation() {
       isOptimized, debugLevel);
 
   module.walk([&](mlir::func::FuncOp funcOp) {
-    mlir::Location l = funcOp->getLoc();
-    // If fused location has already been created then nothing to do
-    // Otherwise, create a fused location.
-    if (debugInfoIsAlreadySet(l))
-      return;
-
-    unsigned int CC = (funcOp.getName() == fir::NameUniquer::doProgramEntry())
-                          ? llvm::dwarf::getCallingConvention("DW_CC_program")
-                          : llvm::dwarf::getCallingConvention("DW_CC_normal");
-
-    if (auto funcLoc = mlir::dyn_cast<mlir::FileLineColLoc>(l)) {
-      fileName = llvm::sys::path::filename(funcLoc.getFilename().getValue());
-      filePath = llvm::sys::path::parent_path(funcLoc.getFilename().getValue());
-    }
-
-    mlir::StringAttr fullName =
-        mlir::StringAttr::get(context, funcOp.getName());
-    mlir::Attribute attr = funcOp->getAttr(fir::getInternalFuncNameAttrName());
-    mlir::StringAttr funcName =
-        (attr) ? mlir::cast<mlir::StringAttr>(attr)
-               : mlir::StringAttr::get(context, funcOp.getName());
-
-    auto result = fir::NameUniquer::deconstruct(funcName);
-    funcName = mlir::StringAttr::get(context, result.second.name);
-
-    llvm::SmallVector<mlir::LLVM::DITypeAttr> types;
-    fir::DebugTypeGenerator typeGen(module);
-    for (auto resTy : funcOp.getResultTypes()) {
-      auto tyAttr =
-          typeGen.convertType(resTy, fileAttr, cuAttr, funcOp.getLoc());
-      types.push_back(tyAttr);
-    }
-    for (auto inTy : funcOp.getArgumentTypes()) {
-      auto tyAttr = typeGen.convertType(fir::unwrapRefType(inTy), fileAttr,
-                                        cuAttr, funcOp.getLoc());
-      types.push_back(tyAttr);
-    }
-
-    mlir::LLVM::DISubroutineTypeAttr subTypeAttr =
-        mlir::LLVM::DISubroutineTypeAttr::get(context, CC, types);
-    mlir::LLVM::DIFileAttr funcFileAttr =
-        mlir::LLVM::DIFileAttr::get(context, fileName, filePath);
-
-    // Only definitions need a distinct identifier and a compilation unit.
-    mlir::DistinctAttr id;
-    mlir::LLVM::DIScopeAttr Scope = fileAttr;
-    mlir::LLVM::DICompileUnitAttr compilationUnit;
-    mlir::LLVM::DISubprogramFlags subprogramFlags =
-        mlir::LLVM::DISubprogramFlags{};
-    if (isOptimized)
-      subprogramFlags = mlir::LLVM::DISubprogramFlags::Optimized;
-    if (!funcOp.isExternal()) {
-      id = mlir::DistinctAttr::create(mlir::UnitAttr::get(context));
-      compilationUnit = cuAttr;
-      subprogramFlags =
-          subprogramFlags | mlir::LLVM::DISubprogramFlags::Definition;
-    }
-    unsigned line = getLineFromLoc(l);
-    if (!result.second.modules.empty())
-      Scope = getOrCreateModuleAttr(result.second.modules[0], fileAttr, cuAttr,
-                                    line - 1, false);
-
-    auto spAttr = mlir::LLVM::DISubprogramAttr::get(
-        context, id, compilationUnit, Scope, funcName, fullName, funcFileAttr,
-        line, line, subprogramFlags, subTypeAttr);
-    funcOp->setLoc(builder.getFusedLoc({funcOp->getLoc()}, spAttr));
-
-    // Don't process variables if user asked for line tables only.
-    if (debugLevel == mlir::LLVM::DIEmissionKind::LineTablesOnly)
-      return;
-
-    funcOp.walk([&](fir::cg::XDeclareOp declOp) {
-      handleDeclareOp(declOp, fileAttr, spAttr, typeGen, &symbolTable);
-    });
+    handleFuncOp(funcOp, fileAttr, cuAttr, &symbolTable);
   });
   // Process any global which was not processed through DeclareOp.
   if (debugLevel == mlir::LLVM::DIEmissionKind::Full) {
diff --git a/flang/test/Transforms/debug-96314.fir b/flang/test/Transforms/debug-96314.fir
new file mode 100644
index 0000000000000..e2d0f24a1105c
--- /dev/null
+++ b/flang/test/Transforms/debug-96314.fir
@@ -0,0 +1,26 @@
+// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s -o - | FileCheck %s
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<>} {
+  func.func @_QMhelperPmod_sub(%arg0: !fir.ref<i32> {fir.bindc_name = "a"} ) {
+    return
+  } loc(#loc1)
+  func.func private @_QMhelperFmod_subPchild1(%arg0: !fir.ref<i32> {fir.bindc_name = "b"} ) attributes {fir.host_symbol = @_QMhelperPmod_sub, llvm.linkage = #llvm.linkage<internal>} {
+    return
+  } loc(#loc2)
+  func.func @global_sub_(%arg0: !fir.ref<i32> {fir.bindc_name = "n"} ) attributes {fir.internal_name = "_QPglobal_sub"} {
+    return
+  } loc(#loc3)
+  func.func private @_QFglobal_subPchild2(%arg0: !fir.ref<i32> {fir.bindc_name = "c"}) attributes {fir.host_symbol = @global_sub_, llvm.linkage = #llvm.linkage<internal>} {
+    return
+  } loc(#loc4)
+}
+
+#loc1 = loc("test.f90":5:1)
+#loc2 = loc("test.f90":15:1)
+#loc3 = loc("test.f90":25:1)
+#loc4 = loc("test.f90":35:1)
+
+// CHECK-DAG: #[[SP1:.*]] = #llvm.di_subprogram<{{.*}}name = "mod_sub"{{.*}}>
+// CHECK-DAG: #llvm.di_subprogram<{{.*}}scope = #[[SP1]], name = "child1"{{.*}}>
+// CHECK-DAG: #[[SP2:.*]] = #llvm.di_subprogram<{{.*}}linkageName = "global_sub_"{{.*}}>
+// CHECK-DAG: #llvm.di_subprogram<{{.*}}scope = #[[SP2]], name = "child2"{{.*}}>

From 8bbb8ba5e7178a5f61361448d7bc345d3f29b997 Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@hieta.se>
Date: Tue, 23 Jul 2024 16:52:51 +0200
Subject: [PATCH 045/427] [Utils] Updates to bump-version.py (#100089)

* Add support for --git flag to bump version for a git suffix
* Update location of the new file where the version is stored
---
 llvm/utils/release/bump-version.py | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/llvm/utils/release/bump-version.py b/llvm/utils/release/bump-version.py
index abff67ae926ac..b1799cba9363e 100755
--- a/llvm/utils/release/bump-version.py
+++ b/llvm/utils/release/bump-version.py
@@ -12,6 +12,9 @@
 
 
 class Processor:
+    def __init__(self, args):
+        self.args = args
+
     def process_line(self, line: str) -> str:
         raise NotImplementedError()
 
@@ -23,6 +26,13 @@ def process_file(self, fpath: Path, version: packaging.version.Version) -> None:
             version.micro,
             version.pre,
         )
+
+        if self.args.rc:
+            self.suffix = f"-rc{self.args.rc}"
+
+        if self.args.git:
+            self.suffix = "git"
+
         data = fpath.read_text()
         new_data = []
 
@@ -64,7 +74,7 @@ def process_line(self, line: str) -> str:
             if self.suffix:
                 nline = re.sub(
                     r"set\(LLVM_VERSION_SUFFIX(.*)\)",
-                    f"set(LLVM_VERSION_SUFFIX -{self.suffix[0]}{self.suffix[1]})",
+                    f"set(LLVM_VERSION_SUFFIX {self.suffix})",
                     line,
                 )
             else:
@@ -144,6 +154,7 @@ def process_line(self, line: str) -> str:
     )
     parser.add_argument("version", help="Version to bump to, e.g. 15.0.1", default=None)
     parser.add_argument("--rc", default=None, type=int, help="RC version")
+    parser.add_argument("--git", action="store_true", help="Git version")
     parser.add_argument(
         "-s",
         "--source-root",
@@ -153,9 +164,10 @@ def process_line(self, line: str) -> str:
 
     args = parser.parse_args()
 
+    if args.rc and args.git:
+        raise RuntimeError("Can't specify --git and --rc at the same time!")
+
     verstr = args.version
-    if args.rc:
-        verstr += f"-rc{args.rc}"
 
     # parse the version string with distutils.
     # note that -rc will end up as version.pre here
@@ -170,20 +182,20 @@ def process_line(self, line: str) -> str:
 
     files_to_update = (
         # Main CMakeLists.
-        (source_root / "llvm" / "CMakeLists.txt", CMakeProcessor()),
+        (source_root / "cmake" / "Modules" / "LLVMVersion.cmake", CMakeProcessor(args)),
         # Lit configuration
         (
             "llvm/utils/lit/lit/__init__.py",
-            LitProcessor(),
+            LitProcessor(args),
         ),
         # GN build system
         (
             "llvm/utils/gn/secondary/llvm/version.gni",
-            GNIProcessor(),
+            GNIProcessor(args),
         ),
         (
             "libcxx/include/__config",
-            LibCXXProcessor(),
+            LibCXXProcessor(args),
         ),
     )
 

From 07c2140ed9a17a4c3485ad8dba7d8d0c78593ab0 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman@google.com>
Date: Tue, 23 Jul 2024 12:54:18 -0700
Subject: [PATCH 046/427] [MLGO][Infra] Add mlgo-utils to bump-version script
 (#100186)

This patch adds support in the bump-version script for bumping the
version of the mlgo-utils package. This should hopefully streamline the
processor for that with the rest of the project and prevent having to
manually update this package individually.
---
 llvm/utils/release/bump-version.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/llvm/utils/release/bump-version.py b/llvm/utils/release/bump-version.py
index b1799cba9363e..5db62e88fec1d 100755
--- a/llvm/utils/release/bump-version.py
+++ b/llvm/utils/release/bump-version.py
@@ -188,6 +188,11 @@ def process_line(self, line: str) -> str:
             "llvm/utils/lit/lit/__init__.py",
             LitProcessor(args),
         ),
+        # mlgo-utils configuration
+        (
+            "llvm/utils/mlgo-utils/mlgo/__init__.py",
+            LitProcessor(args),
+        ),
         # GN build system
         (
             "llvm/utils/gn/secondary/llvm/version.gni",

From a4902a36d5c27c3a5199cd1ba91eba17910fdd68 Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@hieta.se>
Date: Fri, 26 Jul 2024 14:00:03 +0200
Subject: [PATCH 047/427] Set version to 19.1.0-rc1

---
 cmake/Modules/LLVMVersion.cmake        | 2 +-
 llvm/utils/mlgo-utils/mlgo/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake
index aea9b880180ab..1ad844cbea838 100644
--- a/cmake/Modules/LLVMVersion.cmake
+++ b/cmake/Modules/LLVMVersion.cmake
@@ -10,6 +10,6 @@ if(NOT DEFINED LLVM_VERSION_PATCH)
   set(LLVM_VERSION_PATCH 0)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
-  set(LLVM_VERSION_SUFFIX git)
+  set(LLVM_VERSION_SUFFIX -rc1)
 endif()
 
diff --git a/llvm/utils/mlgo-utils/mlgo/__init__.py b/llvm/utils/mlgo-utils/mlgo/__init__.py
index c5b208cfba360..09e95f3d62058 100644
--- a/llvm/utils/mlgo-utils/mlgo/__init__.py
+++ b/llvm/utils/mlgo-utils/mlgo/__init__.py
@@ -4,7 +4,7 @@
 
 from datetime import timezone, datetime
 
-__versioninfo__ = (19, 0, 0)
+__versioninfo__ = (19, 1, 0)
 __version__ = (
     ".".join(str(v) for v in __versioninfo__)
     + "dev"

From fb26667a0e53f27c55bec9e6e9dc97f05905d423 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Wed, 24 Jul 2024 19:22:18 -0700
Subject: [PATCH 048/427] Revert "[clang-format] Fix a bug in annotating `*` in
 `#define`s (#99433)"

This reverts commit ce1a87437cc143889665c41046107e84cdf6246e.

Closes #100304.

(cherry picked from commit 7e7a9069d4240d2ae619cb50eba09f948c537ce3)
---
 clang/lib/Format/TokenAnnotator.cpp           | 19 +++++-------------
 clang/unittests/Format/TokenAnnotatorTest.cpp | 20 -------------------
 2 files changed, 5 insertions(+), 34 deletions(-)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 21924a8fe17d1..5c11f3cb1a874 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -372,6 +372,10 @@ class AnnotatingParser {
                OpeningParen.Previous->is(tok::kw__Generic)) {
       Contexts.back().ContextType = Context::C11GenericSelection;
       Contexts.back().IsExpression = true;
+    } else if (Line.InPPDirective &&
+               (!OpeningParen.Previous ||
+                OpeningParen.Previous->isNot(tok::identifier))) {
+      Contexts.back().IsExpression = true;
     } else if (Contexts[Contexts.size() - 2].CaretFound) {
       // This is the parameter list of an ObjC block.
       Contexts.back().IsExpression = false;
@@ -384,20 +388,7 @@ class AnnotatingParser {
                OpeningParen.Previous->MatchingParen->isOneOf(
                    TT_ObjCBlockLParen, TT_FunctionTypeLParen)) {
       Contexts.back().IsExpression = false;
-    } else if (Line.InPPDirective) {
-      auto IsExpr = [&OpeningParen] {
-        const auto *Tok = OpeningParen.Previous;
-        if (!Tok || Tok->isNot(tok::identifier))
-          return true;
-        Tok = Tok->Previous;
-        while (Tok && Tok->endsSequence(tok::coloncolon, tok::identifier)) {
-          assert(Tok->Previous);
-          Tok = Tok->Previous->Previous;
-        }
-        return !Tok || !Tok->Tok.getIdentifierInfo();
-      };
-      Contexts.back().IsExpression = IsExpr();
-    } else if (!Line.MustBeDeclaration) {
+    } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
       bool IsForOrCatch =
           OpeningParen.Previous &&
           OpeningParen.Previous->isOneOf(tok::kw_for, tok::kw_catch);
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index b01ca322505b1..51810ad047a26 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -75,26 +75,6 @@ TEST_F(TokenAnnotatorTest, UnderstandsUsesOfStarAndAmp) {
   EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_TypeDeclarationParen);
   EXPECT_TOKEN(Tokens[11], tok::star, TT_PointerOrReference);
 
-  Tokens = annotate("#define FOO bar(a * b)");
-  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
-  EXPECT_TOKEN(Tokens[6], tok::star, TT_BinaryOperator);
-
-  Tokens = annotate("#define FOO foo.bar(a & b)");
-  ASSERT_EQ(Tokens.size(), 12u) << Tokens;
-  EXPECT_TOKEN(Tokens[8], tok::amp, TT_BinaryOperator);
-
-  Tokens = annotate("#define FOO foo::bar(a && b)");
-  ASSERT_EQ(Tokens.size(), 12u) << Tokens;
-  EXPECT_TOKEN(Tokens[8], tok::ampamp, TT_BinaryOperator);
-
-  Tokens = annotate("#define FOO foo bar(a *b)");
-  ASSERT_EQ(Tokens.size(), 11u) << Tokens;
-  EXPECT_TOKEN(Tokens[7], tok::star, TT_PointerOrReference);
-
-  Tokens = annotate("#define FOO void foo::bar(a &b)");
-  ASSERT_EQ(Tokens.size(), 13u) << Tokens;
-  EXPECT_TOKEN(Tokens[9], tok::amp, TT_PointerOrReference);
-
   Tokens = annotate("void f() {\n"
                     "  while (p < a && *p == 'a')\n"
                     "    p++;\n"

From 102ecd39c421b3f1938b856d160c149a027eedac Mon Sep 17 00:00:00 2001
From: wanglei <wanglei@loongson.cn>
Date: Fri, 26 Jul 2024 14:36:54 +0800
Subject: [PATCH 049/427] [LoongArch][MC] Support %[ld_/gd_/desc_]pcrel_20

Reviewed By: SixWeining, MaskRay

Pull Request: https://github.com/llvm/llvm-project/pull/100104

(cherry picked from commit e27358c8ed7abac200546e808ea30a86aa9aa580)
---
 .../AsmParser/LoongArchAsmParser.cpp          | 24 +++++++++++++++++++
 .../Target/LoongArch/LoongArchInstrInfo.td    |  6 ++++-
 .../MCTargetDesc/LoongArchFixupKinds.h        |  8 +++++++
 .../MCTargetDesc/LoongArchMCCodeEmitter.cpp   | 12 ++++++++++
 .../MCTargetDesc/LoongArchMCExpr.cpp          | 15 ++++++++++++
 .../LoongArch/MCTargetDesc/LoongArchMCExpr.h  |  4 ++++
 .../test/MC/LoongArch/Basic/Integer/invalid.s |  6 +++--
 .../MC/LoongArch/Relocations/relocations.s    | 20 ++++++++++++++++
 8 files changed, 92 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
index 208bd3db8f14e..f52e188f87792 100644
--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
+++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
@@ -450,6 +450,24 @@ class LoongArchOperand : public MCParsedAsmOperand {
                      IsValidKind;
   }
 
+  bool isSImm20pcaddi() const {
+    if (!isImm())
+      return false;
+
+    int64_t Imm;
+    LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None;
+    bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+    bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None ||
+                       VK == LoongArchMCExpr::VK_LoongArch_PCREL20_S2 ||
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_LD_PCREL20_S2 ||
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_GD_PCREL20_S2 ||
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_DESC_PCREL20_S2;
+    return IsConstantImm
+               ? isInt<20>(Imm) && IsValidKind
+               : LoongArchAsmParser::classifySymbolRef(getImm(), VK) &&
+                     IsValidKind;
+  }
+
   bool isSImm21lsl2() const {
     if (!isImm())
       return false;
@@ -1676,6 +1694,12 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
         /*Upper=*/(1 << 19) - 1,
         "operand must be a symbol with modifier (e.g. %call36) or an integer "
         "in the range");
+  case Match_InvalidSImm20pcaddi:
+    return generateImmOutOfRangeError(
+        Operands, ErrorInfo, /*Lower=*/-(1 << 19),
+        /*Upper=*/(1 << 19) - 1,
+        "operand must be a symbol with modifier (e.g. %pcrel_20) or an integer "
+        "in the range");
   case Match_InvalidSImm21lsl2:
     return generateImmOutOfRangeError(
         Operands, ErrorInfo, /*Lower=*/-(1 << 22), /*Upper=*/(1 << 22) - 4,
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index ec0d071453c3f..ef647a4277873 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -397,6 +397,10 @@ def simm20_pcaddu18i : SImm20Operand {
   let ParserMatchClass = SImmAsmOperand<20, "pcaddu18i">;
 }
 
+def simm20_pcaddi : SImm20Operand {
+  let ParserMatchClass = SImmAsmOperand<20, "pcaddi">;
+}
+
 def simm21_lsl2 : Operand<OtherVT> {
   let ParserMatchClass = SImmAsmOperand<21, "lsl2">;
   let EncoderMethod = "getImmOpValueAsr<2>";
@@ -754,7 +758,7 @@ def SLT  : ALU_3R<0x00120000>;
 def SLTU : ALU_3R<0x00128000>;
 def SLTI  : ALU_2RI12<0x02000000, simm12>;
 def SLTUI : ALU_2RI12<0x02400000, simm12>;
-def PCADDI    : ALU_1RI20<0x18000000, simm20>;
+def PCADDI    : ALU_1RI20<0x18000000, simm20_pcaddi>;
 def PCADDU12I : ALU_1RI20<0x1c000000, simm20>;
 def PCALAU12I : ALU_1RI20<0x1a000000, simm20_pcalau12i>;
 def AND  : ALU_3R<0x00148000>;
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h
index 29ed14f6e4d62..370f5b0189b51 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h
@@ -111,6 +111,8 @@ enum Fixups {
   fixup_loongarch_relax = FirstLiteralRelocationKind + ELF::R_LARCH_RELAX,
   // Generate an R_LARCH_ALIGN which indicates the linker may fixup align here.
   fixup_loongarch_align = FirstLiteralRelocationKind + ELF::R_LARCH_ALIGN,
+  // 20-bit fixup corresponding to %pcrel_20(foo) for instruction pcaddi.
+  fixup_loongarch_pcrel20_s2,
   // 36-bit fixup corresponding to %call36(foo) for a pair instructions:
   // pcaddu18i+jirl.
   fixup_loongarch_call36 = FirstLiteralRelocationKind + ELF::R_LARCH_CALL36,
@@ -142,6 +144,12 @@ enum Fixups {
   fixup_loongarch_tls_le_add_r,
   // 12-bit fixup corresponding to %le_lo12_r(foo) for instruction addi.w/d.
   fixup_loongarch_tls_le_lo12_r,
+  // 20-bit fixup corresponding to %ld_pcrel_20(foo) for instruction pcaddi.
+  fixup_loongarch_tls_ld_pcrel20_s2,
+  // 20-bit fixup corresponding to %gd_pcrel_20(foo) for instruction pcaddi.
+  fixup_loongarch_tls_gd_pcrel20_s2,
+  // 20-bit fixup corresponding to %desc_pcrel_20(foo) for instruction pcaddi.
+  fixup_loongarch_tls_desc_pcrel20_s2,
 };
 } // end namespace LoongArch
 } // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
index efbfce35dbfca..4f7f93fa4783e 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
@@ -287,6 +287,18 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO,
     case LoongArchMCExpr::VK_LoongArch_TLS_LE_LO12_R:
       FixupKind = LoongArch::fixup_loongarch_tls_le_lo12_r;
       break;
+    case LoongArchMCExpr::VK_LoongArch_PCREL20_S2:
+      FixupKind = LoongArch::fixup_loongarch_pcrel20_s2;
+      break;
+    case LoongArchMCExpr::VK_LoongArch_TLS_LD_PCREL20_S2:
+      FixupKind = LoongArch::fixup_loongarch_tls_ld_pcrel20_s2;
+      break;
+    case LoongArchMCExpr::VK_LoongArch_TLS_GD_PCREL20_S2:
+      FixupKind = LoongArch::fixup_loongarch_tls_gd_pcrel20_s2;
+      break;
+    case LoongArchMCExpr::VK_LoongArch_TLS_DESC_PCREL20_S2:
+      FixupKind = LoongArch::fixup_loongarch_tls_desc_pcrel20_s2;
+      break;
     }
   } else if (Kind == MCExpr::SymbolRef &&
              cast<MCSymbolRefExpr>(Expr)->getKind() ==
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp
index 98b9b1ab6d3f4..53d46cca913e5 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp
@@ -166,6 +166,14 @@ StringRef LoongArchMCExpr::getVariantKindName(VariantKind Kind) {
     return "le_add_r";
   case VK_LoongArch_TLS_LE_LO12_R:
     return "le_lo12_r";
+  case VK_LoongArch_PCREL20_S2:
+    return "pcrel_20";
+  case VK_LoongArch_TLS_LD_PCREL20_S2:
+    return "ld_pcrel_20";
+  case VK_LoongArch_TLS_GD_PCREL20_S2:
+    return "gd_pcrel_20";
+  case VK_LoongArch_TLS_DESC_PCREL20_S2:
+    return "desc_pcrel_20";
   }
 }
 
@@ -222,6 +230,10 @@ LoongArchMCExpr::getVariantKindForName(StringRef name) {
       .Case("le_hi20_r", VK_LoongArch_TLS_LE_HI20_R)
       .Case("le_add_r", VK_LoongArch_TLS_LE_ADD_R)
       .Case("le_lo12_r", VK_LoongArch_TLS_LE_LO12_R)
+      .Case("pcrel_20", VK_LoongArch_PCREL20_S2)
+      .Case("ld_pcrel_20", VK_LoongArch_TLS_LD_PCREL20_S2)
+      .Case("gd_pcrel_20", VK_LoongArch_TLS_GD_PCREL20_S2)
+      .Case("desc_pcrel_20", VK_LoongArch_TLS_DESC_PCREL20_S2)
       .Default(VK_LoongArch_Invalid);
 }
 
@@ -264,6 +276,9 @@ void LoongArchMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
   case VK_LoongArch_TLS_GD_HI20:
   case VK_LoongArch_TLS_DESC_PC_HI20:
   case VK_LoongArch_TLS_DESC_HI20:
+  case VK_LoongArch_TLS_LD_PCREL20_S2:
+  case VK_LoongArch_TLS_GD_PCREL20_S2:
+  case VK_LoongArch_TLS_DESC_PCREL20_S2:
     break;
   }
   fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h
index 1546d894d56ab..91215b863ccc2 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h
@@ -75,6 +75,10 @@ class LoongArchMCExpr : public MCTargetExpr {
     VK_LoongArch_TLS_LE_HI20_R,
     VK_LoongArch_TLS_LE_ADD_R,
     VK_LoongArch_TLS_LE_LO12_R,
+    VK_LoongArch_PCREL20_S2,
+    VK_LoongArch_TLS_LD_PCREL20_S2,
+    VK_LoongArch_TLS_GD_PCREL20_S2,
+    VK_LoongArch_TLS_DESC_PCREL20_S2,
     VK_LoongArch_Invalid // Must be the last item.
   };
 
diff --git a/llvm/test/MC/LoongArch/Basic/Integer/invalid.s b/llvm/test/MC/LoongArch/Basic/Integer/invalid.s
index 958d5cab6f2f3..08a131d4d43f9 100644
--- a/llvm/test/MC/LoongArch/Basic/Integer/invalid.s
+++ b/llvm/test/MC/LoongArch/Basic/Integer/invalid.s
@@ -99,11 +99,13 @@ jirl $a0, $a0, 0x20000
 # CHECK: :[[#@LINE-1]]:16: error: operand must be a symbol with modifier (e.g. %b16) or an integer in the range [-131072, 131068]
 
 ## simm20
-pcaddi $a0, -0x80001
-# CHECK: :[[#@LINE-1]]:13: error: immediate must be an integer in the range [-524288, 524287]
 pcaddu12i $a0, 0x80000
 # CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-524288, 524287]
 
+## simm20_pcaddi
+pcaddi $a0, -0x80001
+# CHECK: :[[#@LINE-1]]:13: error: operand must be a symbol with modifier (e.g. %pcrel_20) or an integer in the range [-524288, 524287]
+
 ## simm20_lu12iw
 lu12i.w $a0, -0x80001
 # CHECK: :[[#@LINE-1]]:14: error: operand must be a symbol with modifier (e.g. %abs_hi20) or an integer in the range [-524288, 524287]
diff --git a/llvm/test/MC/LoongArch/Relocations/relocations.s b/llvm/test/MC/LoongArch/Relocations/relocations.s
index e83b67199e656..091dce200b7de 100644
--- a/llvm/test/MC/LoongArch/Relocations/relocations.s
+++ b/llvm/test/MC/LoongArch/Relocations/relocations.s
@@ -288,3 +288,23 @@ addi.d $t1, $a2, %le_lo12_r(foo)
 # RELOC: R_LARCH_TLS_LE_LO12_R foo 0x0
 # INSTR: addi.d $t1, $a2, %le_lo12_r(foo)
 # FIXUP: fixup A - offset: 0, value: %le_lo12_r(foo), kind: FK_NONE
+
+pcaddi $t1, %pcrel_20(foo)
+# RELOC: R_LARCH_PCREL20_S2 foo 0x0
+# INSTR: pcaddi $t1, %pcrel_20(foo)
+# FIXUP: fixup A - offset: 0, value: %pcrel_20(foo), kind: FK_NONE
+
+pcaddi $t1, %ld_pcrel_20(foo)
+# RELOC: R_LARCH_TLS_LD_PCREL20_S2 foo 0x0
+# INSTR: pcaddi $t1, %ld_pcrel_20(foo)
+# FIXUP: fixup A - offset: 0, value: %ld_pcrel_20(foo), kind: FK_NONE
+
+pcaddi $t1, %gd_pcrel_20(foo)
+# RELOC: R_LARCH_TLS_GD_PCREL20_S2 foo 0x0
+# INSTR: pcaddi $t1, %gd_pcrel_20(foo)
+# FIXUP: fixup A - offset: 0, value: %gd_pcrel_20(foo), kind: FK_NONE
+
+pcaddi $t1, %desc_pcrel_20(foo)
+# RELOC: R_LARCH_TLS_DESC_PCREL20_S2 foo 0x0
+# INSTR: pcaddi $t1, %desc_pcrel_20(foo)
+# FIXUP: fixup A - offset: 0, value: %desc_pcrel_20(foo), kind: FK_NONE

From 47fafad155a8d8394720dbc689558d304662e205 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Fri, 26 Jul 2024 13:10:06 -0500
Subject: [PATCH 050/427] [libc++] Fix bug in atomic_ref's calculation of
 lock_free-ness (#99570)

The builtin __atomic_always_lock_free takes into account the type of the
pointer provided as the second argument. Because we were passing void*,
rather than T*, the calculation failed. This meant that
atomic_ref<T>::is_always_lock_free was only true for char & bool.

This bug exists elsewhere in the atomic library (when using GCC, we fail
to pass a pointer at all, and we fail to correctly align the atomic like
_Atomic would).

This change also attempts to start sorting out testing difficulties with
this function that caused the bug to exist by using the
__GCC_ATOMIC_(CHAR|SHORT|INT|LONG|LLONG|POINTER)_IS_LOCK_FREE predefined
macros to establish an expected value for `is_always_lock_free` and
`is_lock_free` for the respective types, as well as types with matching
sizes and compatible alignment values.

Using these compiler pre-defines we can actually validate that certain
types, like char and int, are actually always lock free like they are on
every platform in the wild.

Note that this patch was actually authored by Eric Fiselier but I picked
up the patch and GitHub won't let me set Eric as the primary author.

Co-authored-by: Eric Fiselier <eric@efcs.ca>
(cherry picked from commit cc1dfb37aa84d1524243b83fadb8ff0f821e03e9)
---
 libcxx/include/__atomic/atomic_ref.h          |  15 +-
 .../atomics.lockfree/is_always_lock_free.cpp  | 165 ++++++++++++++++++
 .../isalwayslockfree.pass.cpp                 | 120 -------------
 .../atomics.ref/is_always_lock_free.pass.cpp  |  34 +++-
 libcxx/test/support/atomic_helpers.h          | 103 +++++++++++
 5 files changed, 312 insertions(+), 125 deletions(-)
 create mode 100644 libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp
 delete mode 100644 libcxx/test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp

diff --git a/libcxx/include/__atomic/atomic_ref.h b/libcxx/include/__atomic/atomic_ref.h
index 156f1961151c1..2849b82e1a3dd 100644
--- a/libcxx/include/__atomic/atomic_ref.h
+++ b/libcxx/include/__atomic/atomic_ref.h
@@ -42,6 +42,19 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #if _LIBCPP_STD_VER >= 20
 
+// These types are required to make __atomic_is_always_lock_free work across GCC and Clang.
+// The purpose of this trick is to make sure that we provide an object with the correct alignment
+// to __atomic_is_always_lock_free, since that answer depends on the alignment.
+template <size_t _Alignment>
+struct __alignment_checker_type {
+  alignas(_Alignment) char __data;
+};
+
+template <size_t _Alignment>
+struct __get_aligner_instance {
+  static constexpr __alignment_checker_type<_Alignment> __instance{};
+};
+
 template <class _Tp>
 struct __atomic_ref_base {
 protected:
@@ -105,7 +118,7 @@ struct __atomic_ref_base {
   // that the pointer is going to be aligned properly at runtime because that is a (checked) precondition
   // of atomic_ref's constructor.
   static constexpr bool is_always_lock_free =
-      __atomic_always_lock_free(sizeof(_Tp), reinterpret_cast<void*>(-required_alignment));
+      __atomic_always_lock_free(sizeof(_Tp), &__get_aligner_instance<required_alignment>::__instance);
 
   _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const noexcept { return __atomic_is_lock_free(sizeof(_Tp), __ptr_); }
 
diff --git a/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp b/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp
new file mode 100644
index 0000000000000..2dc7f5c765419
--- /dev/null
+++ b/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp
@@ -0,0 +1,165 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// UNSUPPORTED: c++03, c++11, c++14
+
+// <atomic>
+//
+// template <class T>
+// class atomic;
+//
+// static constexpr bool is_always_lock_free;
+
+#include <atomic>
+#include <cassert>
+#include <cstddef>
+
+#include "test_macros.h"
+#include "atomic_helpers.h"
+
+template <typename T>
+void check_always_lock_free(std::atomic<T> const& a) {
+  using InfoT = LockFreeStatusInfo<T>;
+
+  constexpr std::same_as<const bool> decltype(auto) is_always_lock_free = std::atomic<T>::is_always_lock_free;
+
+  // If we know the status of T for sure, validate the exact result of the function.
+  if constexpr (InfoT::status_known) {
+    constexpr LockFreeStatus known_status = InfoT::value;
+    if constexpr (known_status == LockFreeStatus::always) {
+      static_assert(is_always_lock_free, "is_always_lock_free is inconsistent with known lock-free status");
+      assert(a.is_lock_free() && "is_lock_free() is inconsistent with known lock-free status");
+    } else if constexpr (known_status == LockFreeStatus::never) {
+      static_assert(!is_always_lock_free, "is_always_lock_free is inconsistent with known lock-free status");
+      assert(!a.is_lock_free() && "is_lock_free() is inconsistent with known lock-free status");
+    } else {
+      assert(a.is_lock_free() || !a.is_lock_free()); // This is kinda dumb, but we might as well call the function once.
+    }
+  }
+
+  // In all cases, also sanity-check it based on the implication always-lock-free => lock-free.
+  if (is_always_lock_free) {
+    std::same_as<bool> decltype(auto) is_lock_free = a.is_lock_free();
+    assert(is_lock_free);
+  }
+  ASSERT_NOEXCEPT(a.is_lock_free());
+}
+
+#define CHECK_ALWAYS_LOCK_FREE(T)                                                                                      \
+  do {                                                                                                                 \
+    typedef T type;                                                                                                    \
+    type obj{};                                                                                                        \
+    std::atomic<type> a(obj);                                                                                          \
+    check_always_lock_free(a);                                                                                         \
+  } while (0)
+
+void test() {
+  char c = 'x';
+  check_always_lock_free(std::atomic<char>(c));
+
+  int i = 0;
+  check_always_lock_free(std::atomic<int>(i));
+
+  float f = 0.f;
+  check_always_lock_free(std::atomic<float>(f));
+
+  int* p = &i;
+  check_always_lock_free(std::atomic<int*>(p));
+
+  CHECK_ALWAYS_LOCK_FREE(bool);
+  CHECK_ALWAYS_LOCK_FREE(char);
+  CHECK_ALWAYS_LOCK_FREE(signed char);
+  CHECK_ALWAYS_LOCK_FREE(unsigned char);
+#if TEST_STD_VER > 17 && defined(__cpp_char8_t)
+  CHECK_ALWAYS_LOCK_FREE(char8_t);
+#endif
+  CHECK_ALWAYS_LOCK_FREE(char16_t);
+  CHECK_ALWAYS_LOCK_FREE(char32_t);
+  CHECK_ALWAYS_LOCK_FREE(wchar_t);
+  CHECK_ALWAYS_LOCK_FREE(short);
+  CHECK_ALWAYS_LOCK_FREE(unsigned short);
+  CHECK_ALWAYS_LOCK_FREE(int);
+  CHECK_ALWAYS_LOCK_FREE(unsigned int);
+  CHECK_ALWAYS_LOCK_FREE(long);
+  CHECK_ALWAYS_LOCK_FREE(unsigned long);
+  CHECK_ALWAYS_LOCK_FREE(long long);
+  CHECK_ALWAYS_LOCK_FREE(unsigned long long);
+  CHECK_ALWAYS_LOCK_FREE(std::nullptr_t);
+  CHECK_ALWAYS_LOCK_FREE(void*);
+  CHECK_ALWAYS_LOCK_FREE(float);
+  CHECK_ALWAYS_LOCK_FREE(double);
+  CHECK_ALWAYS_LOCK_FREE(long double);
+#if __has_attribute(vector_size) && defined(_LIBCPP_VERSION)
+  CHECK_ALWAYS_LOCK_FREE(int __attribute__((vector_size(1 * sizeof(int)))));
+  CHECK_ALWAYS_LOCK_FREE(int __attribute__((vector_size(2 * sizeof(int)))));
+  CHECK_ALWAYS_LOCK_FREE(int __attribute__((vector_size(4 * sizeof(int)))));
+  CHECK_ALWAYS_LOCK_FREE(int __attribute__((vector_size(16 * sizeof(int)))));
+  CHECK_ALWAYS_LOCK_FREE(int __attribute__((vector_size(32 * sizeof(int)))));
+  CHECK_ALWAYS_LOCK_FREE(float __attribute__((vector_size(1 * sizeof(float)))));
+  CHECK_ALWAYS_LOCK_FREE(float __attribute__((vector_size(2 * sizeof(float)))));
+  CHECK_ALWAYS_LOCK_FREE(float __attribute__((vector_size(4 * sizeof(float)))));
+  CHECK_ALWAYS_LOCK_FREE(float __attribute__((vector_size(16 * sizeof(float)))));
+  CHECK_ALWAYS_LOCK_FREE(float __attribute__((vector_size(32 * sizeof(float)))));
+  CHECK_ALWAYS_LOCK_FREE(double __attribute__((vector_size(1 * sizeof(double)))));
+  CHECK_ALWAYS_LOCK_FREE(double __attribute__((vector_size(2 * sizeof(double)))));
+  CHECK_ALWAYS_LOCK_FREE(double __attribute__((vector_size(4 * sizeof(double)))));
+  CHECK_ALWAYS_LOCK_FREE(double __attribute__((vector_size(16 * sizeof(double)))));
+  CHECK_ALWAYS_LOCK_FREE(double __attribute__((vector_size(32 * sizeof(double)))));
+#endif // __has_attribute(vector_size) && defined(_LIBCPP_VERSION)
+  CHECK_ALWAYS_LOCK_FREE(struct Empty{});
+  CHECK_ALWAYS_LOCK_FREE(struct OneInt { int i; });
+  CHECK_ALWAYS_LOCK_FREE(struct IntArr2 { int i[2]; });
+  CHECK_ALWAYS_LOCK_FREE(struct FloatArr3 { float i[3]; });
+  CHECK_ALWAYS_LOCK_FREE(struct LLIArr2 { long long int i[2]; });
+  CHECK_ALWAYS_LOCK_FREE(struct LLIArr4 { long long int i[4]; });
+  CHECK_ALWAYS_LOCK_FREE(struct LLIArr8 { long long int i[8]; });
+  CHECK_ALWAYS_LOCK_FREE(struct LLIArr16 { long long int i[16]; });
+  CHECK_ALWAYS_LOCK_FREE(struct Padding {
+    char c; /* padding */
+    long long int i;
+  });
+  CHECK_ALWAYS_LOCK_FREE(union IntFloat {
+    int i;
+    float f;
+  });
+  CHECK_ALWAYS_LOCK_FREE(enum class CharEnumClass : char{foo});
+
+  // C macro and static constexpr must be consistent.
+  enum class CharEnumClass : char { foo };
+  static_assert(std::atomic<bool>::is_always_lock_free == (2 == ATOMIC_BOOL_LOCK_FREE), "");
+  static_assert(std::atomic<char>::is_always_lock_free == (2 == ATOMIC_CHAR_LOCK_FREE), "");
+  static_assert(std::atomic<CharEnumClass>::is_always_lock_free == (2 == ATOMIC_CHAR_LOCK_FREE), "");
+  static_assert(std::atomic<signed char>::is_always_lock_free == (2 == ATOMIC_CHAR_LOCK_FREE), "");
+  static_assert(std::atomic<unsigned char>::is_always_lock_free == (2 == ATOMIC_CHAR_LOCK_FREE), "");
+#if TEST_STD_VER > 17 && defined(__cpp_char8_t)
+  static_assert(std::atomic<char8_t>::is_always_lock_free == (2 == ATOMIC_CHAR8_T_LOCK_FREE), "");
+#endif
+  static_assert(std::atomic<char16_t>::is_always_lock_free == (2 == ATOMIC_CHAR16_T_LOCK_FREE), "");
+  static_assert(std::atomic<char32_t>::is_always_lock_free == (2 == ATOMIC_CHAR32_T_LOCK_FREE), "");
+  static_assert(std::atomic<wchar_t>::is_always_lock_free == (2 == ATOMIC_WCHAR_T_LOCK_FREE), "");
+  static_assert(std::atomic<short>::is_always_lock_free == (2 == ATOMIC_SHORT_LOCK_FREE), "");
+  static_assert(std::atomic<unsigned short>::is_always_lock_free == (2 == ATOMIC_SHORT_LOCK_FREE), "");
+  static_assert(std::atomic<int>::is_always_lock_free == (2 == ATOMIC_INT_LOCK_FREE), "");
+  static_assert(std::atomic<unsigned int>::is_always_lock_free == (2 == ATOMIC_INT_LOCK_FREE), "");
+  static_assert(std::atomic<long>::is_always_lock_free == (2 == ATOMIC_LONG_LOCK_FREE), "");
+  static_assert(std::atomic<unsigned long>::is_always_lock_free == (2 == ATOMIC_LONG_LOCK_FREE), "");
+  static_assert(std::atomic<long long>::is_always_lock_free == (2 == ATOMIC_LLONG_LOCK_FREE), "");
+  static_assert(std::atomic<unsigned long long>::is_always_lock_free == (2 == ATOMIC_LLONG_LOCK_FREE), "");
+  static_assert(std::atomic<void*>::is_always_lock_free == (2 == ATOMIC_POINTER_LOCK_FREE), "");
+  static_assert(std::atomic<std::nullptr_t>::is_always_lock_free == (2 == ATOMIC_POINTER_LOCK_FREE), "");
+
+#if TEST_STD_VER >= 20
+  static_assert(std::atomic_signed_lock_free::is_always_lock_free, "");
+  static_assert(std::atomic_unsigned_lock_free::is_always_lock_free, "");
+#endif
+}
+
+int main(int, char**) {
+  test();
+  return 0;
+}
diff --git a/libcxx/test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp b/libcxx/test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp
deleted file mode 100644
index 6d6e6477bc251..0000000000000
--- a/libcxx/test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp
+++ /dev/null
@@ -1,120 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// UNSUPPORTED: c++03, c++11, c++14
-
-// <atomic>
-
-// static constexpr bool is_always_lock_free;
-
-#include <atomic>
-#include <cassert>
-#include <cstddef>
-
-#include "test_macros.h"
-
-template <typename T>
-void checkAlwaysLockFree() {
-  if (std::atomic<T>::is_always_lock_free) {
-    assert(std::atomic<T>().is_lock_free());
-  }
-}
-
-void run()
-{
-// structs and unions can't be defined in the template invocation.
-// Work around this with a typedef.
-#define CHECK_ALWAYS_LOCK_FREE(T)                                              \
-  do {                                                                         \
-    typedef T type;                                                            \
-    checkAlwaysLockFree<type>();                                               \
-  } while (0)
-
-    CHECK_ALWAYS_LOCK_FREE(bool);
-    CHECK_ALWAYS_LOCK_FREE(char);
-    CHECK_ALWAYS_LOCK_FREE(signed char);
-    CHECK_ALWAYS_LOCK_FREE(unsigned char);
-#if TEST_STD_VER > 17 && defined(__cpp_char8_t)
-    CHECK_ALWAYS_LOCK_FREE(char8_t);
-#endif
-    CHECK_ALWAYS_LOCK_FREE(char16_t);
-    CHECK_ALWAYS_LOCK_FREE(char32_t);
-    CHECK_ALWAYS_LOCK_FREE(wchar_t);
-    CHECK_ALWAYS_LOCK_FREE(short);
-    CHECK_ALWAYS_LOCK_FREE(unsigned short);
-    CHECK_ALWAYS_LOCK_FREE(int);
-    CHECK_ALWAYS_LOCK_FREE(unsigned int);
-    CHECK_ALWAYS_LOCK_FREE(long);
-    CHECK_ALWAYS_LOCK_FREE(unsigned long);
-    CHECK_ALWAYS_LOCK_FREE(long long);
-    CHECK_ALWAYS_LOCK_FREE(unsigned long long);
-    CHECK_ALWAYS_LOCK_FREE(std::nullptr_t);
-    CHECK_ALWAYS_LOCK_FREE(void*);
-    CHECK_ALWAYS_LOCK_FREE(float);
-    CHECK_ALWAYS_LOCK_FREE(double);
-    CHECK_ALWAYS_LOCK_FREE(long double);
-#if __has_attribute(vector_size) && defined(_LIBCPP_VERSION)
-    CHECK_ALWAYS_LOCK_FREE(int __attribute__((vector_size(1 * sizeof(int)))));
-    CHECK_ALWAYS_LOCK_FREE(int __attribute__((vector_size(2 * sizeof(int)))));
-    CHECK_ALWAYS_LOCK_FREE(int __attribute__((vector_size(4 * sizeof(int)))));
-    CHECK_ALWAYS_LOCK_FREE(int __attribute__((vector_size(16 * sizeof(int)))));
-    CHECK_ALWAYS_LOCK_FREE(int __attribute__((vector_size(32 * sizeof(int)))));
-    CHECK_ALWAYS_LOCK_FREE(float __attribute__((vector_size(1 * sizeof(float)))));
-    CHECK_ALWAYS_LOCK_FREE(float __attribute__((vector_size(2 * sizeof(float)))));
-    CHECK_ALWAYS_LOCK_FREE(float __attribute__((vector_size(4 * sizeof(float)))));
-    CHECK_ALWAYS_LOCK_FREE(float __attribute__((vector_size(16 * sizeof(float)))));
-    CHECK_ALWAYS_LOCK_FREE(float __attribute__((vector_size(32 * sizeof(float)))));
-    CHECK_ALWAYS_LOCK_FREE(double __attribute__((vector_size(1 * sizeof(double)))));
-    CHECK_ALWAYS_LOCK_FREE(double __attribute__((vector_size(2 * sizeof(double)))));
-    CHECK_ALWAYS_LOCK_FREE(double __attribute__((vector_size(4 * sizeof(double)))));
-    CHECK_ALWAYS_LOCK_FREE(double __attribute__((vector_size(16 * sizeof(double)))));
-    CHECK_ALWAYS_LOCK_FREE(double __attribute__((vector_size(32 * sizeof(double)))));
-#endif // __has_attribute(vector_size) && defined(_LIBCPP_VERSION)
-    CHECK_ALWAYS_LOCK_FREE(struct Empty {});
-    CHECK_ALWAYS_LOCK_FREE(struct OneInt { int i; });
-    CHECK_ALWAYS_LOCK_FREE(struct IntArr2 { int i[2]; });
-    CHECK_ALWAYS_LOCK_FREE(struct FloatArr3 { float i[3]; });
-    CHECK_ALWAYS_LOCK_FREE(struct LLIArr2 { long long int i[2]; });
-    CHECK_ALWAYS_LOCK_FREE(struct LLIArr4 { long long int i[4]; });
-    CHECK_ALWAYS_LOCK_FREE(struct LLIArr8 { long long int i[8]; });
-    CHECK_ALWAYS_LOCK_FREE(struct LLIArr16 { long long int i[16]; });
-    CHECK_ALWAYS_LOCK_FREE(struct Padding { char c; /* padding */ long long int i; });
-    CHECK_ALWAYS_LOCK_FREE(union IntFloat { int i; float f; });
-    CHECK_ALWAYS_LOCK_FREE(enum class CharEnumClass : char { foo });
-
-    // C macro and static constexpr must be consistent.
-    enum class CharEnumClass : char { foo };
-    static_assert(std::atomic<bool>::is_always_lock_free == (2 == ATOMIC_BOOL_LOCK_FREE), "");
-    static_assert(std::atomic<char>::is_always_lock_free == (2 == ATOMIC_CHAR_LOCK_FREE), "");
-    static_assert(std::atomic<CharEnumClass>::is_always_lock_free == (2 == ATOMIC_CHAR_LOCK_FREE), "");
-    static_assert(std::atomic<signed char>::is_always_lock_free == (2 == ATOMIC_CHAR_LOCK_FREE), "");
-    static_assert(std::atomic<unsigned char>::is_always_lock_free == (2 == ATOMIC_CHAR_LOCK_FREE), "");
-#if TEST_STD_VER > 17 && defined(__cpp_char8_t)
-    static_assert(std::atomic<char8_t>::is_always_lock_free == (2 == ATOMIC_CHAR8_T_LOCK_FREE), "");
-#endif
-    static_assert(std::atomic<char16_t>::is_always_lock_free == (2 == ATOMIC_CHAR16_T_LOCK_FREE), "");
-    static_assert(std::atomic<char32_t>::is_always_lock_free == (2 == ATOMIC_CHAR32_T_LOCK_FREE), "");
-    static_assert(std::atomic<wchar_t>::is_always_lock_free == (2 == ATOMIC_WCHAR_T_LOCK_FREE), "");
-    static_assert(std::atomic<short>::is_always_lock_free == (2 == ATOMIC_SHORT_LOCK_FREE), "");
-    static_assert(std::atomic<unsigned short>::is_always_lock_free == (2 == ATOMIC_SHORT_LOCK_FREE), "");
-    static_assert(std::atomic<int>::is_always_lock_free == (2 == ATOMIC_INT_LOCK_FREE), "");
-    static_assert(std::atomic<unsigned int>::is_always_lock_free == (2 == ATOMIC_INT_LOCK_FREE), "");
-    static_assert(std::atomic<long>::is_always_lock_free == (2 == ATOMIC_LONG_LOCK_FREE), "");
-    static_assert(std::atomic<unsigned long>::is_always_lock_free == (2 == ATOMIC_LONG_LOCK_FREE), "");
-    static_assert(std::atomic<long long>::is_always_lock_free == (2 == ATOMIC_LLONG_LOCK_FREE), "");
-    static_assert(std::atomic<unsigned long long>::is_always_lock_free == (2 == ATOMIC_LLONG_LOCK_FREE), "");
-    static_assert(std::atomic<void*>::is_always_lock_free == (2 == ATOMIC_POINTER_LOCK_FREE), "");
-    static_assert(std::atomic<std::nullptr_t>::is_always_lock_free == (2 == ATOMIC_POINTER_LOCK_FREE), "");
-
-#if TEST_STD_VER >= 20
-    static_assert(std::atomic_signed_lock_free::is_always_lock_free, "");
-    static_assert(std::atomic_unsigned_lock_free::is_always_lock_free, "");
-#endif
-}
-
-int main(int, char**) { run(); return 0; }
diff --git a/libcxx/test/std/atomics/atomics.ref/is_always_lock_free.pass.cpp b/libcxx/test/std/atomics/atomics.ref/is_always_lock_free.pass.cpp
index 94f65e3b4b669..acdbf63a24d85 100644
--- a/libcxx/test/std/atomics/atomics.ref/is_always_lock_free.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.ref/is_always_lock_free.pass.cpp
@@ -9,7 +9,10 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 
 // <atomic>
-
+//
+// template <class T>
+// class atomic_ref;
+//
 // static constexpr bool is_always_lock_free;
 // bool is_lock_free() const noexcept;
 
@@ -18,10 +21,29 @@
 #include <concepts>
 
 #include "test_macros.h"
+#include "atomic_helpers.h"
 
 template <typename T>
-void check_always_lock_free(std::atomic_ref<T> const a) {
-  std::same_as<const bool> decltype(auto) is_always_lock_free = std::atomic_ref<T>::is_always_lock_free;
+void check_always_lock_free(std::atomic_ref<T> const& a) {
+  using InfoT = LockFreeStatusInfo<T>;
+
+  constexpr std::same_as<const bool> decltype(auto) is_always_lock_free = std::atomic_ref<T>::is_always_lock_free;
+
+  // If we know the status of T for sure, validate the exact result of the function.
+  if constexpr (InfoT::status_known) {
+    constexpr LockFreeStatus known_status = InfoT::value;
+    if constexpr (known_status == LockFreeStatus::always) {
+      static_assert(is_always_lock_free, "is_always_lock_free is inconsistent with known lock-free status");
+      assert(a.is_lock_free() && "is_lock_free() is inconsistent with known lock-free status");
+    } else if constexpr (known_status == LockFreeStatus::never) {
+      static_assert(!is_always_lock_free, "is_always_lock_free is inconsistent with known lock-free status");
+      assert(!a.is_lock_free() && "is_lock_free() is inconsistent with known lock-free status");
+    } else {
+      assert(a.is_lock_free() || !a.is_lock_free()); // This is kinda dumb, but we might as well call the function once.
+    }
+  }
+
+  // In all cases, also sanity-check it based on the implication always-lock-free => lock-free.
   if (is_always_lock_free) {
     std::same_as<bool> decltype(auto) is_lock_free = a.is_lock_free();
     assert(is_lock_free);
@@ -33,10 +55,14 @@ void check_always_lock_free(std::atomic_ref<T> const a) {
   do {                                                                                                                 \
     typedef T type;                                                                                                    \
     type obj{};                                                                                                        \
-    check_always_lock_free(std::atomic_ref<type>(obj));                                                                \
+    std::atomic_ref<type> a(obj);                                                                                      \
+    check_always_lock_free(a);                                                                                         \
   } while (0)
 
 void test() {
+  char c = 'x';
+  check_always_lock_free(std::atomic_ref<char>(c));
+
   int i = 0;
   check_always_lock_free(std::atomic_ref<int>(i));
 
diff --git a/libcxx/test/support/atomic_helpers.h b/libcxx/test/support/atomic_helpers.h
index 0266a0961067b..d2f2b751cb47d 100644
--- a/libcxx/test/support/atomic_helpers.h
+++ b/libcxx/test/support/atomic_helpers.h
@@ -11,9 +11,112 @@
 
 #include <cassert>
 #include <cstdint>
+#include <cstddef>
+#include <type_traits>
 
 #include "test_macros.h"
 
+#if defined(TEST_COMPILER_CLANG)
+#  define TEST_ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE
+#  define TEST_ATOMIC_SHORT_LOCK_FREE __CLANG_ATOMIC_SHORT_LOCK_FREE
+#  define TEST_ATOMIC_INT_LOCK_FREE __CLANG_ATOMIC_INT_LOCK_FREE
+#  define TEST_ATOMIC_LONG_LOCK_FREE __CLANG_ATOMIC_LONG_LOCK_FREE
+#  define TEST_ATOMIC_LLONG_LOCK_FREE __CLANG_ATOMIC_LLONG_LOCK_FREE
+#  define TEST_ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE
+#elif defined(TEST_COMPILER_GCC)
+#  define TEST_ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE
+#  define TEST_ATOMIC_SHORT_LOCK_FREE __GCC_ATOMIC_SHORT_LOCK_FREE
+#  define TEST_ATOMIC_INT_LOCK_FREE __GCC_ATOMIC_INT_LOCK_FREE
+#  define TEST_ATOMIC_LONG_LOCK_FREE __GCC_ATOMIC_LONG_LOCK_FREE
+#  define TEST_ATOMIC_LLONG_LOCK_FREE __GCC_ATOMIC_LLONG_LOCK_FREE
+#  define TEST_ATOMIC_POINTER_LOCK_FREE __GCC_ATOMIC_POINTER_LOCK_FREE
+#elif TEST_COMPILER_MSVC
+// This is lifted from STL/stl/inc/atomic on github for the purposes of
+// keeping the tests compiling for MSVC's STL. It's not a perfect solution
+// but at least the tests will keep running.
+//
+// Note MSVC's STL never produces a type that is sometimes lock free, but not always lock free.
+template <class T, size_t Size = sizeof(T)>
+constexpr bool msvc_is_lock_free_macro_value() {
+  return (Size <= 8 && (Size & Size - 1) == 0) ? 2 : 0;
+}
+#  define TEST_ATOMIC_CHAR_LOCK_FREE ::msvc_is_lock_free_macro_value<char>()
+#  define TEST_ATOMIC_SHORT_LOCK_FREE ::msvc_is_lock_free_macro_value<short>()
+#  define TEST_ATOMIC_INT_LOCK_FREE ::msvc_is_lock_free_macro_value<int>()
+#  define TEST_ATOMIC_LONG_LOCK_FREE ::msvc_is_lock_free_macro_value<long>()
+#  define TEST_ATOMIC_LLONG_LOCK_FREE ::msvc_is_lock_free_macro_value<long long>()
+#  define TEST_ATOMIC_POINTER_LOCK_FREE ::msvc_is_lock_free_macro_value<void*>()
+#else
+#  error "Unknown compiler"
+#endif
+
+#ifdef TEST_COMPILER_CLANG
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wc++11-extensions"
+#endif
+
+enum class LockFreeStatus : int { unknown = -1, never = 0, sometimes = 1, always = 2 };
+
+// We should really be checking whether the alignment of T is greater-than-or-equal-to the alignment required
+// for T to be atomic, but this is basically impossible to implement portably. Instead, we assume that any type
+// aligned to at least its size is going to be atomic if there exists atomic operations for that size at all,
+// which is true on most platforms. This technically reduces our test coverage in the sense that if a type has
+// an alignment requirement less than its size but could still be made lockfree, LockFreeStatusInfo will report
+// that we don't know whether it is lockfree or not.
+#define COMPARE_TYPES(T, FundamentalT) (sizeof(T) == sizeof(FundamentalT) && TEST_ALIGNOF(T) >= sizeof(T))
+
+template <class T>
+struct LockFreeStatusInfo {
+  static const LockFreeStatus value = LockFreeStatus(
+      COMPARE_TYPES(T, char)
+          ? TEST_ATOMIC_CHAR_LOCK_FREE
+          : (COMPARE_TYPES(T, short)
+                 ? TEST_ATOMIC_SHORT_LOCK_FREE
+                 : (COMPARE_TYPES(T, int)
+                        ? TEST_ATOMIC_INT_LOCK_FREE
+                        : (COMPARE_TYPES(T, long)
+                               ? TEST_ATOMIC_LONG_LOCK_FREE
+                               : (COMPARE_TYPES(T, long long)
+                                      ? TEST_ATOMIC_LLONG_LOCK_FREE
+                                      : (COMPARE_TYPES(T, void*) ? TEST_ATOMIC_POINTER_LOCK_FREE : -1))))));
+
+  static const bool status_known = LockFreeStatusInfo::value != LockFreeStatus::unknown;
+};
+
+#undef COMPARE_TYPES
+
+// This doesn't work in C++03 due to issues with scoped enumerations. Just disable the test.
+#if TEST_STD_VER >= 11
+static_assert(LockFreeStatusInfo<char>::status_known, "");
+static_assert(LockFreeStatusInfo<short>::status_known, "");
+static_assert(LockFreeStatusInfo<int>::status_known, "");
+static_assert(LockFreeStatusInfo<long>::status_known, "");
+static_assert(LockFreeStatusInfo<void*>::status_known, "");
+
+// long long is a bit funky: on some platforms, its alignment is 4 bytes but its size is
+// 8 bytes. In that case, atomics may or may not be lockfree based on their address.
+static_assert(alignof(long long) == sizeof(long long) ? LockFreeStatusInfo<long long>::status_known : true, "");
+
+// Those should always be lock free: hardcode some expected values to make sure our tests are actually
+// testing something meaningful.
+static_assert(LockFreeStatusInfo<char>::value == LockFreeStatus::always, "");
+static_assert(LockFreeStatusInfo<short>::value == LockFreeStatus::always, "");
+static_assert(LockFreeStatusInfo<int>::value == LockFreeStatus::always, "");
+#endif
+
+// These macros are somewhat suprising to use, since they take the values 0, 1, or 2.
+// To make the tests clearer, get rid of them in preference of LockFreeStatusInfo.
+#undef TEST_ATOMIC_CHAR_LOCK_FREE
+#undef TEST_ATOMIC_SHORT_LOCK_FREE
+#undef TEST_ATOMIC_INT_LOCK_FREE
+#undef TEST_ATOMIC_LONG_LOCK_FREE
+#undef TEST_ATOMIC_LLONG_LOCK_FREE
+#undef TEST_ATOMIC_POINTER_LOCK_FREE
+
+#ifdef TEST_COMPILER_CLANG
+#  pragma clang diagnostic pop
+#endif
+
 struct UserAtomicType {
   int i;
 

From 00341691ac31795b2d6fd5849efc586dd57dd40e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Fri, 26 Jul 2024 17:11:01 -0700
Subject: [PATCH 051/427] [RISCV] Don't crash in RISCVMergeBaseOffset if
 INLINE_ASM uses address register in a non-memory constraint. (#100790)

If the register is used by a non-memory constraint we should disable the
fold. Otherwise, we may leave CommonOffset unassigned.

Fixes #100779.

(cherry picked from commit c901b739b67476b00209b7ee706de94c0595d763)
---
 .../lib/Target/RISCV/RISCVMergeBaseOffset.cpp | 10 +++-
 .../RISCV/inline-asm-mem-constraint.ll        | 50 +++++++++++++++++++
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index fecc83a821f42..b6ac3384e7d3e 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -429,8 +429,16 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
         NumOps = Flags.getNumOperandRegisters();
 
         // Memory constraints have two operands.
-        if (NumOps != 2 || !Flags.isMemKind())
+        if (NumOps != 2 || !Flags.isMemKind()) {
+          // If the register is used by something other than a memory contraint,
+          // we should not fold.
+          for (unsigned J = 0; J < NumOps; ++J) {
+            const MachineOperand &MO = UseMI.getOperand(I + 1 + J);
+            if (MO.isReg() && MO.getReg() == DestReg)
+              return false;
+          }
           continue;
+        }
 
         // We can't do this for constraint A because AMO instructions don't have
         // an immediate offset field.
diff --git a/llvm/test/CodeGen/RISCV/inline-asm-mem-constraint.ll b/llvm/test/CodeGen/RISCV/inline-asm-mem-constraint.ll
index 52d0dabf18839..6666d92feaac2 100644
--- a/llvm/test/CodeGen/RISCV/inline-asm-mem-constraint.ll
+++ b/llvm/test/CodeGen/RISCV/inline-asm-mem-constraint.ll
@@ -2252,3 +2252,53 @@ label:
   call void asm "lw zero, $0", "*A"(ptr elementtype(i32) getelementptr (i8, ptr blockaddress(@constraint_A_with_local_3, %label), i32 2000))
   ret void
 }
+
+@_ZN5repro9MY_BUFFER17hb0f674501d5980a6E = external global <{ [16 x i8] }>
+
+; Address is not used by a memory constraint.
+define void @should_not_fold() {
+; RV32I-LABEL: should_not_fold:
+; RV32I:       # %bb.0: # %start
+; RV32I-NEXT:    .cfi_def_cfa_offset 0
+; RV32I-NEXT:    lui a0, %hi(_ZN5repro9MY_BUFFER17hb0f674501d5980a6E)
+; RV32I-NEXT:    addi a0, a0, %lo(_ZN5repro9MY_BUFFER17hb0f674501d5980a6E)
+; RV32I-NEXT:    #APP
+; RV32I-NEXT:    ecall
+; RV32I-NEXT:    #NO_APP
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: should_not_fold:
+; RV64I:       # %bb.0: # %start
+; RV64I-NEXT:    .cfi_def_cfa_offset 0
+; RV64I-NEXT:    lui a0, %hi(_ZN5repro9MY_BUFFER17hb0f674501d5980a6E)
+; RV64I-NEXT:    addi a0, a0, %lo(_ZN5repro9MY_BUFFER17hb0f674501d5980a6E)
+; RV64I-NEXT:    #APP
+; RV64I-NEXT:    ecall
+; RV64I-NEXT:    #NO_APP
+; RV64I-NEXT:    ret
+;
+; RV32I-MEDIUM-LABEL: should_not_fold:
+; RV32I-MEDIUM:       # %bb.0: # %start
+; RV32I-MEDIUM-NEXT:    .cfi_def_cfa_offset 0
+; RV32I-MEDIUM-NEXT:  .Lpcrel_hi39:
+; RV32I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(_ZN5repro9MY_BUFFER17hb0f674501d5980a6E)
+; RV32I-MEDIUM-NEXT:    addi a0, a0, %pcrel_lo(.Lpcrel_hi39)
+; RV32I-MEDIUM-NEXT:    #APP
+; RV32I-MEDIUM-NEXT:    ecall
+; RV32I-MEDIUM-NEXT:    #NO_APP
+; RV32I-MEDIUM-NEXT:    ret
+;
+; RV64I-MEDIUM-LABEL: should_not_fold:
+; RV64I-MEDIUM:       # %bb.0: # %start
+; RV64I-MEDIUM-NEXT:    .cfi_def_cfa_offset 0
+; RV64I-MEDIUM-NEXT:  .Lpcrel_hi39:
+; RV64I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(_ZN5repro9MY_BUFFER17hb0f674501d5980a6E)
+; RV64I-MEDIUM-NEXT:    addi a0, a0, %pcrel_lo(.Lpcrel_hi39)
+; RV64I-MEDIUM-NEXT:    #APP
+; RV64I-MEDIUM-NEXT:    ecall
+; RV64I-MEDIUM-NEXT:    #NO_APP
+; RV64I-MEDIUM-NEXT:    ret
+start:
+  %0 = tail call ptr asm sideeffect alignstack "ecall", "=&{x10},0,~{vtype},~{vl},~{vxsat},~{vxrm},~{memory}"(ptr @_ZN5repro9MY_BUFFER17hb0f674501d5980a6E)
+  ret void
+}

From f53633b1e26f2c3dfb229fccf94b9738edb50cd9 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Tue, 23 Jul 2024 13:04:54 -0500
Subject: [PATCH 052/427] [libc++][libc++abi] Minor follow-up changes after
 ptrauth upstreaming (#87481)

This patch applies the comments provided on #84573. This is done as a
separate PR to avoid merge conflicts with downstreams that already had
ptrauth support.

(cherry picked from commit e64e745e8fb802ffb06259b1a5ba3db713a17087)
---
 libcxx/include/typeinfo                   |  9 ++++---
 libcxx/src/include/overridable_function.h |  6 ++---
 libcxxabi/src/private_typeinfo.cpp        | 33 +++++++++--------------
 3 files changed, 21 insertions(+), 27 deletions(-)

diff --git a/libcxx/include/typeinfo b/libcxx/include/typeinfo
index d1c0de3c1bfdd..2727cad02fa99 100644
--- a/libcxx/include/typeinfo
+++ b/libcxx/include/typeinfo
@@ -275,13 +275,14 @@ struct __type_info_implementations {
           __impl;
 };
 
-#    if defined(__arm64__) && __has_cpp_attribute(clang::ptrauth_vtable_pointer)
-#      if __has_feature(ptrauth_type_info_discriminated_vtable_pointer)
+#    if __has_cpp_attribute(_Clang::__ptrauth_vtable_pointer__)
+#      if __has_feature(ptrauth_type_info_vtable_pointer_discrimination)
 #        define _LIBCPP_TYPE_INFO_VTABLE_POINTER_AUTH                                                                  \
-          [[clang::ptrauth_vtable_pointer(process_independent, address_discrimination, type_discrimination)]]
+          [[_Clang::__ptrauth_vtable_pointer__(process_independent, address_discrimination, type_discrimination)]]
 #      else
 #        define _LIBCPP_TYPE_INFO_VTABLE_POINTER_AUTH                                                                  \
-          [[clang::ptrauth_vtable_pointer(process_independent, no_address_discrimination, no_extra_discrimination)]]
+          [[_Clang::__ptrauth_vtable_pointer__(                                                                        \
+              process_independent, no_address_discrimination, no_extra_discrimination)]]
 #      endif
 #    else
 #      define _LIBCPP_TYPE_INFO_VTABLE_POINTER_AUTH
diff --git a/libcxx/src/include/overridable_function.h b/libcxx/src/include/overridable_function.h
index e71e4f104b290..c7639f56eee26 100644
--- a/libcxx/src/include/overridable_function.h
+++ b/libcxx/src/include/overridable_function.h
@@ -13,7 +13,7 @@
 #include <__config>
 #include <cstdint>
 
-#if defined(__arm64e__) && __has_feature(ptrauth_calls)
+#if __has_feature(ptrauth_calls)
 #  include <ptrauth.h>
 #endif
 
@@ -83,13 +83,13 @@ _LIBCPP_HIDE_FROM_ABI bool __is_function_overridden(_Ret (*__fptr)(_Args...)) no
   uintptr_t __end   = reinterpret_cast<uintptr_t>(&__lcxx_override_end);
   uintptr_t __ptr   = reinterpret_cast<uintptr_t>(__fptr);
 
-#if defined(__arm64e__) && __has_feature(ptrauth_calls)
+#  if __has_feature(ptrauth_calls)
   // We must pass a void* to ptrauth_strip since it only accepts a pointer type. Also, in particular,
   // we must NOT pass a function pointer, otherwise we will strip the function pointer, and then attempt
   // to authenticate and re-sign it when casting it to a uintptr_t again, which will fail because we just
   // stripped the function pointer. See rdar://122927845.
   __ptr = reinterpret_cast<uintptr_t>(ptrauth_strip(reinterpret_cast<void*>(__ptr), ptrauth_key_function_pointer));
-#endif
+#  endif
 
   // Finally, the function was overridden if it falls outside of the section's bounds.
   return __ptr < __start || __ptr > __end;
diff --git a/libcxxabi/src/private_typeinfo.cpp b/libcxxabi/src/private_typeinfo.cpp
index 9e58501a55934..9dba91e1985e3 100644
--- a/libcxxabi/src/private_typeinfo.cpp
+++ b/libcxxabi/src/private_typeinfo.cpp
@@ -55,15 +55,12 @@
 #include <ptrauth.h>
 #endif
 
-
-template<typename T>
-static inline
-T *
-get_vtable(T *vtable) {
+template <typename T>
+static inline T* strip_vtable(T* vtable) {
 #if __has_feature(ptrauth_calls)
-    vtable = ptrauth_strip(vtable, ptrauth_key_cxx_vtable_pointer);
+  vtable = ptrauth_strip(vtable, ptrauth_key_cxx_vtable_pointer);
 #endif
-    return vtable;
+  return vtable;
 }
 
 static inline
@@ -117,11 +114,10 @@ void dyn_cast_get_derived_info(derived_object_info* info, const void* static_ptr
         reinterpret_cast<const uint8_t*>(vtable) + offset_to_ti_proxy;
     info->dynamic_type = *(reinterpret_cast<const __class_type_info* const*>(ptr_to_ti_proxy));
 #else
-    void **vtable = *static_cast<void ** const *>(static_ptr);
-    vtable = get_vtable(vtable);
-    info->offset_to_derived = reinterpret_cast<ptrdiff_t>(vtable[-2]);
-    info->dynamic_ptr = static_cast<const char*>(static_ptr) + info->offset_to_derived;
-    info->dynamic_type = static_cast<const __class_type_info*>(vtable[-1]);
+  void** vtable = strip_vtable(*static_cast<void** const*>(static_ptr));
+  info->offset_to_derived = reinterpret_cast<ptrdiff_t>(vtable[-2]);
+  info->dynamic_ptr = static_cast<const char*>(static_ptr) + info->offset_to_derived;
+  info->dynamic_type = static_cast<const __class_type_info*>(vtable[-1]);
 #endif
 }
 
@@ -576,8 +572,7 @@ __base_class_type_info::has_unambiguous_public_base(__dynamic_cast_info* info,
        find the layout.  */
     offset_to_base = __offset_flags >> __offset_shift;
     if (is_virtual) {
-      const char* vtable = *static_cast<const char* const*>(adjustedPtr);
-      vtable = get_vtable(vtable);
+      const char* vtable = strip_vtable(*static_cast<const char* const*>(adjustedPtr));
       offset_to_base = update_offset_to_base(vtable, offset_to_base);
     }
   } else if (!is_virtual) {
@@ -1517,9 +1512,8 @@ __base_class_type_info::search_above_dst(__dynamic_cast_info* info,
     ptrdiff_t offset_to_base = __offset_flags >> __offset_shift;
     if (__offset_flags & __virtual_mask)
     {
-        const char* vtable = *static_cast<const char*const*>(current_ptr);
-        vtable = get_vtable(vtable);
-        offset_to_base = update_offset_to_base(vtable, offset_to_base);
+      const char* vtable = strip_vtable(*static_cast<const char* const*>(current_ptr));
+      offset_to_base = update_offset_to_base(vtable, offset_to_base);
     }
     __base_type->search_above_dst(info, dst_ptr,
                                   static_cast<const char*>(current_ptr) + offset_to_base,
@@ -1538,9 +1532,8 @@ __base_class_type_info::search_below_dst(__dynamic_cast_info* info,
     ptrdiff_t offset_to_base = __offset_flags >> __offset_shift;
     if (__offset_flags & __virtual_mask)
     {
-        const char* vtable = *static_cast<const char*const*>(current_ptr);
-        vtable = get_vtable(vtable);
-        offset_to_base = update_offset_to_base(vtable, offset_to_base);
+      const char* vtable = strip_vtable(*static_cast<const char* const*>(current_ptr));
+      offset_to_base = update_offset_to_base(vtable, offset_to_base);
     }
     __base_type->search_below_dst(info,
                                   static_cast<const char*>(current_ptr) + offset_to_base,

From e1d05010c3277ffb1f71fbb7bfad66704dcfc4cf Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <usx@google.com>
Date: Wed, 24 Jul 2024 15:58:52 +0200
Subject: [PATCH 053/427] Fix lifetimebound for field access (#100197)

Fixes: https://github.com/llvm/llvm-project/issues/81589

There is no way to switch this off without  `-Wno-dangling`.
---
 clang/docs/ReleaseNotes.rst               |  3 +++
 clang/lib/Sema/CheckExprLifetime.cpp      |  9 ++++++++
 clang/test/SemaCXX/attr-lifetimebound.cpp | 26 +++++++++++++++++++++++
 3 files changed, 38 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 549da6812740f..71d615553c613 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -767,6 +767,9 @@ Improvements to Clang's diagnostics
 
 - Clang now diagnoses undefined behavior in constant expressions more consistently. This includes invalid shifts, and signed overflow in arithmetic.
 
+- Clang now diagnoses dangling references to fields of temporary objects. Fixes #GH81589.
+
+
 Improvements to Clang's time-trace
 ----------------------------------
 
diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp
index 5c8ef564f30aa..112cf3d081822 100644
--- a/clang/lib/Sema/CheckExprLifetime.cpp
+++ b/clang/lib/Sema/CheckExprLifetime.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CheckExprLifetime.h"
+#include "clang/AST/Decl.h"
 #include "clang/AST/Expr.h"
 #include "clang/Basic/DiagnosticSema.h"
 #include "clang/Sema/Initialization.h"
@@ -548,6 +549,14 @@ static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path,
                                        EnableLifetimeWarnings);
   }
 
+  if (auto *M = dyn_cast<MemberExpr>(Init)) {
+    // Lifetime of a non-reference type field is same as base object.
+    if (auto *F = dyn_cast<FieldDecl>(M->getMemberDecl());
+        F && !F->getType()->isReferenceType())
+      visitLocalsRetainedByInitializer(Path, M->getBase(), Visit, true,
+                                       EnableLifetimeWarnings);
+  }
+
   if (isa<CallExpr>(Init)) {
     if (EnableLifetimeWarnings)
       handleGslAnnotatedTypes(Path, Init, Visit);
diff --git a/clang/test/SemaCXX/attr-lifetimebound.cpp b/clang/test/SemaCXX/attr-lifetimebound.cpp
index 70bc545c07bd9..7db0a4d64d259 100644
--- a/clang/test/SemaCXX/attr-lifetimebound.cpp
+++ b/clang/test/SemaCXX/attr-lifetimebound.cpp
@@ -47,6 +47,31 @@ namespace usage_ok {
     q = A(); // expected-warning {{object backing the pointer q will be destroyed at the end of the full-expression}}
     r = A(1); // expected-warning {{object backing the pointer r will be destroyed at the end of the full-expression}}
   }
+
+  struct FieldCheck {
+    struct Set {
+      int a;
+    };
+    struct Pair {
+      const int& a;
+      int b;
+      Set c;
+      int * d;
+    };
+    Pair p;  
+    FieldCheck(const int& a): p(a){}
+    Pair& getR() [[clang::lifetimebound]] { return p; }
+    Pair* getP() [[clang::lifetimebound]] { return &p; }
+    Pair* getNoLB() { return &p; }
+  };
+  void test_field_access() {
+    int x = 0;
+    const int& a = FieldCheck{x}.getR().a;
+    const int& b = FieldCheck{x}.getP()->b;   // expected-warning {{temporary bound to local reference 'b' will be destroyed at the end of the full-expression}}
+    const int& c = FieldCheck{x}.getP()->c.a; // expected-warning {{temporary bound to local reference 'c' will be destroyed at the end of the full-expression}}
+    const int& d = FieldCheck{x}.getNoLB()->c.a;
+    const int* e = FieldCheck{x}.getR().d;
+  }
 }
 
 # 1 "<std>" 1 3
@@ -239,3 +264,4 @@ namespace move_forward_et_al_examples {
   S X;
   S *AddressOfOk = std::addressof(X);
 } // namespace move_forward_et_al_examples
+

From 07ef07813483c6ffa721f795c475cdc3f2341723 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Thu, 25 Jul 2024 16:45:09 -0700
Subject: [PATCH 054/427] [ELF] Remove obsoleted comment after #99567

(cherry picked from commit 026972af9c3cbd85b654b67a5b5c3b754a78a997)
---
 lld/ELF/ScriptLexer.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp
index c8c02ab0f3e09..847cd8423b49a 100644
--- a/lld/ELF/ScriptLexer.cpp
+++ b/lld/ELF/ScriptLexer.cpp
@@ -20,11 +20,6 @@
 // in various corner cases. We do not care much about efficiency because
 // the time spent in parsing linker scripts is usually negligible.
 //
-// Our grammar of the linker script is LL(2), meaning that it needs at
-// most two-token lookahead to parse. The only place we need two-token
-// lookahead is labels in version scripts, where we need to parse "local :"
-// as if "local:".
-//
 // Overall, this lexer works fine for most linker scripts. There might
 // be room for improving compatibility, but that's probably not at the
 // top of our todo list.

From 28e2baaeed86fd330d1e0fcacefaf6221685be23 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Thu, 25 Jul 2024 17:11:52 -0700
Subject: [PATCH 055/427] [ELF,test] Improve negative linker script tests

(cherry picked from commit 8644a2aa0f3540c69464f56b3d538e888b6cbdcb)
---
 lld/test/ELF/linkerscript/diag.test           | 49 +++++++++++++++++++
 lld/test/ELF/linkerscript/diag1.test          | 15 ------
 lld/test/ELF/linkerscript/diag2.test          | 13 -----
 lld/test/ELF/linkerscript/diag3.test          | 13 -----
 lld/test/ELF/linkerscript/diag4.test          | 14 ------
 lld/test/ELF/linkerscript/diag5.test          | 14 ------
 lld/test/ELF/linkerscript/diag6.test          |  7 ---
 .../invalid.test}                             |  0
 lld/test/ELF/linkerscript/unquoted.test       | 26 ++++++++++
 9 files changed, 75 insertions(+), 76 deletions(-)
 create mode 100644 lld/test/ELF/linkerscript/diag.test
 delete mode 100644 lld/test/ELF/linkerscript/diag1.test
 delete mode 100644 lld/test/ELF/linkerscript/diag2.test
 delete mode 100644 lld/test/ELF/linkerscript/diag3.test
 delete mode 100644 lld/test/ELF/linkerscript/diag4.test
 delete mode 100644 lld/test/ELF/linkerscript/diag5.test
 delete mode 100644 lld/test/ELF/linkerscript/diag6.test
 rename lld/test/ELF/{invalid-linkerscript.test => linkerscript/invalid.test} (100%)
 create mode 100644 lld/test/ELF/linkerscript/unquoted.test

diff --git a/lld/test/ELF/linkerscript/diag.test b/lld/test/ELF/linkerscript/diag.test
new file mode 100644
index 0000000000000..fbc24659a5311
--- /dev/null
+++ b/lld/test/ELF/linkerscript/diag.test
@@ -0,0 +1,49 @@
+# REQUIRES: x86
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 /dev/null -o 0.o
+
+#--- 1.lds
+SECTIONS {
+  .text + { *(.text) }
+  .keep : { *(.keep) } /*
+  comment line 1
+  comment line 2 */
+  .temp : { *(.temp) }
+}
+
+# RUN: not ld.lld -shared 0.o -T 1.lds 2>&1 | FileCheck %s --check-prefix=CHECK1 --match-full-lines --strict-whitespace
+#      CHECK1:{{.*}}:2: malformed number: +
+# CHECK1-NEXT:>>>   .text + { *(.text) }
+# CHECK1-NEXT:>>>         ^
+
+#--- 2.lds
+
+UNKNOWN_TAG {
+  .text : { *(.text) }
+  .keep : { *(.keep) }
+  .temp : { *(.temp) }
+}
+
+# RUN: not ld.lld -shared 0.o -T 2.lds 2>&1 | FileCheck %s --check-prefix=CHECK2 --match-full-lines --strict-whitespace
+#      CHECK2:{{.*}}:2: unknown directive: UNKNOWN_TAG
+# CHECK2-NEXT:>>> UNKNOWN_TAG {
+# CHECK2-NEXT:>>> ^
+
+#--- 3.lds
+SECTIONS {
+  .text : { *(.text) }
+  .keep : { *(.keep) }
+  boom ^temp : { *(.temp) }
+}
+#--- 3a.lds
+INCLUDE "3.lds"
+#--- 3b.lds
+foo = 3;
+INCLUDE "3a.lds"
+
+# RUN: not ld.lld -shared 0.o -T 3.lds 2>&1 | FileCheck %s --check-prefix=CHECK3 --match-full-lines --strict-whitespace
+# RUN: not ld.lld -shared 0.o -T 3a.lds 2>&1 | FileCheck %s --check-prefix=CHECK3 --match-full-lines --strict-whitespace
+# RUN: not ld.lld -shared 0.o -T 3b.lds 2>&1 | FileCheck %s --check-prefix=CHECK3 --match-full-lines --strict-whitespace
+#      CHECK3:{{.*}}3.lds:4: malformed number: ^
+# CHECK3-NEXT:>>>   boom ^temp : { *(.temp) }
+# CHECK3-NEXT:>>>        ^
diff --git a/lld/test/ELF/linkerscript/diag1.test b/lld/test/ELF/linkerscript/diag1.test
deleted file mode 100644
index 829bc5a1bffaf..0000000000000
--- a/lld/test/ELF/linkerscript/diag1.test
+++ /dev/null
@@ -1,15 +0,0 @@
-# REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux /dev/null -o %t.o
-# RUN: not ld.lld -shared %t.o -o /dev/null --script %s 2>&1 | FileCheck -strict-whitespace %s
-
-SECTIONS {
-  .text + { *(.text) }
-  .keep : { *(.keep) } /*
-  comment line 1
-  comment line 2 */
-  .temp : { *(.temp) }
-}
-
-CHECK:      6: malformed number: +
-CHECK-NEXT: >>>   .text + { *(.text) }
-CHECK-NEXT: >>>         ^
diff --git a/lld/test/ELF/linkerscript/diag2.test b/lld/test/ELF/linkerscript/diag2.test
deleted file mode 100644
index aeb623dbb7f4b..0000000000000
--- a/lld/test/ELF/linkerscript/diag2.test
+++ /dev/null
@@ -1,13 +0,0 @@
-# REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux /dev/null -o %t.o
-# RUN: not ld.lld -shared %t.o -o /dev/null --script %s 2>&1 | FileCheck -strict-whitespace %s
-
-UNKNOWN_TAG {
-  .text : { *(.text) }
-  .keep : { *(.keep) }
-  .temp : { *(.temp) }
-}
-
-CHECK:      5: unknown directive: UNKNOWN_TAG
-CHECK-NEXT: >>> UNKNOWN_TAG {
-CHECK-NEXT: >>> ^
diff --git a/lld/test/ELF/linkerscript/diag3.test b/lld/test/ELF/linkerscript/diag3.test
deleted file mode 100644
index 1df8d601db016..0000000000000
--- a/lld/test/ELF/linkerscript/diag3.test
+++ /dev/null
@@ -1,13 +0,0 @@
-# REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux /dev/null -o %t.o
-# RUN: not ld.lld -shared %t.o -o /dev/null --script %s 2>&1 | FileCheck -strict-whitespace %s
-
-SECTIONS {
-  .text : { *(.text) }
-  .keep : { *(.keep) }
-  boom ^temp : { *(.temp) }
-}
-
-# CHECK:      8: malformed number: ^
-# CHECK-NEXT: >>>   boom ^temp : { *(.temp) }
-# CHECK-NEXT: >>>        ^
diff --git a/lld/test/ELF/linkerscript/diag4.test b/lld/test/ELF/linkerscript/diag4.test
deleted file mode 100644
index d93a69a95c61d..0000000000000
--- a/lld/test/ELF/linkerscript/diag4.test
+++ /dev/null
@@ -1,14 +0,0 @@
-# REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux /dev/null -o %t.o
-# RUN: echo "INCLUDE \"%s\"" > %t.script
-# RUN: not ld.lld -shared %t.o -o /dev/null --script %t.script 2>&1 | FileCheck -strict-whitespace %s
-
-SECTIONS {
-  .text : { *(.text) }
-  .keep : { *(.keep) }
-  boom ^temp : { *(.temp) }
-}
-
-# CHECK:      9: malformed number: ^{{$}}
-# CHECK-NEXT: >>>   boom ^temp : { *(.temp) }
-# CHECK-NEXT: >>>        ^
diff --git a/lld/test/ELF/linkerscript/diag5.test b/lld/test/ELF/linkerscript/diag5.test
deleted file mode 100644
index 9a2304baa4413..0000000000000
--- a/lld/test/ELF/linkerscript/diag5.test
+++ /dev/null
@@ -1,14 +0,0 @@
-# REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux /dev/null -o %t.o
-# RUN: echo "INCLUDE \"%s\"" > %t.script
-# RUN: not ld.lld -shared %t.o -o /dev/null --script %t.script 2>&1 | FileCheck -strict-whitespace %s
-
-SECTIONS {
-  .text : { *(.text) }
-  .keep : { *(.keep) }
-  boom ^temp : { *(.temp) }
-}
-
-# CHECK:      9: malformed number: ^
-# CHECK-NEXT: >>>   boom ^temp : { *(.temp) }
-# CHECK-NEXT: >>>        ^
diff --git a/lld/test/ELF/linkerscript/diag6.test b/lld/test/ELF/linkerscript/diag6.test
deleted file mode 100644
index 0ec0400040b54..0000000000000
--- a/lld/test/ELF/linkerscript/diag6.test
+++ /dev/null
@@ -1,7 +0,0 @@
-# REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux /dev/null -o %t.o
-# RUN: not ld.lld -shared %t.o -o /dev/null --script %s 2>&1 | FileCheck %s
-
-SECTIONS /*
-
-CHECK: error: {{.*}}diag6.test:1: unclosed comment in a linker script
diff --git a/lld/test/ELF/invalid-linkerscript.test b/lld/test/ELF/linkerscript/invalid.test
similarity index 100%
rename from lld/test/ELF/invalid-linkerscript.test
rename to lld/test/ELF/linkerscript/invalid.test
diff --git a/lld/test/ELF/linkerscript/unquoted.test b/lld/test/ELF/linkerscript/unquoted.test
new file mode 100644
index 0000000000000..7dca75fe09ab1
--- /dev/null
+++ b/lld/test/ELF/linkerscript/unquoted.test
@@ -0,0 +1,26 @@
+# REQUIRES: x86
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 /dev/null -o 0.o
+
+#--- empty.lds
+#--- 1.lds
+
+SECTIONS /*
+#--- 1a.lds
+foo = 3;
+INCLUDE "empty.lds"
+INCLUDE "1.lds"
+
+# RUN: not ld.lld -shared 0.o -T 1.lds 2>&1 | FileCheck %s --check-prefix=CHECK1 --match-full-lines --strict-whitespace
+# RUN: not ld.lld -shared 0.o -T 1a.lds 2>&1 | FileCheck %s --check-prefix=CHECK1A --match-full-lines --strict-whitespace
+#      CHECK1:{{.*}}error: 1.lds:1: unclosed comment in a linker script
+#     CHECK1A:{{.*}}error: 1a.lds:3: unclosed comment in a linker script
+#CHECK1A-NEXT:>>> INCLUDE "1.lds"
+#CHECK1A-NEXT:>>>         ^
+
+#--- 2.lds
+INCLUDE "empty.lds"
+"
+# RUN: not ld.lld -shared 0.o -T 2.lds 2>&1 | FileCheck %s --check-prefix=CHECK2 --match-full-lines --strict-whitespace
+#      CHECK2:{{.*}}error: 2.lds:2: unclosed quote
+# CHECK2-NOT:{{.}}

From 66c08d9095d8795ef5d8ff7bb9940d560a4e0eab Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Sat, 27 Jul 2024 10:55:17 -0700
Subject: [PATCH 056/427] [ELF] Add Relocs and invokeOnRelocs. NFC

Relocs is to simplify CREL support (#98115) while invokeOnRelocs
simplifies some relsOrRelas call sites that will use the CREL iterator.

(cherry picked from commit 6efc3774bd8c5fcb105cda73ec27c05ef850dc19)
---
 lld/ELF/ICF.cpp               | 18 +++++++++---------
 lld/ELF/InputSection.cpp      |  2 +-
 lld/ELF/InputSection.h        | 15 ++++++++++++---
 lld/ELF/Relocations.cpp       | 18 ++++++++----------
 lld/ELF/Relocations.h         | 10 ++++++++--
 lld/ELF/SyntheticSections.cpp | 12 ++++--------
 lld/ELF/SyntheticSections.h   |  5 +++--
 7 files changed, 45 insertions(+), 35 deletions(-)

diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp
index bfc605c793a92..a6b52d78fa806 100644
--- a/lld/ELF/ICF.cpp
+++ b/lld/ELF/ICF.cpp
@@ -103,12 +103,12 @@ template <class ELFT> class ICF {
   void segregate(size_t begin, size_t end, uint32_t eqClassBase, bool constant);
 
   template <class RelTy>
-  bool constantEq(const InputSection *a, ArrayRef<RelTy> relsA,
-                  const InputSection *b, ArrayRef<RelTy> relsB);
+  bool constantEq(const InputSection *a, Relocs<RelTy> relsA,
+                  const InputSection *b, Relocs<RelTy> relsB);
 
   template <class RelTy>
-  bool variableEq(const InputSection *a, ArrayRef<RelTy> relsA,
-                  const InputSection *b, ArrayRef<RelTy> relsB);
+  bool variableEq(const InputSection *a, Relocs<RelTy> relsA,
+                  const InputSection *b, Relocs<RelTy> relsB);
 
   bool equalsConstant(const InputSection *a, const InputSection *b);
   bool equalsVariable(const InputSection *a, const InputSection *b);
@@ -235,8 +235,8 @@ void ICF<ELFT>::segregate(size_t begin, size_t end, uint32_t eqClassBase,
 // Compare two lists of relocations.
 template <class ELFT>
 template <class RelTy>
-bool ICF<ELFT>::constantEq(const InputSection *secA, ArrayRef<RelTy> ra,
-                           const InputSection *secB, ArrayRef<RelTy> rb) {
+bool ICF<ELFT>::constantEq(const InputSection *secA, Relocs<RelTy> ra,
+                           const InputSection *secB, Relocs<RelTy> rb) {
   if (ra.size() != rb.size())
     return false;
   auto rai = ra.begin(), rae = ra.end(), rbi = rb.begin();
@@ -333,8 +333,8 @@ bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) {
 // relocations point to the same section in terms of ICF.
 template <class ELFT>
 template <class RelTy>
-bool ICF<ELFT>::variableEq(const InputSection *secA, ArrayRef<RelTy> ra,
-                           const InputSection *secB, ArrayRef<RelTy> rb) {
+bool ICF<ELFT>::variableEq(const InputSection *secA, Relocs<RelTy> ra,
+                           const InputSection *secB, Relocs<RelTy> rb) {
   assert(ra.size() == rb.size());
 
   auto rai = ra.begin(), rae = ra.end(), rbi = rb.begin();
@@ -441,7 +441,7 @@ void ICF<ELFT>::forEachClass(llvm::function_ref<void(size_t, size_t)> fn) {
 // hash.
 template <class RelTy>
 static void combineRelocHashes(unsigned cnt, InputSection *isec,
-                               ArrayRef<RelTy> rels) {
+                               Relocs<RelTy> rels) {
   uint32_t hash = isec->eqClass[cnt % 2];
   for (RelTy rel : rels) {
     Symbol &s = isec->file->getRelocTargetSym(rel);
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 12ab1f1eac808..306872f164f6f 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -911,7 +911,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type,
 // So, we handle relocations for non-alloc sections directly in this
 // function as a performance optimization.
 template <class ELFT, class RelTy>
-void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
+void InputSection::relocateNonAlloc(uint8_t *buf, Relocs<RelTy> rels) {
   const unsigned bits = sizeof(typename ELFT::uint) * 8;
   const TargetInfo &target = *elf::target;
   const auto emachine = config->emachine;
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index ec12235f842a9..c89a545e1543f 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -37,11 +37,20 @@ LLVM_LIBRARY_VISIBILITY extern std::vector<Partition> partitions;
 
 // Returned by InputSectionBase::relsOrRelas. At least one member is empty.
 template <class ELFT> struct RelsOrRelas {
-  ArrayRef<typename ELFT::Rel> rels;
-  ArrayRef<typename ELFT::Rela> relas;
+  Relocs<typename ELFT::Rel> rels;
+  Relocs<typename ELFT::Rela> relas;
   bool areRelocsRel() const { return rels.size(); }
 };
 
+#define invokeOnRelocs(sec, f, ...)                                            \
+  {                                                                            \
+    const RelsOrRelas<ELFT> rs = (sec).template relsOrRelas<ELFT>();           \
+    if (rs.areRelocsRel())                                                     \
+      f(__VA_ARGS__, rs.rels);                                                 \
+    else                                                                       \
+      f(__VA_ARGS__, rs.relas);                                                \
+  }
+
 // This is the base class of all sections that lld handles. Some are sections in
 // input files, some are sections in the produced output file and some exist
 // just as a convenience for implementing special ways of combining some
@@ -407,7 +416,7 @@ class InputSection : public InputSectionBase {
   InputSectionBase *getRelocatedSection() const;
 
   template <class ELFT, class RelTy>
-  void relocateNonAlloc(uint8_t *buf, llvm::ArrayRef<RelTy> rels);
+  void relocateNonAlloc(uint8_t *buf, Relocs<RelTy> rels);
 
   // Points to the canonical section. If ICF folds two sections, repl pointer of
   // one section points to the other.
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 36857d72c647e..713bd15440251 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -475,8 +475,9 @@ class RelocationScanner {
                                 uint64_t relOff) const;
   void processAux(RelExpr expr, RelType type, uint64_t offset, Symbol &sym,
                   int64_t addend) const;
-  template <class ELFT, class RelTy> void scanOne(RelTy *&i);
-  template <class ELFT, class RelTy> void scan(ArrayRef<RelTy> rels);
+  template <class ELFT, class RelTy>
+  void scanOne(typename Relocs<RelTy>::const_iterator &i);
+  template <class ELFT, class RelTy> void scan(Relocs<RelTy> rels);
 };
 } // namespace
 
@@ -1433,7 +1434,8 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym,
   return 0;
 }
 
-template <class ELFT, class RelTy> void RelocationScanner::scanOne(RelTy *&i) {
+template <class ELFT, class RelTy>
+void RelocationScanner::scanOne(typename Relocs<RelTy>::const_iterator &i) {
   const RelTy &rel = *i;
   uint32_t symIndex = rel.getSymbol(config->isMips64EL);
   Symbol &sym = sec->getFile<ELFT>()->getSymbol(symIndex);
@@ -1574,7 +1576,7 @@ static void checkPPC64TLSRelax(InputSectionBase &sec, ArrayRef<RelTy> rels) {
 }
 
 template <class ELFT, class RelTy>
-void RelocationScanner::scan(ArrayRef<RelTy> rels) {
+void RelocationScanner::scan(Relocs<RelTy> rels) {
   // Not all relocations end up in Sec->Relocations, but a lot do.
   sec->relocations.reserve(rels.size());
 
@@ -1592,7 +1594,7 @@ void RelocationScanner::scan(ArrayRef<RelTy> rels) {
 
   end = static_cast<const void *>(rels.end());
   for (auto i = rels.begin(); i != end;)
-    scanOne<ELFT>(i);
+    scanOne<ELFT, RelTy>(i);
 
   // Sort relocations by offset for more efficient searching for
   // R_RISCV_PCREL_HI20 and R_PPC64_ADDR64.
@@ -2409,11 +2411,7 @@ template <class ELFT> void elf::checkNoCrossRefs() {
         if (!isd)
           continue;
         parallelForEach(isd->sections, [&](InputSection *sec) {
-          const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>();
-          if (rels.areRelocsRel())
-            scanCrossRefs<ELFT>(noxref, osec, sec, rels.rels);
-          else
-            scanCrossRefs<ELFT>(noxref, osec, sec, rels.relas);
+          invokeOnRelocs(*sec, scanCrossRefs<ELFT>, noxref, osec, sec);
         });
       }
     }
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index 1bee0dedf8587..77d8d52ca3d3f 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -205,6 +205,11 @@ class ThunkCreator {
   uint32_t pass = 0;
 };
 
+template <class RelTy> struct Relocs : ArrayRef<RelTy> {
+  Relocs() = default;
+  Relocs(ArrayRef<RelTy> a) : ArrayRef<RelTy>(a) {}
+};
+
 // Return a int64_t to make sure we get the sign extension out of the way as
 // early as possible.
 template <class ELFT>
@@ -217,14 +222,15 @@ static inline int64_t getAddend(const typename ELFT::Rela &rel) {
 }
 
 template <typename RelTy>
-ArrayRef<RelTy> sortRels(ArrayRef<RelTy> rels, SmallVector<RelTy, 0> &storage) {
+inline Relocs<RelTy> sortRels(Relocs<RelTy> rels,
+                              SmallVector<RelTy, 0> &storage) {
   auto cmp = [](const RelTy &a, const RelTy &b) {
     return a.r_offset < b.r_offset;
   };
   if (!llvm::is_sorted(rels, cmp)) {
     storage.assign(rels.begin(), rels.end());
     llvm::stable_sort(storage, cmp);
-    rels = storage;
+    rels = Relocs<RelTy>(storage);
   }
   return rels;
 }
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 5d3f3df216b85..b40ff0bc3cb03 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -3203,10 +3203,10 @@ template <class ELFT> DebugNamesSection<ELFT>::DebugNamesSection() {
 template <class ELFT>
 template <class RelTy>
 void DebugNamesSection<ELFT>::getNameRelocs(
-    InputSection *sec, ArrayRef<RelTy> rels,
-    DenseMap<uint32_t, uint32_t> &relocs) {
+    const InputFile &file, DenseMap<uint32_t, uint32_t> &relocs,
+    Relocs<RelTy> rels) {
   for (const RelTy &rel : rels) {
-    Symbol &sym = sec->file->getRelocTargetSym(rel);
+    Symbol &sym = file.getRelocTargetSym(rel);
     relocs[rel.r_offset] = sym.getVA(getAddend<ELFT>(rel));
   }
 }
@@ -3216,11 +3216,7 @@ template <class ELFT> void DebugNamesSection<ELFT>::finalizeContents() {
   auto relocs = std::make_unique<DenseMap<uint32_t, uint32_t>[]>(numChunks);
   parallelFor(0, numChunks, [&](size_t i) {
     InputSection *sec = inputSections[i];
-    auto rels = sec->template relsOrRelas<ELFT>();
-    if (rels.areRelocsRel())
-      getNameRelocs(sec, rels.rels, relocs.get()[i]);
-    else
-      getNameRelocs(sec, rels.relas, relocs.get()[i]);
+    invokeOnRelocs(*sec, getNameRelocs, *sec->file, relocs.get()[i]);
 
     // Relocate CU offsets with .debug_info + X relocations.
     OutputChunk &chunk = chunks.get()[i];
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index eaa09ea7194fb..d4169e1e1acaf 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -916,8 +916,9 @@ class DebugNamesSection final : public DebugNamesBaseSection {
   void writeTo(uint8_t *buf) override;
 
   template <class RelTy>
-  void getNameRelocs(InputSection *sec, ArrayRef<RelTy> rels,
-                     llvm::DenseMap<uint32_t, uint32_t> &relocs);
+  void getNameRelocs(const InputFile &file,
+                     llvm::DenseMap<uint32_t, uint32_t> &relocs,
+                     Relocs<RelTy> rels);
 
 private:
   static void readOffsets(InputChunk &inputChunk, OutputChunk &chunk,

From 5d9f4600865ca5d734a73f540136402462deac91 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Sat, 27 Jul 2024 10:58:27 -0700
Subject: [PATCH 057/427] [ELF] Use invokeOnRelocs. NFC

(cherry picked from commit c7231e49099d56fdc5b2207142184a0bf2544ec1)
---
 lld/ELF/InputSection.cpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 306872f164f6f..7857d857488c0 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -1073,11 +1073,7 @@ void InputSectionBase::relocate(uint8_t *buf, uint8_t *bufEnd) {
   auto *sec = cast<InputSection>(this);
   // For a relocatable link, also call relocateNonAlloc() to rewrite applicable
   // locations with tombstone values.
-  const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>();
-  if (rels.areRelocsRel())
-    sec->relocateNonAlloc<ELFT>(buf, rels.rels);
-  else
-    sec->relocateNonAlloc<ELFT>(buf, rels.relas);
+  invokeOnRelocs(*sec, sec->relocateNonAlloc<ELFT>, buf);
 }
 
 // For each function-defining prologue, find any calls to __morestack,

From cbfbbd74788252b3c5488480fc9d6914f9cf0f38 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman@google.com>
Date: Sat, 27 Jul 2024 10:53:23 -0700
Subject: [PATCH 058/427] [llvm-exegesis] Use correct rseq struct size
 (#100804)

Glibc v2.40 changes the definition of __rseq_size to the usable area of
the struct rather than the actual size of the struct to accommodate
users trying to figure out what features can be used. This change breaks
llvm-exegesis trying to disable rseq as the size registered in the
kernel is no longer equal to __rseq_size. This patch adds a check to see
if __rseq_size is less than 32 bytes and uses 32 as a value if it is
given alignment requirements.

Fixes #100791.

(cherry picked from commit 1e8df9e85a1ff213e5868bd822877695f27504ad)
---
 llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
index ed53f8fabb175..adee869967d98 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -466,9 +466,20 @@ class SubProcessFunctionExecutorImpl
 // segfaults in the program. Unregister the rseq region so that we can safely
 // unmap it later
 #ifdef GLIBC_INITS_RSEQ
+    unsigned int RseqStructSize = __rseq_size;
+
+    // Glibc v2.40 (the change is also expected to be backported to v2.35)
+    // changes the definition of __rseq_size to be the usable area of the struct
+    // rather than the actual size of the struct. v2.35 uses only 20 bytes of
+    // the 32 byte struct. For now, it should be safe to assume that if the
+    // usable size is less than 32, the actual size of the struct will be 32
+    // bytes given alignment requirements.
+    if (__rseq_size < 32)
+      RseqStructSize = 32;
+
     long RseqDisableOutput =
         syscall(SYS_rseq, (intptr_t)__builtin_thread_pointer() + __rseq_offset,
-                __rseq_size, RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
+                RseqStructSize, RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
     if (RseqDisableOutput != 0)
       exit(ChildProcessExitCodeE::RSeqDisableFailed);
 #endif // GLIBC_INITS_RSEQ

From d728d60cea0181ddd38f8710dcc1e13cd1540c56 Mon Sep 17 00:00:00 2001
From: wanglei <wanglei@loongson.cn>
Date: Fri, 26 Jul 2024 14:38:36 +0800
Subject: [PATCH 059/427] [lld][ELF][LoongArch] Support
 R_LARCH_TLS_{LD,GD,DESC}_PCREL_S2

Reviewed By: MaskRay, SixWeining

Pull Request: https://github.com/llvm/llvm-project/pull/100105

(cherry picked from commit 0057a969a2a397c1ba57e06b65a8bb56af2ce987)
---
 lld/ELF/Arch/LoongArch.cpp                  |  10 ++
 lld/ELF/Relocations.cpp                     |   3 +-
 lld/test/ELF/loongarch-tls-gd-pcrel20-s2.s  | 129 ++++++++++++++++++
 lld/test/ELF/loongarch-tls-ld-pcrel20-s2.s  |  82 +++++++++++
 lld/test/ELF/loongarch-tlsdesc-pcrel20-s2.s | 142 ++++++++++++++++++++
 5 files changed, 365 insertions(+), 1 deletion(-)
 create mode 100644 lld/test/ELF/loongarch-tls-gd-pcrel20-s2.s
 create mode 100644 lld/test/ELF/loongarch-tls-ld-pcrel20-s2.s
 create mode 100644 lld/test/ELF/loongarch-tlsdesc-pcrel20-s2.s

diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 9466e8b1ce54d..db0bc6c760096 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -511,6 +511,12 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s,
     return R_TLSDESC;
   case R_LARCH_TLS_DESC_CALL:
     return R_TLSDESC_CALL;
+  case R_LARCH_TLS_LD_PCREL20_S2:
+    return R_TLSLD_PC;
+  case R_LARCH_TLS_GD_PCREL20_S2:
+    return R_TLSGD_PC;
+  case R_LARCH_TLS_DESC_PCREL20_S2:
+    return R_TLSDESC_PC;
 
   // Other known relocs that are explicitly unimplemented:
   //
@@ -557,7 +563,11 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
     write64le(loc, val);
     return;
 
+  // Relocs intended for `pcaddi`.
   case R_LARCH_PCREL20_S2:
+  case R_LARCH_TLS_LD_PCREL20_S2:
+  case R_LARCH_TLS_GD_PCREL20_S2:
+  case R_LARCH_TLS_DESC_PCREL20_S2:
     checkInt(loc, val, 22, rel);
     checkAlignment(loc, val, 4, rel);
     write32le(loc, setJ20(read32le(loc), val >> 2));
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 713bd15440251..9a799cd286135 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1309,7 +1309,8 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym,
   // LoongArch does not yet implement transition from TLSDESC to LE/IE, so
   // generate TLSDESC dynamic relocation for the dynamic linker to handle.
   if (config->emachine == EM_LOONGARCH &&
-      oneof<R_LOONGARCH_TLSDESC_PAGE_PC, R_TLSDESC, R_TLSDESC_CALL>(expr)) {
+      oneof<R_LOONGARCH_TLSDESC_PAGE_PC, R_TLSDESC, R_TLSDESC_PC,
+            R_TLSDESC_CALL>(expr)) {
     if (expr != R_TLSDESC_CALL) {
       sym.setFlags(NEEDS_TLSDESC);
       c.addReloc({expr, type, offset, addend, &sym});
diff --git a/lld/test/ELF/loongarch-tls-gd-pcrel20-s2.s b/lld/test/ELF/loongarch-tls-gd-pcrel20-s2.s
new file mode 100644
index 0000000000000..d4d12b9d4a520
--- /dev/null
+++ b/lld/test/ELF/loongarch-tls-gd-pcrel20-s2.s
@@ -0,0 +1,129 @@
+# REQUIRES: loongarch
+# RUN: rm -rf %t && split-file %s %t
+
+# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/a.s -o %t/a.32.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/bc.s -o %t/bc.32.o
+# RUN: ld.lld -shared -soname=bc.so %t/bc.32.o -o %t/bc.32.so
+# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/tga.s -o %t/tga.32.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/a.s -o %t/a.64.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/bc.s -o %t/bc.64.o
+# RUN: ld.lld -shared -soname=bc.so %t/bc.64.o -o %t/bc.64.so
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/tga.s -o %t/tga.64.o
+
+## LA32 GD
+# RUN: ld.lld -shared %t/a.32.o %t/bc.32.o -o %t/gd.32.so
+# RUN: llvm-readobj -r %t/gd.32.so | FileCheck --check-prefix=GD32-REL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t/gd.32.so | FileCheck --check-prefix=GD32 %s
+
+## LA32 GD -> LE
+# RUN: ld.lld %t/a.32.o %t/bc.32.o %t/tga.32.o -o %t/le.32
+# RUN: llvm-readelf -r %t/le.32 | FileCheck --check-prefix=NOREL %s
+# RUN: llvm-readelf -x .got %t/le.32 | FileCheck --check-prefix=LE32-GOT %s
+# RUN: ld.lld -pie %t/a.32.o %t/bc.32.o %t/tga.32.o -o %t/le-pie.32
+# RUN: llvm-readelf -r %t/le-pie.32 | FileCheck --check-prefix=NOREL %s
+# RUN: llvm-readelf -x .got %t/le-pie.32 | FileCheck --check-prefix=LE32-GOT %s
+
+## LA32 GD -> IE
+# RUN: ld.lld %t/a.32.o %t/bc.32.so %t/tga.32.o -o %t/ie.32
+# RUN: llvm-readobj -r %t/ie.32 | FileCheck --check-prefix=IE32-REL %s
+# RUN: llvm-readelf -x .got %t/ie.32 | FileCheck --check-prefix=IE32-GOT %s
+
+## LA64 GD
+# RUN: ld.lld -shared %t/a.64.o %t/bc.64.o -o %t/gd.64.so
+# RUN: llvm-readobj -r %t/gd.64.so | FileCheck --check-prefix=GD64-REL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t/gd.64.so | FileCheck --check-prefix=GD64 %s
+
+## LA64 GD -> LE
+# RUN: ld.lld %t/a.64.o %t/bc.64.o %t/tga.64.o -o %t/le.64
+# RUN: llvm-readelf -r %t/le.64 | FileCheck --check-prefix=NOREL %s
+# RUN: llvm-readelf -x .got %t/le.64 | FileCheck --check-prefix=LE64-GOT %s
+# RUN: ld.lld -pie %t/a.64.o %t/bc.64.o %t/tga.64.o -o %t/le-pie.64
+# RUN: llvm-readelf -r %t/le-pie.64 | FileCheck --check-prefix=NOREL %s
+# RUN: llvm-readelf -x .got %t/le-pie.64 | FileCheck --check-prefix=LE64-GOT %s
+
+## LA64 GD -> IE
+# RUN: ld.lld %t/a.64.o %t/bc.64.so %t/tga.64.o -o %t/ie.64
+# RUN: llvm-readobj -r %t/ie.64 | FileCheck --check-prefix=IE64-REL %s
+# RUN: llvm-readelf -x .got %t/ie.64 | FileCheck --check-prefix=IE64-GOT %s
+
+# GD32-REL:      .rela.dyn {
+# GD32-REL-NEXT:   0x20300 R_LARCH_TLS_DTPMOD32 a 0x0
+# GD32-REL-NEXT:   0x20304 R_LARCH_TLS_DTPREL32 a 0x0
+# GD32-REL-NEXT:   0x20308 R_LARCH_TLS_DTPMOD32 b 0x0
+# GD32-REL-NEXT:   0x2030C R_LARCH_TLS_DTPREL32 b 0x0
+# GD32-REL-NEXT: }
+
+## &DTPMOD(a) - . = 0x20300 - 0x10250 = 16428<<2
+# GD32:      10250: pcaddi $a0, 16428
+# GD32-NEXT:        bl 44
+
+## &DTPMOD(b) - . = 0x20308 - 0x10258 = 16428<<2
+# GD32:      10258: pcaddi $a0, 16428
+# GD32-NEXT:        bl 36
+
+# GD64-REL:      .rela.dyn {
+# GD64-REL-NEXT:   0x204C0 R_LARCH_TLS_DTPMOD64 a 0x0
+# GD64-REL-NEXT:   0x204C8 R_LARCH_TLS_DTPREL64 a 0x0
+# GD64-REL-NEXT:   0x204D0 R_LARCH_TLS_DTPMOD64 b 0x0
+# GD64-REL-NEXT:   0x204D8 R_LARCH_TLS_DTPREL64 b 0x0
+# GD64-REL-NEXT: }
+
+## &DTPMOD(a) - . = 0x204c0 - 0x10398 = 16458<<2
+# GD64:      10398: pcaddi $a0, 16458
+# GD64-NEXT:        bl 52
+
+## &DTPMOD(b) - . = 0x204d0 - 0x103a4 = 16460<<2
+# GD64:      103a0: pcaddi $a0, 16460
+# GD64-NEXT:        bl 44
+
+# NOREL: no relocations
+
+## .got contains pre-populated values: [a@dtpmod, a@dtprel, b@dtpmod, b@dtprel]
+## a@dtprel = st_value(a) = 0x8
+## b@dtprel = st_value(b) = 0xc
+# LE32-GOT: section '.got':
+# LE32-GOT-NEXT: 0x[[#%x,A:]] 01000000 08000000 01000000 0c000000
+# LE64-GOT: section '.got':
+# LE64-GOT-NEXT: 0x[[#%x,A:]] 01000000 00000000 08000000 00000000
+# LE64-GOT-NEXT: 0x[[#%x,A:]] 01000000 00000000 0c000000 00000000
+
+## a is local - relaxed to LE - its DTPMOD/DTPREL slots are link-time constants.
+## b is external - DTPMOD/DTPREL dynamic relocations are required.
+# IE32-REL:      .rela.dyn {
+# IE32-REL-NEXT:   0x30220 R_LARCH_TLS_DTPMOD32 b 0x0
+# IE32-REL-NEXT:   0x30224 R_LARCH_TLS_DTPREL32 b 0x0
+# IE32-REL-NEXT: }
+# IE32-GOT:      section '.got':
+# IE32-GOT-NEXT: 0x00030218 01000000 08000000 00000000 00000000
+
+# IE64-REL:      .rela.dyn {
+# IE64-REL-NEXT:   0x30380 R_LARCH_TLS_DTPMOD64 b 0x0
+# IE64-REL-NEXT:   0x30388 R_LARCH_TLS_DTPREL64 b 0x0
+# IE64-REL-NEXT: }
+# IE64-GOT:      section '.got':
+# IE64-GOT-NEXT: 0x00030370 01000000 00000000 08000000 00000000
+# IE64-GOT-NEXT: 0x00030380 00000000 00000000 00000000 00000000
+
+#--- a.s
+pcaddi $a0, %gd_pcrel_20(a)
+bl %plt(__tls_get_addr)
+
+pcaddi $a0, %gd_pcrel_20(b)
+bl %plt(__tls_get_addr)
+
+.section .tbss,"awT",@nobits
+.globl a
+.zero 8
+a:
+.zero 4
+
+#--- bc.s
+.section .tbss,"awT",@nobits
+.globl b, c
+b:
+.zero 4
+c:
+
+#--- tga.s
+.globl __tls_get_addr
+__tls_get_addr:
diff --git a/lld/test/ELF/loongarch-tls-ld-pcrel20-s2.s b/lld/test/ELF/loongarch-tls-ld-pcrel20-s2.s
new file mode 100644
index 0000000000000..70186f5538dfc
--- /dev/null
+++ b/lld/test/ELF/loongarch-tls-ld-pcrel20-s2.s
@@ -0,0 +1,82 @@
+# REQUIRES: loongarch
+# RUN: rm -rf %t && split-file %s %t
+
+# RUN: llvm-mc --filetype=obj --triple=loongarch32 --position-independent %t/a.s -o %t/a.32.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch32 %t/tga.s -o %t/tga.32.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --position-independent %t/a.s -o %t/a.64.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 %t/tga.s -o %t/tga.64.o
+
+## LA32 LD
+# RUN: ld.lld -shared %t/a.32.o -o %t/ld.32.so
+# RUN: llvm-readobj -r %t/ld.32.so | FileCheck --check-prefix=LD32-REL %s
+# RUN: llvm-readelf -x .got %t/ld.32.so | FileCheck --check-prefix=LD32-GOT %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t/ld.32.so | FileCheck --check-prefixes=LD32 %s
+
+## LA32 LD -> LE
+# RUN: ld.lld %t/a.32.o %t/tga.32.o -o %t/le.32
+# RUN: llvm-readelf -r %t/le.32 | FileCheck --check-prefix=NOREL %s
+# RUN: llvm-readelf -x .got %t/le.32 | FileCheck --check-prefix=LE32-GOT %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t/le.32 | FileCheck --check-prefixes=LE32 %s
+
+## LA64 LD
+# RUN: ld.lld -shared %t/a.64.o -o %t/ld.64.so
+# RUN: llvm-readobj -r %t/ld.64.so | FileCheck --check-prefix=LD64-REL %s
+# RUN: llvm-readelf -x .got %t/ld.64.so | FileCheck --check-prefix=LD64-GOT %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t/ld.64.so | FileCheck --check-prefixes=LD64 %s
+
+## LA64 LD -> LE
+# RUN: ld.lld %t/a.64.o %t/tga.64.o -o %t/le.64
+# RUN: llvm-readelf -r %t/le.64 | FileCheck --check-prefix=NOREL %s
+# RUN: llvm-readelf -x .got %t/le.64 | FileCheck --check-prefix=LE64-GOT %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t/le.64 | FileCheck --check-prefixes=LE64 %s
+
+## a@dtprel = st_value(a) = 0 is a link-time constant.
+# LD32-REL:      .rela.dyn {
+# LD32-REL-NEXT:   0x20280 R_LARCH_TLS_DTPMOD32 - 0x0
+# LD32-REL-NEXT: }
+# LD32-GOT:      section '.got':
+# LD32-GOT-NEXT: 0x00020280 00000000 00000000
+
+# LD64-REL:      .rela.dyn {
+# LD64-REL-NEXT:   0x20400 R_LARCH_TLS_DTPMOD64 - 0x0
+# LD64-REL-NEXT: }
+# LD64-GOT:      section '.got':
+# LD64-GOT-NEXT: 0x00020400 00000000 00000000 00000000 00000000
+
+## LA32: &DTPMOD(a) - . = 0x20280 - 0x101cc = 16429<<2
+# LD32:      101cc: pcaddi $a0, 16429
+# LD32-NEXT:        bl 48
+
+## LA64: &DTPMOD(a) - . = 0x20400 - 0x102e0 = 16456<<2
+# LD64:      102e0: pcaddi $a0, 16456
+# LD64-NEXT:        bl 44
+
+# NOREL: no relocations
+
+## a is local - its DTPMOD/DTPREL slots are link-time constants.
+## a@dtpmod = 1 (main module)
+# LE32-GOT: section '.got':
+# LE32-GOT-NEXT: 0x0003011c 01000000 00000000
+
+# LE64-GOT: section '.got':
+# LE64-GOT-NEXT: 0x000301d0 01000000 00000000 00000000 00000000
+
+## LA32: DTPMOD(.LANCHOR0) - . = 0x3011c - 0x20114 = 16386<<2
+# LE32:      20114: pcaddi $a0, 16386
+# LE32-NEXT:        bl 4
+
+## LA64: DTPMOD(.LANCHOR0) - . = 0x301d0 - 0x201c8 = 16386<<2
+# LE64:      201c8: pcaddi $a0, 16386
+# LE64-NEXT:        bl 4
+
+#--- a.s
+pcaddi $a0, %ld_pcrel_20(.LANCHOR0)
+bl %plt(__tls_get_addr)
+
+.section .tbss,"awT",@nobits
+.set .LANCHOR0, . + 0
+.zero 8
+
+#--- tga.s
+.globl __tls_get_addr
+__tls_get_addr:
diff --git a/lld/test/ELF/loongarch-tlsdesc-pcrel20-s2.s b/lld/test/ELF/loongarch-tlsdesc-pcrel20-s2.s
new file mode 100644
index 0000000000000..99e21d9935197
--- /dev/null
+++ b/lld/test/ELF/loongarch-tlsdesc-pcrel20-s2.s
@@ -0,0 +1,142 @@
+# REQUIRES: loongarch
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 a.s -o a.64.o
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 c.s -o c.64.o
+# RUN: ld.lld -shared -soname=c.64.so c.64.o -o c.64.so
+# RUN: llvm-mc -filetype=obj -triple=loongarch32 --defsym ELF32=1 a.s -o a.32.o
+# RUN: llvm-mc -filetype=obj -triple=loongarch32 --defsym ELF32=1 c.s -o c.32.o
+# RUN: ld.lld -shared -soname=c.32.so c.32.o -o c.32.so
+
+# RUN: ld.lld -shared -z now a.64.o c.64.o -o a.64.so
+# RUN: llvm-readobj -r -x .got a.64.so | FileCheck --check-prefix=GD64-RELA %s
+# RUN: llvm-objdump --no-show-raw-insn -h -d a.64.so | FileCheck %s --check-prefix=GD64
+
+# RUN: ld.lld -shared -z now a.64.o c.64.o -o rel.64.so -z rel
+# RUN: llvm-readobj -r -x .got rel.64.so | FileCheck --check-prefix=GD64-REL %s
+
+## FIXME: The transition frome TLSDESC to IE/LE has not yet been implemented.
+## Keep the dynamic relocations and hand them over to dynamic linker.
+
+# RUN: ld.lld -e 0 -z now a.64.o c.64.o -o a.64.le
+# RUN: llvm-readobj -r -x .got a.64.le | FileCheck --check-prefix=LE64-RELA %s
+
+# RUN: ld.lld -e 0 -z now a.64.o c.64.so -o a.64.ie
+# RUN: llvm-readobj -r -x .got a.64.ie | FileCheck --check-prefix=IE64-RELA %s
+
+## 32-bit code is mostly the same. We only test a few variants.
+
+# RUN: ld.lld -shared -z now a.32.o c.32.o -o rel.32.so -z rel
+# RUN: llvm-readobj -r -x .got rel.32.so | FileCheck --check-prefix=GD32-REL %s
+
+# GD64-RELA:      .rela.dyn {
+# GD64-RELA-NEXT:   0x203F0 R_LARCH_TLS_DESC64 - 0x7FF
+# GD64-RELA-NEXT:   0x203D0 R_LARCH_TLS_DESC64 a 0x0
+# GD64-RELA-NEXT:   0x203E0 R_LARCH_TLS_DESC64 c 0x0
+# GD64-RELA-NEXT: }
+# GD64-RELA:      Hex dump of section '.got':
+# GD64-RELA-NEXT: 0x000203d0 00000000 00000000 00000000 00000000 .
+# GD64-RELA-NEXT: 0x000203e0 00000000 00000000 00000000 00000000 .
+# GD64-RELA-NEXT: 0x000203f0 00000000 00000000 00000000 00000000 .
+
+# GD64-REL:      .rel.dyn {
+# GD64-REL-NEXT:   0x203D8 R_LARCH_TLS_DESC64 -
+# GD64-REL-NEXT:   0x203B8 R_LARCH_TLS_DESC64 a
+# GD64-REL-NEXT:   0x203C8 R_LARCH_TLS_DESC64 c
+# GD64-REL-NEXT: }
+# GD64-REL:      Hex dump of section '.got':
+# GD64-REL-NEXT: 0x000203b8 00000000 00000000 00000000 00000000 .
+# GD64-REL-NEXT: 0x000203c8 00000000 00000000 00000000 00000000 .
+# GD64-REL-NEXT: 0x000203d8 00000000 00000000 ff070000 00000000 .
+
+# GD64:      .got     00000030 00000000000203d0
+
+## &.got[a]-. = 0x203d0 - 0x102e0 = 16444<<2
+# GD64:        102e0: pcaddi $a0, 16444
+# GD64-NEXT:          ld.d $ra, $a0, 0
+# GD64-NEXT:          jirl $ra, $ra, 0
+# GD64-NEXT:          add.d $a1, $a0, $tp
+
+## &.got[b]-. = 0x203d0+32 - 0x102f0 = 16448<<2
+# GD64:        102f0: pcaddi $a0, 16448
+# GD64-NEXT:          ld.d $ra, $a0, 0
+# GD64-NEXT:          jirl $ra, $ra, 0
+# GD64-NEXT:          add.d $a2, $a0, $tp
+
+## &.got[c]-. = 0x203d0+16 - 0x10300 = 16440<<2
+# GD64:        10300: pcaddi $a0, 16440
+# GD64-NEXT:          ld.d $ra, $a0, 0
+# GD64-NEXT:          jirl $ra, $ra, 0
+# GD64-NEXT:          add.d $a3, $a0, $tp
+
+# LE64-RELA:      .rela.dyn {
+# LE64-RELA-NEXT:   0x30240 R_LARCH_TLS_DESC64 - 0x8
+# LE64-RELA-NEXT:   0x30250 R_LARCH_TLS_DESC64 - 0x800
+# LE64-RELA-NEXT:   0x30260 R_LARCH_TLS_DESC64 - 0x7FF
+# LE64-RELA-NEXT: }
+# LE64-RELA:      Hex dump of section '.got':
+# LE64-RELA-NEXT: 0x00030240 00000000 00000000 00000000 00000000 .
+# LE64-RELA-NEXT: 0x00030250 00000000 00000000 00000000 00000000 .
+# LE64-RELA-NEXT: 0x00030260 00000000 00000000 00000000 00000000 .
+
+# IE64-RELA:      .rela.dyn {
+# IE64-RELA-NEXT:   0x303C8 R_LARCH_TLS_DESC64 - 0x8
+# IE64-RELA-NEXT:   0x303E8 R_LARCH_TLS_DESC64 - 0x7FF
+# IE64-RELA-NEXT:   0x303D8 R_LARCH_TLS_DESC64 c 0x0
+# IE64-RELA-NEXT: }
+# IE64-RELA:      Hex dump of section '.got':
+# IE64-RELA-NEXT: 0x000303c8 00000000 00000000 00000000 00000000 .
+# IE64-RELA-NEXT: 0x000303d8 00000000 00000000 00000000 00000000 .
+# IE64-RELA-NEXT: 0x000303e8 00000000 00000000 00000000 00000000 .
+
+# GD32-REL:      .rel.dyn {
+# GD32-REL-NEXT:    0x20264 R_LARCH_TLS_DESC32 -
+# GD32-REL-NEXT:    0x20254 R_LARCH_TLS_DESC32 a
+# GD32-REL-NEXT:    0x2025C R_LARCH_TLS_DESC32 c
+# GD32-REL-NEXT: }
+# GD32-REL:      Hex dump of section '.got':
+# GD32-REL-NEXT: 0x00020254 00000000 00000000 00000000 00000000 .
+# GD32-REL-NEXT: 0x00020264 00000000 ff070000                   .
+
+#--- a.s
+.macro add dst, src1, src2
+.ifdef ELF32
+add.w \dst, \src1, \src2
+.else
+add.d \dst, \src1, \src2
+.endif
+.endm
+.macro load dst, src1, src2
+.ifdef ELF32
+ld.w \dst, \src1, \src2
+.else
+ld.d \dst, \src1, \src2
+.endif
+.endm
+
+pcaddi $a0, %desc_pcrel_20(a)
+load $ra, $a0, %desc_ld(a)
+jirl $ra, $ra, %desc_call(a)
+add $a1, $a0, $tp
+
+pcaddi $a0, %desc_pcrel_20(b)
+load $ra, $a0, %desc_ld(b)
+jirl $ra, $ra, %desc_call(b)
+add $a2, $a0, $tp
+
+pcaddi $a0, %desc_pcrel_20(c)
+load $ra, $a0, %desc_ld(c)
+jirl $ra, $ra, %desc_call(c)
+add $a3, $a0, $tp
+
+.section .tbss,"awT",@nobits
+.globl a
+.zero 8
+a:
+.zero 2039  ## Place b at 0x7ff
+b:
+.zero 1
+
+#--- c.s
+.section .tbss,"awT",@nobits
+.globl c
+c: .zero 4

From bf173ba0ea34a59d3ce76ce7535c8ca186bdf681 Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Mon, 29 Jul 2024 09:12:15 +0200
Subject: [PATCH 060/427] [compiler-rt][test] Disable lld tests on SPARC
 (#100533)

As detailed in Issue #100320, a considerable number of tests that
explicitly use `-fuse-ld=lld` `FAIL` on Linux/sparc64 due to several
`lld` limitations (no 32-bit SPARC support, lack of support for various
relocations, ...).

To reduce the noise, this patch disables `COMPILER_RT_HAS_LLD` on SPARC
wholesale.

Tested on `sparc64-unknown-linux-gnu`.

(cherry picked from commit 33a50e0eaa80cf3db1b944762db9a37a06f3ac32)
---
 compiler-rt/CMakeLists.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
index 65063e0057bbc..6b8d9a5ea46d6 100644
--- a/compiler-rt/CMakeLists.txt
+++ b/compiler-rt/CMakeLists.txt
@@ -801,6 +801,10 @@ if(ANDROID)
   append_list_if(COMPILER_RT_HAS_FUSE_LD_LLD_FLAG -fuse-ld=lld SANITIZER_COMMON_LINK_FLAGS)
   append_list_if(COMPILER_RT_HAS_LLD -fuse-ld=lld COMPILER_RT_UNITTEST_LINK_FLAGS)
 endif()
+if(${COMPILER_RT_DEFAULT_TARGET_ARCH} MATCHES sparc)
+  # lld has several bugs/limitations on SPARC, so disable (Issue #100320).
+  set(COMPILER_RT_HAS_LLD FALSE)
+endif()
 pythonize_bool(COMPILER_RT_HAS_LLD)
 pythonize_bool(COMPILER_RT_TEST_USE_LLD)
 

From 002fcbd82c00d5c402bf2dabea203b294ef3e00b Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Wed, 24 Jul 2024 10:03:47 +0200
Subject: [PATCH 061/427] [asan][cmake][test] Fix finding dynamic asan runtime
 lib (#100083)

In a `runtimes` build on Solaris/amd64, there are two failues:
```
  AddressSanitizer-Unit :: ./Asan-i386-calls-Dynamic-Test/failed_to_discover_tests_from_gtest
  AddressSanitizer-Unit :: ./Asan-i386-inline-Dynamic-Test/failed_to_discover_tests_from_gtest
```
This happens when `lit` enumerates the tests with `--gtest_list_tests
--gtest_filter=-*DISABLED_*`. The error is twofold:

- The `LD_LIBRARY_PATH*` variables point at the 64-bit directory
(`lib/clang/19/lib/x86_64-pc-solaris2.11`) for a 32-bit test:
  ```
ld.so.1: Asan-i386-calls-Dynamic-Test: fatal:
/var/llvm/local-amd64-release-stage2-A-flang-clang18-runtimes/tools/clang/stage2-bins/./lib/../lib/clang/19/lib/x86_64-pc-solaris2.11/libclang_rt.asan.so:
wrong ELF class: ELFCLASS64
  ```
- While the tests are linked with `-Wl,-rpath`, that path always is the
64-bit directory again.

Accordingly, the fix consists of two parts:
- The code in `compiler-rt/test/asan/Unit/lit.site.cfg.py.in` to adjust
the `LD_LIBRARY_PATH*` variables is guarded by a `config.target_arch !=
config.host_arch` condition. This is wrong in two ways:
- The adjustment is always needed independent of the host arch. This is
what `compiler-rt/test/lit.common.cfg.py` already does.
- Besides, `config.host_arch` is ultimately set from
`CMAKE_HOST_SYSTEM_PROCESSOR`. On Linux/x86_64, this is `x86_64` (`uname
-m`) while on Solaris/amd64 it's `i386` (`uname -p`), explaining why the
transformation is skipped on Solaris, but not on Linux.
- Besides, `RPATH` needs to be set to the correct subdirectory, so
instead of using the default arch in `compiler-rt/CMakeLists.txt`, this
patch moves the code to a function which takes the test's arch into
account.

Tested on `amd64-pc-solaris2.11` and `x86_64-pc-linux-gnu`.

(cherry picked from commit c34d673b02ead039acd107f096c1f32c16b61e07)
---
 compiler-rt/CMakeLists.txt                    |  4 ----
 compiler-rt/cmake/config-ix.cmake             | 13 +++++++++++++
 compiler-rt/lib/asan/tests/CMakeLists.txt     |  4 +++-
 compiler-rt/test/asan/Unit/lit.site.cfg.py.in |  2 +-
 4 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
index 6b8d9a5ea46d6..2207555b03a03 100644
--- a/compiler-rt/CMakeLists.txt
+++ b/compiler-rt/CMakeLists.txt
@@ -604,10 +604,6 @@ if (COMPILER_RT_TEST_STANDALONE_BUILD_LIBS)
   if ("${COMPILER_RT_TEST_COMPILER_ID}" MATCHES "Clang")
     list(APPEND COMPILER_RT_UNITTEST_LINK_FLAGS "-resource-dir=${COMPILER_RT_OUTPUT_DIR}")
   endif()
-  get_compiler_rt_output_dir(${COMPILER_RT_DEFAULT_TARGET_ARCH} rtlib_dir)
-  if (NOT WIN32)
-    list(APPEND COMPILER_RT_UNITTEST_LINK_FLAGS "-Wl,-rpath,${rtlib_dir}")
-  endif()
 endif()
 
 if(COMPILER_RT_USE_LLVM_UNWINDER)
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index 3a151772e268a..dad557af2ae8c 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -267,6 +267,19 @@ function(get_target_link_flags_for_arch arch out_var)
   endif()
 endfunction()
 
+# Returns a list of architecture specific dynamic ldflags in @out_var list.
+function(get_dynamic_link_flags_for_arch arch out_var)
+  list(FIND COMPILER_RT_SUPPORTED_ARCH ${arch} ARCH_INDEX)
+  if(ARCH_INDEX EQUAL -1)
+    message(FATAL_ERROR "Unsupported architecture: ${arch}")
+  else()
+    get_compiler_rt_output_dir(${arch} rtlib_dir)
+    if (NOT WIN32)
+      set(${out_var} "-Wl,-rpath,${rtlib_dir}" PARENT_SCOPE)
+    endif()
+  endif()
+endfunction()
+
 # Returns a compiler and CFLAGS that should be used to run tests for the
 # specific architecture.  When cross-compiling, this is controled via
 # COMPILER_RT_TEST_COMPILER and COMPILER_RT_TEST_COMPILER_CFLAGS.
diff --git a/compiler-rt/lib/asan/tests/CMakeLists.txt b/compiler-rt/lib/asan/tests/CMakeLists.txt
index 7abd4c89ac6bc..b489bb99aeff3 100644
--- a/compiler-rt/lib/asan/tests/CMakeLists.txt
+++ b/compiler-rt/lib/asan/tests/CMakeLists.txt
@@ -206,13 +206,15 @@ function(add_asan_tests arch test_runtime)
           -Wl,-nodefaultlib:libcmt,-defaultlib:msvcrt,-defaultlib:oldnames
         )
     else()
+      set(DYNAMIC_LINK_FLAGS)
+      get_dynamic_link_flags_for_arch(${arch} DYNAMIC_LINK_FLAGS)
 
       # Otherwise, reuse ASAN_INST_TEST_OBJECTS.
       add_compiler_rt_test(AsanDynamicUnitTests "${dynamic_test_name}" "${arch}"
         SUBDIR "${CONFIG_NAME_DYNAMIC}"
         OBJECTS ${ASAN_INST_TEST_OBJECTS}
         DEPS asan ${ASAN_INST_TEST_OBJECTS}
-        LINK_FLAGS ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS} ${TARGET_LINK_FLAGS}
+        LINK_FLAGS ${ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINK_FLAGS} ${TARGET_LINK_FLAGS} ${DYNAMIC_LINK_FLAGS}
         )
     endif()
   endif()
diff --git a/compiler-rt/test/asan/Unit/lit.site.cfg.py.in b/compiler-rt/test/asan/Unit/lit.site.cfg.py.in
index 638e1dedfc1d2..ac652b53dcb9d 100644
--- a/compiler-rt/test/asan/Unit/lit.site.cfg.py.in
+++ b/compiler-rt/test/asan/Unit/lit.site.cfg.py.in
@@ -53,7 +53,7 @@ config.test_source_root = config.test_exec_root
 # host triple as the trailing path component. The value is incorrect for i386
 # tests on x86_64 hosts and vice versa. Adjust config.compiler_rt_libdir
 # accordingly.
-if config.enable_per_target_runtime_dir and config.target_arch != config.host_arch:
+if config.enable_per_target_runtime_dir:
     if config.target_arch == 'i386':
         config.compiler_rt_libdir = re.sub(r'/x86_64(?=-[^/]+$)', '/i386', config.compiler_rt_libdir)
     elif config.target_arch == 'x86_64':

From 55b063f3f5b00ecdcbbeec85b13fc63b8e4386df Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn@outlook.com>
Date: Tue, 23 Jul 2024 21:35:09 -0500
Subject: [PATCH 062/427] [libc] Fix leftover debug commandline argument

Summary:
Fixes https://github.com/llvm/llvm-project/issues/100289

(cherry picked from commit 0420d2f97eac49af5e816b0e3f2a9135d1673168)
---
 libc/cmake/modules/LLVMLibCTestRules.cmake | 1 -
 libc/docs/configure.rst                    | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake
index 18adbee0bc7ad..96eb065c4a672 100644
--- a/libc/cmake/modules/LLVMLibCTestRules.cmake
+++ b/libc/cmake/modules/LLVMLibCTestRules.cmake
@@ -651,7 +651,6 @@ function(add_libc_hermetic test_name)
     target_link_options(${fq_build_target_name} PRIVATE
       ${LIBC_COMPILE_OPTIONS_DEFAULT} -Wno-multi-gpu
       -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
-      "-Wl,-asdfasdfasdf"
       "-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static
       "-Wl,-mllvm,-amdhsa-code-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}")
   elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst
index 5c55e4ab0f181..b81922367d8b7 100644
--- a/libc/docs/configure.rst
+++ b/libc/docs/configure.rst
@@ -29,7 +29,7 @@ to learn about the defaults for your platform and target.
     - ``LIBC_CONF_ENABLE_STRONG_STACK_PROTECTOR``: Enable -fstack-protector-strong to defend against stack smashing attack.
     - ``LIBC_CONF_KEEP_FRAME_POINTER``: Keep frame pointer in functions for better debugging experience.
 * **"errno" options**
-    - ``LIBC_CONF_ERRNO_MODE``: The implementation used for errno, acceptable values are LIBC_ERRNO_MODE_UNDEFINED, LIBC_ERRNO_MODE_THREAD_LOCAL, LIBC_ERRNO_MODE_SHARED, LIBC_ERRNO_MODE_EXTERNAL, and LIBC_ERRNO_MODE_SYSTEM.
+    - ``LIBC_CONF_ERRNO_MODE``: The implementation used for errno, acceptable values are LIBC_ERRNO_MODE_DEFAULT, LIBC_ERRNO_MODE_UNDEFINED, LIBC_ERRNO_MODE_THREAD_LOCAL, LIBC_ERRNO_MODE_SHARED, LIBC_ERRNO_MODE_EXTERNAL, and LIBC_ERRNO_MODE_SYSTEM.
 * **"malloc" options**
     - ``LIBC_CONF_FREELIST_MALLOC_BUFFER_SIZE``: Default size for the constinit freelist buffer used for the freelist malloc implementation (default 1o 1GB).
 * **"math" options**

From a8b7c809ee20bc6e84f9b6280f30d7d2bcfd0a7c Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn@outlook.com>
Date: Tue, 23 Jul 2024 22:04:47 -0500
Subject: [PATCH 063/427] Update libc/docs/configure.rst

---
 libc/docs/configure.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst
index b81922367d8b7..5c55e4ab0f181 100644
--- a/libc/docs/configure.rst
+++ b/libc/docs/configure.rst
@@ -29,7 +29,7 @@ to learn about the defaults for your platform and target.
     - ``LIBC_CONF_ENABLE_STRONG_STACK_PROTECTOR``: Enable -fstack-protector-strong to defend against stack smashing attack.
     - ``LIBC_CONF_KEEP_FRAME_POINTER``: Keep frame pointer in functions for better debugging experience.
 * **"errno" options**
-    - ``LIBC_CONF_ERRNO_MODE``: The implementation used for errno, acceptable values are LIBC_ERRNO_MODE_DEFAULT, LIBC_ERRNO_MODE_UNDEFINED, LIBC_ERRNO_MODE_THREAD_LOCAL, LIBC_ERRNO_MODE_SHARED, LIBC_ERRNO_MODE_EXTERNAL, and LIBC_ERRNO_MODE_SYSTEM.
+    - ``LIBC_CONF_ERRNO_MODE``: The implementation used for errno, acceptable values are LIBC_ERRNO_MODE_UNDEFINED, LIBC_ERRNO_MODE_THREAD_LOCAL, LIBC_ERRNO_MODE_SHARED, LIBC_ERRNO_MODE_EXTERNAL, and LIBC_ERRNO_MODE_SYSTEM.
 * **"malloc" options**
     - ``LIBC_CONF_FREELIST_MALLOC_BUFFER_SIZE``: Default size for the constinit freelist buffer used for the freelist malloc implementation (default 1o 1GB).
 * **"math" options**

From 360df814b029fc6647672bd3a38ab7a888d073eb Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye@arm.com>
Date: Thu, 25 Jul 2024 09:03:48 +0100
Subject: [PATCH 064/427] [StackFrameLayoutAnalysis] Use target-specific hook
 for SP offsets (#100386)

StackFrameLayoutAnalysis currently calculates SP-relative offsets in a
target-independent way via MachineFrameInfo offsets. This is incorrect
for some Targets, e.g. AArch64, when there are scalable vector stack
slots.

This patch adds a virtual function to TargetFrameLowering to provide
offsets from SP, with a default implementation matching what is
currently used in StackFrameLayoutAnalysis, and refactors
StackFrameLayoutAnalysis to use this function. Only non-zero scalable
offsets are output by the analysis pass.

An implementation of this function is added for AArch64 targets, which
aims to provide correct SP offsets in most cases.

(cherry picked from commit dc1c00f6b13f724154f9883990f8b21fb8dcccef)
---
 .../llvm/CodeGen/TargetFrameLowering.h        |   7 +
 .../CodeGen/StackFrameLayoutAnalysisPass.cpp  |  51 +-
 llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp  |  14 +
 .../Target/AArch64/AArch64FrameLowering.cpp   |  35 ++
 .../lib/Target/AArch64/AArch64FrameLowering.h |   2 +
 .../CodeGen/AArch64/sve-stack-frame-layout.ll | 480 +++++++++++++++++-
 6 files changed, 563 insertions(+), 26 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 72978b2f746d7..0656c0d739fdf 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -343,6 +343,13 @@ class TargetFrameLowering {
     return getFrameIndexReference(MF, FI, FrameReg);
   }
 
+  /// getFrameIndexReferenceFromSP - This method returns the offset from the
+  /// stack pointer to the slot of the specified index. This function serves to
+  /// provide a comparable offset from a single reference point (the value of
+  /// the stack-pointer at function entry) that can be used for analysis.
+  virtual StackOffset getFrameIndexReferenceFromSP(const MachineFunction &MF,
+                                                   int FI) const;
+
   /// Returns the callee-saved registers as computed by determineCalleeSaves
   /// in the BitVector \p SavedRegs.
   virtual void getCalleeSaves(const MachineFunction &MF,
diff --git a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
index 940aecd1cb363..ff77685f8f354 100644
--- a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
+++ b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
@@ -60,15 +60,15 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
     int Slot;
     int Size;
     int Align;
-    int Offset;
+    StackOffset Offset;
     SlotType SlotTy;
     bool Scalable;
 
-    SlotData(const MachineFrameInfo &MFI, const int ValOffset, const int Idx)
+    SlotData(const MachineFrameInfo &MFI, const StackOffset Offset,
+             const int Idx)
         : Slot(Idx), Size(MFI.getObjectSize(Idx)),
-          Align(MFI.getObjectAlign(Idx).value()),
-          Offset(MFI.getObjectOffset(Idx) - ValOffset), SlotTy(Invalid),
-          Scalable(false) {
+          Align(MFI.getObjectAlign(Idx).value()), Offset(Offset),
+          SlotTy(Invalid), Scalable(false) {
       Scalable = MFI.getStackID(Idx) == TargetStackID::ScalableVector;
       if (MFI.isSpillSlotObjectIndex(Idx))
         SlotTy = SlotType::Spill;
@@ -79,10 +79,10 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
     }
 
     // We use this to sort in reverse order, so that the layout is displayed
-    // correctly. Scalable slots are sorted to the end of the list.
+    // correctly.
     bool operator<(const SlotData &Rhs) const {
-      return std::make_tuple(!Scalable, Offset) >
-             std::make_tuple(!Rhs.Scalable, Rhs.Offset);
+      return (Offset.getFixed() + Offset.getScalable()) >
+             (Rhs.Offset.getFixed() + Rhs.Offset.getScalable());
     }
   };
 
@@ -149,15 +149,27 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
     // For example we store the Offset in YAML as:
     //    ...
     //    - Offset: -8
+    //    - ScalableOffset: -16
+    // Note: the ScalableOffset entries are added only for slots with non-zero
+    // scalable offsets.
     //
-    // But we print it to the CLI as
+    // But we print it to the CLI as:
     //   Offset: [SP-8]
+    //
+    // Or with non-zero scalable offset:
+    //   Offset: [SP-8-16 x vscale]
 
     // Negative offsets will print a leading `-`, so only add `+`
     std::string Prefix =
-        formatv("\nOffset: [SP{0}", (D.Offset < 0) ? "" : "+").str();
-    Rem << Prefix << ore::NV("Offset", D.Offset)
-        << "], Type: " << ore::NV("Type", getTypeString(D.SlotTy))
+        formatv("\nOffset: [SP{0}", (D.Offset.getFixed() < 0) ? "" : "+").str();
+    Rem << Prefix << ore::NV("Offset", D.Offset.getFixed());
+
+    if (D.Offset.getScalable()) {
+      Rem << ((D.Offset.getScalable() < 0) ? "" : "+")
+          << ore::NV("ScalableOffset", D.Offset.getScalable()) << " x vscale";
+    }
+
+    Rem << "], Type: " << ore::NV("Type", getTypeString(D.SlotTy))
         << ", Align: " << ore::NV("Align", D.Align)
         << ", Size: " << ore::NV("Size", ElementCount::get(D.Size, D.Scalable));
   }
@@ -170,17 +182,22 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
     Rem << "\n    " << ore::NV("DataLoc", Loc);
   }
 
+  StackOffset getStackOffset(const MachineFunction &MF,
+                             const MachineFrameInfo &MFI,
+                             const TargetFrameLowering *FI, int FrameIdx) {
+    if (!FI)
+      return StackOffset::getFixed(MFI.getObjectOffset(FrameIdx));
+
+    return FI->getFrameIndexReferenceFromSP(MF, FrameIdx);
+  }
+
   void emitStackFrameLayoutRemarks(MachineFunction &MF,
                                    MachineOptimizationRemarkAnalysis &Rem) {
     const MachineFrameInfo &MFI = MF.getFrameInfo();
     if (!MFI.hasStackObjects())
       return;
 
-    // ValOffset is the offset to the local area from the SP at function entry.
-    // To display the true offset from SP, we need to subtract ValOffset from
-    // MFI's ObjectOffset.
     const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering();
-    const int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
 
     LLVM_DEBUG(dbgs() << "getStackProtectorIndex =="
                       << MFI.getStackProtectorIndex() << "\n");
@@ -194,7 +211,7 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
          Idx != EndIdx; ++Idx) {
       if (MFI.isDeadObjectIndex(Idx))
         continue;
-      SlotInfo.emplace_back(MFI, ValOffset, Idx);
+      SlotInfo.emplace_back(MFI, getStackOffset(MF, MFI, FI, Idx), Idx);
     }
 
     // sort the ordering, to match the actual layout in memory
diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 48a2094f5d451..7d054cb7c7c71 100644
--- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -61,6 +61,20 @@ TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                MFI.getOffsetAdjustment());
 }
 
+/// Returns the offset from the stack pointer to the slot of the specified
+/// index. This function serves to provide a comparable offset from a single
+/// reference point (the value of the stack-pointer at function entry) that can
+/// be used for analysis. This is the default implementation using
+/// MachineFrameInfo offsets.
+StackOffset
+TargetFrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
+                                                  int FI) const {
+  // To display the true offset from SP, we need to subtract the offset to the
+  // local area from MFI's ObjectOffset.
+  return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI) -
+                               getOffsetOfLocalArea());
+}
+
 bool TargetFrameLowering::needsFrameIndexResolution(
     const MachineFunction &MF) const {
   return MF.getFrameInfo().hasStackObjects();
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index b1b83e27c5592..bd530903bb664 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2603,6 +2603,41 @@ AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
       /*ForSimm=*/false);
 }
 
+StackOffset
+AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
+                                                   int FI) const {
+  // This function serves to provide a comparable offset from a single reference
+  // point (the value of SP at function entry) that can be used for analysis,
+  // e.g. the stack-frame-layout analysis pass. It is not guaranteed to be
+  // correct for all objects in the presence of VLA-area objects or dynamic
+  // stack re-alignment.
+
+  const auto &MFI = MF.getFrameInfo();
+
+  int64_t ObjectOffset = MFI.getObjectOffset(FI);
+
+  // This is correct in the absence of any SVE stack objects.
+  StackOffset SVEStackSize = getSVEStackSize(MF);
+  if (!SVEStackSize)
+    return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
+
+  const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
+  if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+    return StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()),
+                            ObjectOffset);
+  }
+
+  bool IsFixed = MFI.isFixedObjectIndex(FI);
+  bool IsCSR =
+      !IsFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
+
+  StackOffset ScalableOffset = {};
+  if (!IsFixed && !IsCSR)
+    ScalableOffset = -SVEStackSize;
+
+  return StackOffset::getFixed(ObjectOffset) + ScalableOffset;
+}
+
 StackOffset
 AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF,
                                                      int FI) const {
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index da315850d6362..0ebab1700e9ce 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -41,6 +41,8 @@ class AArch64FrameLowering : public TargetFrameLowering {
 
   StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
                                      Register &FrameReg) const override;
+  StackOffset getFrameIndexReferenceFromSP(const MachineFunction &MF,
+                                           int FI) const override;
   StackOffset resolveFrameIndexReference(const MachineFunction &MF, int FI,
                                          Register &FrameReg, bool PreferFP,
                                          bool ForSimm) const;
diff --git a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
index 34d85d1f76086..36bca2ebd4ada 100644
--- a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
@@ -5,9 +5,9 @@
 ; CHECK-FRAMELAYOUT-LABEL: Function: csr_d8_allocnxv4i32i32f64
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-20], Type: Variable, Align: 4, Size: 4
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Variable, Align: 8, Size: 8
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-16 x vscale], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-20-16 x vscale], Type: Variable, Align: 4, Size: 4
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Variable, Align: 8, Size: 8
 
 define i32 @csr_d8_allocnxv4i32i32f64(double %d) "aarch64_pstate_sm_compatible" {
 ; CHECK-LABEL: csr_d8_allocnxv4i32i32f64:
@@ -49,8 +49,8 @@ entry:
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-20], Type: Variable, Align: 4, Size: 4
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 16, Size: 8
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-40], Type: Variable, Align: 8, Size: 8
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-40-16 x vscale], Type: Variable, Align: 8, Size: 8
 
 define i32 @csr_d8_allocnxv4i32i32f64_fp(double %d) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
 ; CHECK-LABEL: csr_d8_allocnxv4i32i32f64_fp:
@@ -90,13 +90,167 @@ entry:
   ret i32 0
 }
 
+; In the presence of dynamic stack-realignment we emit correct offsets for
+; objects which are not realigned. For realigned objects, e.g. the i32 alloca
+; in this test, we emit the correct offset ignoring the re-alignment (i.e. the
+; offset if the alignment requirement is already satisfied).
+
+; CHECK-FRAMELAYOUT-LABEL: Function: csr_d8_allocnxv4i32i32f64_dynamicrealign
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-24], Type: Variable, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 16, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-128-16 x vscale], Type: Variable, Align: 128, Size: 4
+
+define i32 @csr_d8_allocnxv4i32i32f64_dynamicrealign(double %d) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: csr_d8_allocnxv4i32i32f64_dynamicrealign:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str d8, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT:    sub x9, sp, #96
+; CHECK-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    addvl x9, x9, #-1
+; CHECK-NEXT:    and sp, x9, #0xffffffffffffff80
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset b8, -32
+; CHECK-NEXT:    mov z1.s, #0 // =0x0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sub x8, x29, #16
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    str wzr, [sp]
+; CHECK-NEXT:    stur d0, [x29, #-8]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x8, #-1, mul vl]
+; CHECK-NEXT:    sub sp, x29, #16
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr d8, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %a = alloca <vscale x 4 x i32>
+  %b = alloca i32, align 128
+  %c = alloca double
+  tail call void asm sideeffect "", "~{d8}"() #1
+  store <vscale x 4 x i32> zeroinitializer, ptr %a
+  store i32 zeroinitializer, ptr %b
+  store double %d, ptr %c
+  ret i32 0
+}
+
+; In the presence of VLA-area objects, we emit correct offsets for all objects
+; except for these VLA objects.
+
+; CHECK-FRAMELAYOUT-LABEL: Function: csr_d8_allocnxv4i32i32f64_vla
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-24], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Variable, Align: 1, Size: 0
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-40-16 x vscale], Type: Variable, Align: 8, Size: 8
+
+define i32 @csr_d8_allocnxv4i32i32f64_vla(double %d, i32 %i) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: csr_d8_allocnxv4i32i32f64_vla:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str d8, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #8
+; CHECK-NEXT:    str x19, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    mov x19, sp
+; CHECK-NEXT:    .cfi_def_cfa w29, 24
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    .cfi_offset w29, -24
+; CHECK-NEXT:    .cfi_offset b8, -32
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    ubfiz x8, x0, #2, #32
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    add x8, x8, #15
+; CHECK-NEXT:    and x8, x8, #0x7fffffff0
+; CHECK-NEXT:    sub x8, x9, x8
+; CHECK-NEXT:    mov sp, x8
+; CHECK-NEXT:    mov z1.s, #0 // =0x0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    str wzr, [x8]
+; CHECK-NEXT:    sub x8, x29, #8
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    str d0, [x19, #8]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x8, #-1, mul vl]
+; CHECK-NEXT:    sub sp, x29, #8
+; CHECK-NEXT:    ldp x29, x30, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x19, [sp, #24] // 8-byte Folded Reload
+; CHECK-NEXT:    ldr d8, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %a = alloca <vscale x 4 x i32>
+  %0 = zext i32 %i to i64
+  %b = alloca i32, i64 %0
+  %c = alloca double
+  tail call void asm sideeffect "", "~{d8}"() #1
+  store <vscale x 4 x i32> zeroinitializer, ptr %a
+  store i32 zeroinitializer, ptr %b
+  store double %d, ptr %c
+  ret i32 0
+}
+
+; CHECK-FRAMELAYOUT-LABEL: Function: csr_d8_allocnxv4i32i32f64_stackargsi32f64
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP+8], Type: Variable, Align: 8, Size: 4
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP+0], Type: Protector, Align: 16, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-16 x vscale], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-20-16 x vscale], Type: Variable, Align: 4, Size: 4
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Variable, Align: 8, Size: 8
+
+define i32 @csr_d8_allocnxv4i32i32f64_stackargsi32f64(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: csr_d8_allocnxv4i32i32f64_stackargsi32f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    str x29, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -8
+; CHECK-NEXT:    .cfi_offset b8, -16
+; CHECK-NEXT:    mov z1.s, #0 // =0x0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    add x8, sp, #16
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    str wzr, [sp, #12]
+; CHECK-NEXT:    str d0, [sp]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x8]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ldr x29, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT:    ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %a = alloca <vscale x 4 x i32>
+  %b = alloca i32
+  %c = alloca double
+  tail call void asm sideeffect "", "~{d8}"() #1
+  store <vscale x 4 x i32> zeroinitializer, ptr %a
+  store i32 zeroinitializer, ptr %b
+  store double %d0, ptr %c
+  ret i32 0
+}
+
 ; CHECK-FRAMELAYOUT-LABEL: Function: svecc_z8_allocnxv4i32i32f64_fp
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-20], Type: Variable, Align: 4, Size: 4
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Variable, Align: 8, Size: 8
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 16, Size: vscale x 16
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-16 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-32 x vscale], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-20-32 x vscale], Type: Variable, Align: 4, Size: 4
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-32 x vscale], Type: Variable, Align: 8, Size: 8
 
 define i32 @svecc_z8_allocnxv4i32i32f64_fp(double %d, <vscale x 4 x i32> %v) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
 ; CHECK-LABEL: svecc_z8_allocnxv4i32i32f64_fp:
@@ -133,3 +287,311 @@ entry:
   store double %d, ptr %c
   ret i32 0
 }
+
+; CHECK-FRAMELAYOUT-LABEL: Function: svecc_z8_allocnxv4i32i32f64_stackargsi32_fp
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP+0], Type: Protector, Align: 16, Size: 4
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-16 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-32 x vscale], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-20-32 x vscale], Type: Variable, Align: 4, Size: 4
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-32 x vscale], Type: Variable, Align: 8, Size: 8
+
+define i32 @svecc_z8_allocnxv4i32i32f64_stackargsi32_fp(double %d, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, <vscale x 4 x i32> %v) "aarch64_pstate_sm_compatible" "frame-pointer"="all"{
+; CHECK-LABEL: svecc_z8_allocnxv4i32i32f64_stackargsi32_fp:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str z8, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    str wzr, [sp, #12]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x29, #-2, mul vl]
+; CHECK-NEXT:    str d0, [sp], #16
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr z8, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %a = alloca <vscale x 4 x i32>
+  %b = alloca i32
+  %c = alloca double
+  tail call void asm sideeffect "", "~{d8}"() #1
+  store <vscale x 4 x i32> %v, ptr %a
+  store i32 zeroinitializer, ptr %b
+  store double %d, ptr %c
+  ret i32 0
+}
+
+; CHECK-FRAMELAYOUT-LABEL: Function: svecc_call
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-24], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-40], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-16 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-32 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-48 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-64 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-80 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-96 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-112 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-128 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-144 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-160 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-176 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-192 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-208 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-224 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-240 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-256 x vscale], Type: Spill, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-258 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-260 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-262 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-264 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-266 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-268 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-270 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-272 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-274 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-276 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-278 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-280 x vscale], Type: Spill, Align: 2, Size: vscale x 2
+
+define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: svecc_call:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    cntd x9
+; CHECK-NEXT:    stp x9, x28, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x27, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w27, -16
+; CHECK-NEXT:    .cfi_offset w28, -24
+; CHECK-NEXT:    .cfi_offset w30, -40
+; CHECK-NEXT:    .cfi_offset w29, -48
+; CHECK-NEXT:    addvl sp, sp, #-18
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 144 * VG
+; CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG
+; CHECK-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG
+; CHECK-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG
+; CHECK-NEXT:    .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 48 - 32 * VG
+; CHECK-NEXT:    .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 48 - 40 * VG
+; CHECK-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 48 - 48 * VG
+; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 48 - 56 * VG
+; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 48 - 64 * VG
+; CHECK-NEXT:    mov x8, x0
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    bl __arm_sme_state
+; CHECK-NEXT:    and x19, x0, #0x1
+; CHECK-NEXT:    .cfi_offset vg, -32
+; CHECK-NEXT:    tbz w19, #0, .LBB7_2
+; CHECK-NEXT:  // %bb.1: // %entry
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:  .LBB7_2: // %entry
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    mov w1, #45 // =0x2d
+; CHECK-NEXT:    mov w2, #37 // =0x25
+; CHECK-NEXT:    bl memset
+; CHECK-NEXT:    tbz w19, #0, .LBB7_4
+; CHECK-NEXT:  // %bb.3: // %entry
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:  .LBB7_4: // %entry
+; CHECK-NEXT:    mov w0, #22647 // =0x5877
+; CHECK-NEXT:    movk w0, #59491, lsl #16
+; CHECK-NEXT:    .cfi_restore vg
+; CHECK-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    addvl sp, sp, #18
+; CHECK-NEXT:    .cfi_def_cfa wsp, 48
+; CHECK-NEXT:    .cfi_restore z8
+; CHECK-NEXT:    .cfi_restore z9
+; CHECK-NEXT:    .cfi_restore z10
+; CHECK-NEXT:    .cfi_restore z11
+; CHECK-NEXT:    .cfi_restore z12
+; CHECK-NEXT:    .cfi_restore z13
+; CHECK-NEXT:    .cfi_restore z14
+; CHECK-NEXT:    .cfi_restore z15
+; CHECK-NEXT:    ldp x27, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x28, [sp, #24] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x29, x30, [sp], #48 // 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w19
+; CHECK-NEXT:    .cfi_restore w27
+; CHECK-NEXT:    .cfi_restore w28
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    ret
+entry:
+  tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
+  %call = call ptr @memset(ptr noundef nonnull %P1, i32 noundef 45, i32 noundef 37)
+  ret i32 -396142473
+}
+declare ptr @memset(ptr, i32, i32)
+
+; The VA register currently ends up in VLA space - in the presence of VLA-area
+; objects, we emit correct offsets for all objects except for these VLA objects.
+
+; CHECK-FRAMELAYOUT-LABEL: Function: vastate
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 16, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-40], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-56], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-72], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-80], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-88], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-96], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-104], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-112], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-128], Type: Variable, Align: 16, Size: 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-128], Type: Variable, Align: 16, Size: 0
+
+define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "target-features"="+sme" {
+; CHECK-LABEL: vastate:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp d15, d14, [sp, #-112]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-NEXT:    cntd x9
+; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    str x9, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #64
+; CHECK-NEXT:    .cfi_def_cfa w29, 48
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w30, -40
+; CHECK-NEXT:    .cfi_offset w29, -48
+; CHECK-NEXT:    .cfi_offset b8, -56
+; CHECK-NEXT:    .cfi_offset b9, -64
+; CHECK-NEXT:    .cfi_offset b10, -72
+; CHECK-NEXT:    .cfi_offset b11, -80
+; CHECK-NEXT:    .cfi_offset b12, -88
+; CHECK-NEXT:    .cfi_offset b13, -96
+; CHECK-NEXT:    .cfi_offset b14, -104
+; CHECK-NEXT:    .cfi_offset b15, -112
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    rdsvl x8, #1
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    mov w20, w0
+; CHECK-NEXT:    msub x9, x8, x8, x9
+; CHECK-NEXT:    mov sp, x9
+; CHECK-NEXT:    stur x9, [x29, #-80]
+; CHECK-NEXT:    sub x9, x29, #80
+; CHECK-NEXT:    sturh wzr, [x29, #-70]
+; CHECK-NEXT:    stur wzr, [x29, #-68]
+; CHECK-NEXT:    sturh w8, [x29, #-72]
+; CHECK-NEXT:    msr TPIDR2_EL0, x9
+; CHECK-NEXT:    .cfi_offset vg, -32
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:    bl other
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:    .cfi_restore vg
+; CHECK-NEXT:    smstart za
+; CHECK-NEXT:    mrs x8, TPIDR2_EL0
+; CHECK-NEXT:    sub x0, x29, #80
+; CHECK-NEXT:    cbnz x8, .LBB8_2
+; CHECK-NEXT:  // %bb.1: // %entry
+; CHECK-NEXT:    bl __arm_tpidr2_restore
+; CHECK-NEXT:  .LBB8_2: // %entry
+; CHECK-NEXT:    mov w0, w20
+; CHECK-NEXT:    msr TPIDR2_EL0, xzr
+; CHECK-NEXT:    sub sp, x29, #64
+; CHECK-NEXT:    .cfi_def_cfa wsp, 112
+; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp], #112 // 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w19
+; CHECK-NEXT:    .cfi_restore w20
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    .cfi_restore b8
+; CHECK-NEXT:    .cfi_restore b9
+; CHECK-NEXT:    .cfi_restore b10
+; CHECK-NEXT:    .cfi_restore b11
+; CHECK-NEXT:    .cfi_restore b12
+; CHECK-NEXT:    .cfi_restore b13
+; CHECK-NEXT:    .cfi_restore b14
+; CHECK-NEXT:    .cfi_restore b15
+; CHECK-NEXT:    ret
+entry:
+  tail call void @other()
+  ret i32 %x
+}
+declare void @other()

From b3a73a1559317019b9c5eb01f19af78bffd48dbe Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye@arm.com>
Date: Thu, 25 Jul 2024 18:54:24 +0100
Subject: [PATCH 065/427] [StackFrameLayoutAnalysis] Support more SlotTypes
 (#100562)

Add new SlotTypes to StackFrameLayoutAnalysis to disambiguate Fixed and
Variable-Sized stack slots from Variable slots. As Offsets are
unreliable for VLA-area objects, sort these to the end of the list -
using the Frame Index to ensure a deterministic order when Offsets are
equal.

(cherry picked from commit e31794f99d72dd764c4bc5c5583a0a4c89df22c3)
---
 .../CodeGen/StackFrameLayoutAnalysisPass.cpp  | 27 ++++++++++++++++---
 .../CodeGen/AArch64/sve-stack-frame-layout.ll | 25 ++++++++++-------
 .../CodeGen/X86/stack-frame-layout-remarks.ll | 12 ++++-----
 3 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
index ff77685f8f354..0a7a6bad4e86d 100644
--- a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
+++ b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
@@ -51,6 +51,8 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
 
   enum SlotType {
     Spill,          // a Spill slot
+    Fixed,          // a Fixed slot (e.g. arguments passed on the stack)
+    VariableSized,  // a variable sized object
     StackProtector, // Stack Protector slot
     Variable,       // a slot used to store a local data (could be a tmp)
     Invalid         // It's an error for a slot to have this type
@@ -72,17 +74,30 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
       Scalable = MFI.getStackID(Idx) == TargetStackID::ScalableVector;
       if (MFI.isSpillSlotObjectIndex(Idx))
         SlotTy = SlotType::Spill;
-      else if (Idx == MFI.getStackProtectorIndex())
+      else if (MFI.isFixedObjectIndex(Idx))
+        SlotTy = SlotType::Fixed;
+      else if (MFI.isVariableSizedObjectIndex(Idx))
+        SlotTy = SlotType::VariableSized;
+      else if (MFI.hasStackProtectorIndex() &&
+               Idx == MFI.getStackProtectorIndex())
         SlotTy = SlotType::StackProtector;
       else
         SlotTy = SlotType::Variable;
     }
 
+    bool isVarSize() const { return SlotTy == SlotType::VariableSized; }
+
     // We use this to sort in reverse order, so that the layout is displayed
-    // correctly.
+    // correctly. Variable sized slots are sorted to the end of the list, as
+    // offsets are currently incorrect for these but they reside at the end of
+    // the stack frame. The Slot index is used to ensure deterministic order
+    // when offsets are equal.
     bool operator<(const SlotData &Rhs) const {
-      return (Offset.getFixed() + Offset.getScalable()) >
-             (Rhs.Offset.getFixed() + Rhs.Offset.getScalable());
+      return std::make_tuple(!isVarSize(),
+                             Offset.getFixed() + Offset.getScalable(), Slot) >
+             std::make_tuple(!Rhs.isVarSize(),
+                             Rhs.Offset.getFixed() + Rhs.Offset.getScalable(),
+                             Rhs.Slot);
     }
   };
 
@@ -121,6 +136,10 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
     switch (Ty) {
     case SlotType::Spill:
       return "Spill";
+    case SlotType::Fixed:
+      return "Fixed";
+    case SlotType::VariableSized:
+      return "VariableSized";
     case SlotType::StackProtector:
       return "Protector";
     case SlotType::Variable:
diff --git a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
index 36bca2ebd4ada..431c9dc76508f 100644
--- a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
@@ -147,10 +147,11 @@ entry:
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-24], Type: Spill, Align: 8, Size: 8
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Variable, Align: 1, Size: 0
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 8, Size: 8
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Variable, Align: 16, Size: vscale x 16
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-40-16 x vscale], Type: Variable, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: VariableSized, Align: 1, Size: 0
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: VariableSized, Align: 1, Size: 0
 
 define i32 @csr_d8_allocnxv4i32i32f64_vla(double %d, i32 %i) "aarch64_pstate_sm_compatible" {
 ; CHECK-LABEL: csr_d8_allocnxv4i32i32f64_vla:
@@ -172,7 +173,10 @@ define i32 @csr_d8_allocnxv4i32i32f64_vla(double %d, i32 %i) "aarch64_pstate_sm_
 ; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    add x8, x8, #15
 ; CHECK-NEXT:    and x8, x8, #0x7fffffff0
-; CHECK-NEXT:    sub x8, x9, x8
+; CHECK-NEXT:    sub x9, x9, x8
+; CHECK-NEXT:    mov sp, x9
+; CHECK-NEXT:    mov x10, sp
+; CHECK-NEXT:    sub x8, x10, x8
 ; CHECK-NEXT:    mov sp, x8
 ; CHECK-NEXT:    mov z1.s, #0 // =0x0
 ; CHECK-NEXT:    ptrue p0.s
@@ -181,8 +185,9 @@ define i32 @csr_d8_allocnxv4i32i32f64_vla(double %d, i32 %i) "aarch64_pstate_sm_
 ; CHECK-NEXT:    str wzr, [x8]
 ; CHECK-NEXT:    sub x8, x29, #8
 ; CHECK-NEXT:    mov w0, wzr
-; CHECK-NEXT:    str d0, [x19, #8]
+; CHECK-NEXT:    str wzr, [x9]
 ; CHECK-NEXT:    st1w { z1.s }, p0, [x8, #-1, mul vl]
+; CHECK-NEXT:    str d0, [x19, #8]
 ; CHECK-NEXT:    sub sp, x29, #8
 ; CHECK-NEXT:    ldp x29, x30, [sp, #8] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x19, [sp, #24] // 8-byte Folded Reload
@@ -191,18 +196,20 @@ define i32 @csr_d8_allocnxv4i32i32f64_vla(double %d, i32 %i) "aarch64_pstate_sm_
 entry:
   %a = alloca <vscale x 4 x i32>
   %0 = zext i32 %i to i64
-  %b = alloca i32, i64 %0
+  %vla0 = alloca i32, i64 %0
+  %vla1 = alloca i32, i64 %0
   %c = alloca double
   tail call void asm sideeffect "", "~{d8}"() #1
   store <vscale x 4 x i32> zeroinitializer, ptr %a
-  store i32 zeroinitializer, ptr %b
+  store i32 zeroinitializer, ptr %vla0
+  store i32 zeroinitializer, ptr %vla1
   store double %d, ptr %c
   ret i32 0
 }
 
 ; CHECK-FRAMELAYOUT-LABEL: Function: csr_d8_allocnxv4i32i32f64_stackargsi32f64
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP+8], Type: Variable, Align: 8, Size: 4
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP+0], Type: Protector, Align: 16, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP+8], Type: Fixed, Align: 8, Size: 4
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP+0], Type: Fixed, Align: 16, Size: 8
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-16 x vscale], Type: Variable, Align: 16, Size: vscale x 16
@@ -289,7 +296,7 @@ entry:
 }
 
 ; CHECK-FRAMELAYOUT-LABEL: Function: svecc_z8_allocnxv4i32i32f64_stackargsi32_fp
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP+0], Type: Protector, Align: 16, Size: 4
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP+0], Type: Fixed, Align: 16, Size: 4
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-16 x vscale], Type: Spill, Align: 16, Size: vscale x 16
@@ -514,7 +521,7 @@ declare ptr @memset(ptr, i32, i32)
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-104], Type: Spill, Align: 8, Size: 8
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-112], Type: Spill, Align: 8, Size: 8
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-128], Type: Variable, Align: 16, Size: 16
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-128], Type: Variable, Align: 16, Size: 0
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-128], Type: VariableSized, Align: 16, Size: 0
 
 define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "target-features"="+sme" {
 ; CHECK-LABEL: vastate:
diff --git a/llvm/test/CodeGen/X86/stack-frame-layout-remarks.ll b/llvm/test/CodeGen/X86/stack-frame-layout-remarks.ll
index cd5edcf2ae502..d8ce5b041042e 100644
--- a/llvm/test/CodeGen/X86/stack-frame-layout-remarks.ll
+++ b/llvm/test/CodeGen/X86/stack-frame-layout-remarks.ll
@@ -35,7 +35,7 @@ entry:
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
 
 ; BOTH: Function: cleanup_array
-; BOTH-NEXT:  Offset: [SP+4], Type: Protector, Align: 16, Size: 4
+; BOTH-NEXT:  Offset: [SP+4], Type: Fixed, Align: 16, Size: 4
 ; DEBUG: a @ dot.c:13
 ; STRIPPED-NOT: a @ dot.c:13
 ; BOTH:  Offset: [SP-4], Type: Spill, Align: 8, Size: 4
@@ -47,7 +47,7 @@ define void @cleanup_array(ptr %0) #1 {
 }
 
 ; BOTH: Function: cleanup_result
-; BOTH:  Offset: [SP+4], Type: Protector, Align: 16, Size: 4
+; BOTH:  Offset: [SP+4], Type: Fixed, Align: 16, Size: 4
 ; DEBUG: res @ dot.c:21
 ; STRIPPED-NOT: res @ dot.c:21
 ; BOTH:  Offset: [SP-4], Type: Spill, Align: 8, Size: 4
@@ -59,11 +59,11 @@ define void @cleanup_result(ptr %0) #1 {
 }
 
 ; BOTH: Function: do_work
-; BOTH:  Offset: [SP+12], Type: Variable, Align: 8, Size: 4
+; BOTH:  Offset: [SP+12], Type: Fixed, Align: 8, Size: 4
 ; DEBUG: out @ dot.c:32
 ; STRIPPED-NOT: out @ dot.c:32
-; BOTH:  Offset: [SP+8], Type: Variable, Align: 4, Size: 4
-; BOTH:  Offset: [SP+4], Type: Protector, Align: 16, Size: 4
+; BOTH:  Offset: [SP+8], Type: Fixed, Align: 4, Size: 4
+; BOTH:  Offset: [SP+4], Type: Fixed, Align: 16, Size: 4
 ; DEBUG: A @ dot.c:32
 ; STRIPPED-NOT: A @ dot.c:32
 ; BOTH:  Offset: [SP-4], Type: Spill, Align: 8, Size: 4
@@ -125,7 +125,7 @@ define i32 @do_work(ptr %0, ptr %1, ptr %2) #2 {
 }
 
 ; BOTH: Function: gen_array
-; BOTH:  Offset: [SP+4], Type: Protector, Align: 16, Size: 4
+; BOTH:  Offset: [SP+4], Type: Fixed, Align: 16, Size: 4
 ; DEBUG: size @ dot.c:62
 ; STRIPPED-NOT: size @ dot.c:62
 ; BOTH:  Offset: [SP-4], Type: Spill, Align: 8, Size: 4

From cd302f3914a42da49542cf2f33a39f2c968471ee Mon Sep 17 00:00:00 2001
From: Daniil Kovalev <dkovalev@accesssoftek.com>
Date: Fri, 26 Jul 2024 23:00:16 +0300
Subject: [PATCH 066/427] [PAC][test] Add tests against Linux triples for
 auth/resign lowering (#100744)

The lowering implementation and tests against arm64e-apple-darwin triple
were added previously in #79024.

(cherry picked from commit 53283dc4645ee13f33dd9b98cc935b376bf78232)
---
 llvm/test/CodeGen/AArch64/ptrauth-fpac.ll     | 100 ++++-----
 ...trauth-intrinsic-auth-resign-with-blend.ll | 139 +++++++-----
 .../AArch64/ptrauth-intrinsic-auth-resign.ll  | 205 ++++++++++--------
 3 files changed, 241 insertions(+), 203 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/ptrauth-fpac.ll b/llvm/test/CodeGen/AArch64/ptrauth-fpac.ll
index 6afe1a93d986e..d5340dcebad57 100644
--- a/llvm/test/CodeGen/AArch64/ptrauth-fpac.ll
+++ b/llvm/test/CodeGen/AArch64/ptrauth-fpac.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple arm64e-apple-darwin                                                -verify-machineinstrs | FileCheck %s --check-prefixes=ALL,NOFPAC
-; RUN: llc < %s -mtriple arm64e-apple-darwin -mattr=+fpac                                   -verify-machineinstrs | FileCheck %s --check-prefixes=ALL,FPAC
+; RUN: llc < %s -mtriple arm64e-apple-darwin                          -verify-machineinstrs | FileCheck %s -DL="L"  --check-prefixes=ALL,NOFPAC
+; RUN: llc < %s -mtriple arm64e-apple-darwin             -mattr=+fpac -verify-machineinstrs | FileCheck %s -DL="L"  --check-prefixes=ALL,FPAC
+; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth              -verify-machineinstrs | FileCheck %s -DL=".L" --check-prefixes=ALL,NOFPAC
+; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -mattr=+fpac -verify-machineinstrs | FileCheck %s -DL=".L" --check-prefixes=ALL,FPAC
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 define i64 @test_auth_ia(i64 %arg, i64 %arg1) {
 ; ALL-LABEL: test_auth_ia:
-; ALL:       ; %bb.0:
+; ALL:       %bb.0:
 ; ALL-NEXT:    mov x16, x0
 ; ALL-NEXT:    autia x16, x1
 ; ALL-NEXT:    mov x0, x16
@@ -17,7 +19,7 @@ define i64 @test_auth_ia(i64 %arg, i64 %arg1) {
 
 define i64 @test_auth_ia_zero(i64 %arg) {
 ; ALL-LABEL: test_auth_ia_zero:
-; ALL:       ; %bb.0:
+; ALL:       %bb.0:
 ; ALL-NEXT:    mov x16, x0
 ; ALL-NEXT:    autiza x16
 ; ALL-NEXT:    mov x0, x16
@@ -28,7 +30,7 @@ define i64 @test_auth_ia_zero(i64 %arg) {
 
 define i64 @test_auth_ib(i64 %arg, i64 %arg1) {
 ; ALL-LABEL: test_auth_ib:
-; ALL:       ; %bb.0:
+; ALL:       %bb.0:
 ; ALL-NEXT:    mov x16, x0
 ; ALL-NEXT:    autib x16, x1
 ; ALL-NEXT:    mov x0, x16
@@ -39,7 +41,7 @@ define i64 @test_auth_ib(i64 %arg, i64 %arg1) {
 
 define i64 @test_auth_ib_zero(i64 %arg) {
 ; ALL-LABEL: test_auth_ib_zero:
-; ALL:       ; %bb.0:
+; ALL:       %bb.0:
 ; ALL-NEXT:    mov x16, x0
 ; ALL-NEXT:    autizb x16
 ; ALL-NEXT:    mov x0, x16
@@ -50,7 +52,7 @@ define i64 @test_auth_ib_zero(i64 %arg) {
 
 define i64 @test_auth_da(i64 %arg, i64 %arg1) {
 ; ALL-LABEL: test_auth_da:
-; ALL:       ; %bb.0:
+; ALL:       %bb.0:
 ; ALL-NEXT:    mov x16, x0
 ; ALL-NEXT:    autda x16, x1
 ; ALL-NEXT:    mov x0, x16
@@ -61,7 +63,7 @@ define i64 @test_auth_da(i64 %arg, i64 %arg1) {
 
 define i64 @test_auth_da_zero(i64 %arg) {
 ; ALL-LABEL: test_auth_da_zero:
-; ALL:       ; %bb.0:
+; ALL:       %bb.0:
 ; ALL-NEXT:    mov x16, x0
 ; ALL-NEXT:    autdza x16
 ; ALL-NEXT:    mov x0, x16
@@ -72,7 +74,7 @@ define i64 @test_auth_da_zero(i64 %arg) {
 
 define i64 @test_auth_db(i64 %arg, i64 %arg1) {
 ; ALL-LABEL: test_auth_db:
-; ALL:       ; %bb.0:
+; ALL:       %bb.0:
 ; ALL-NEXT:    mov x16, x0
 ; ALL-NEXT:    autdb x16, x1
 ; ALL-NEXT:    mov x0, x16
@@ -83,7 +85,7 @@ define i64 @test_auth_db(i64 %arg, i64 %arg1) {
 
 define i64 @test_auth_db_zero(i64 %arg) {
 ; ALL-LABEL: test_auth_db_zero:
-; ALL:       ; %bb.0:
+; ALL:       %bb.0:
 ; ALL-NEXT:    mov x16, x0
 ; ALL-NEXT:    autdzb x16
 ; ALL-NEXT:    mov x0, x16
@@ -96,15 +98,15 @@ define i64 @test_auth_db_zero(i64 %arg) {
 ; the validity of a signature.
 define i64 @test_resign_ia_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 ; NOFPAC-LABEL: test_resign_ia_ia:
-; NOFPAC:       ; %bb.0:
+; NOFPAC:       %bb.0:
 ; NOFPAC-NEXT:    mov x16, x0
 ; NOFPAC-NEXT:    autia x16, x1
 ; NOFPAC-NEXT:    mov x17, x16
 ; NOFPAC-NEXT:    xpaci x17
 ; NOFPAC-NEXT:    cmp x16, x17
-; NOFPAC-NEXT:    b.eq Lauth_success_0
+; NOFPAC-NEXT:    b.eq [[L]]auth_success_0
 ; NOFPAC-NEXT:    mov x16, x17
-; NOFPAC-NEXT:    b Lresign_end_0
+; NOFPAC-NEXT:    b [[L]]resign_end_0
 ; NOFPAC-NEXT:  Lauth_success_0:
 ; NOFPAC-NEXT:    pacia x16, x2
 ; NOFPAC-NEXT:  Lresign_end_0:
@@ -112,7 +114,7 @@ define i64 @test_resign_ia_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 ; NOFPAC-NEXT:    ret
 ;
 ; FPAC-LABEL: test_resign_ia_ia:
-; FPAC:       ; %bb.0:
+; FPAC:       %bb.0:
 ; FPAC-NEXT:    mov x16, x0
 ; FPAC-NEXT:    autia x16, x1
 ; FPAC-NEXT:    pacia x16, x2
@@ -124,15 +126,15 @@ define i64 @test_resign_ia_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 
 define i64 @test_resign_ib_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 ; NOFPAC-LABEL: test_resign_ib_ia:
-; NOFPAC:       ; %bb.0:
+; NOFPAC:       %bb.0:
 ; NOFPAC-NEXT:    mov x16, x0
 ; NOFPAC-NEXT:    autib x16, x1
 ; NOFPAC-NEXT:    mov x17, x16
 ; NOFPAC-NEXT:    xpaci x17
 ; NOFPAC-NEXT:    cmp x16, x17
-; NOFPAC-NEXT:    b.eq Lauth_success_1
+; NOFPAC-NEXT:    b.eq [[L]]auth_success_1
 ; NOFPAC-NEXT:    mov x16, x17
-; NOFPAC-NEXT:    b Lresign_end_1
+; NOFPAC-NEXT:    b [[L]]resign_end_1
 ; NOFPAC-NEXT:  Lauth_success_1:
 ; NOFPAC-NEXT:    pacia x16, x2
 ; NOFPAC-NEXT:  Lresign_end_1:
@@ -140,7 +142,7 @@ define i64 @test_resign_ib_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 ; NOFPAC-NEXT:    ret
 ;
 ; FPAC-LABEL: test_resign_ib_ia:
-; FPAC:       ; %bb.0:
+; FPAC:       %bb.0:
 ; FPAC-NEXT:    mov x16, x0
 ; FPAC-NEXT:    autib x16, x1
 ; FPAC-NEXT:    pacia x16, x2
@@ -152,15 +154,15 @@ define i64 @test_resign_ib_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 
 define i64 @test_resign_da_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 ; NOFPAC-LABEL: test_resign_da_ia:
-; NOFPAC:       ; %bb.0:
+; NOFPAC:       %bb.0:
 ; NOFPAC-NEXT:    mov x16, x0
 ; NOFPAC-NEXT:    autda x16, x1
 ; NOFPAC-NEXT:    mov x17, x16
 ; NOFPAC-NEXT:    xpacd x17
 ; NOFPAC-NEXT:    cmp x16, x17
-; NOFPAC-NEXT:    b.eq Lauth_success_2
+; NOFPAC-NEXT:    b.eq [[L]]auth_success_2
 ; NOFPAC-NEXT:    mov x16, x17
-; NOFPAC-NEXT:    b Lresign_end_2
+; NOFPAC-NEXT:    b [[L]]resign_end_2
 ; NOFPAC-NEXT:  Lauth_success_2:
 ; NOFPAC-NEXT:    pacia x16, x2
 ; NOFPAC-NEXT:  Lresign_end_2:
@@ -168,7 +170,7 @@ define i64 @test_resign_da_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 ; NOFPAC-NEXT:    ret
 ;
 ; FPAC-LABEL: test_resign_da_ia:
-; FPAC:       ; %bb.0:
+; FPAC:       %bb.0:
 ; FPAC-NEXT:    mov x16, x0
 ; FPAC-NEXT:    autda x16, x1
 ; FPAC-NEXT:    pacia x16, x2
@@ -180,15 +182,15 @@ define i64 @test_resign_da_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 
 define i64 @test_resign_db_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 ; NOFPAC-LABEL: test_resign_db_ia:
-; NOFPAC:       ; %bb.0:
+; NOFPAC:       %bb.0:
 ; NOFPAC-NEXT:    mov x16, x0
 ; NOFPAC-NEXT:    autdb x16, x1
 ; NOFPAC-NEXT:    mov x17, x16
 ; NOFPAC-NEXT:    xpacd x17
 ; NOFPAC-NEXT:    cmp x16, x17
-; NOFPAC-NEXT:    b.eq Lauth_success_3
+; NOFPAC-NEXT:    b.eq [[L]]auth_success_3
 ; NOFPAC-NEXT:    mov x16, x17
-; NOFPAC-NEXT:    b Lresign_end_3
+; NOFPAC-NEXT:    b [[L]]resign_end_3
 ; NOFPAC-NEXT:  Lauth_success_3:
 ; NOFPAC-NEXT:    pacia x16, x2
 ; NOFPAC-NEXT:  Lresign_end_3:
@@ -196,7 +198,7 @@ define i64 @test_resign_db_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 ; NOFPAC-NEXT:    ret
 ;
 ; FPAC-LABEL: test_resign_db_ia:
-; FPAC:       ; %bb.0:
+; FPAC:       %bb.0:
 ; FPAC-NEXT:    mov x16, x0
 ; FPAC-NEXT:    autdb x16, x1
 ; FPAC-NEXT:    pacia x16, x2
@@ -208,15 +210,15 @@ define i64 @test_resign_db_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 
 define i64 @test_resign_db_ib(i64 %arg, i64 %arg1, i64 %arg2) {
 ; NOFPAC-LABEL: test_resign_db_ib:
-; NOFPAC:       ; %bb.0:
+; NOFPAC:       %bb.0:
 ; NOFPAC-NEXT:    mov x16, x0
 ; NOFPAC-NEXT:    autdb x16, x1
 ; NOFPAC-NEXT:    mov x17, x16
 ; NOFPAC-NEXT:    xpacd x17
 ; NOFPAC-NEXT:    cmp x16, x17
-; NOFPAC-NEXT:    b.eq Lauth_success_4
+; NOFPAC-NEXT:    b.eq [[L]]auth_success_4
 ; NOFPAC-NEXT:    mov x16, x17
-; NOFPAC-NEXT:    b Lresign_end_4
+; NOFPAC-NEXT:    b [[L]]resign_end_4
 ; NOFPAC-NEXT:  Lauth_success_4:
 ; NOFPAC-NEXT:    pacib x16, x2
 ; NOFPAC-NEXT:  Lresign_end_4:
@@ -224,7 +226,7 @@ define i64 @test_resign_db_ib(i64 %arg, i64 %arg1, i64 %arg2) {
 ; NOFPAC-NEXT:    ret
 ;
 ; FPAC-LABEL: test_resign_db_ib:
-; FPAC:       ; %bb.0:
+; FPAC:       %bb.0:
 ; FPAC-NEXT:    mov x16, x0
 ; FPAC-NEXT:    autdb x16, x1
 ; FPAC-NEXT:    pacib x16, x2
@@ -236,15 +238,15 @@ define i64 @test_resign_db_ib(i64 %arg, i64 %arg1, i64 %arg2) {
 
 define i64 @test_resign_db_da(i64 %arg, i64 %arg1, i64 %arg2) {
 ; NOFPAC-LABEL: test_resign_db_da:
-; NOFPAC:       ; %bb.0:
+; NOFPAC:       %bb.0:
 ; NOFPAC-NEXT:    mov x16, x0
 ; NOFPAC-NEXT:    autdb x16, x1
 ; NOFPAC-NEXT:    mov x17, x16
 ; NOFPAC-NEXT:    xpacd x17
 ; NOFPAC-NEXT:    cmp x16, x17
-; NOFPAC-NEXT:    b.eq Lauth_success_5
+; NOFPAC-NEXT:    b.eq [[L]]auth_success_5
 ; NOFPAC-NEXT:    mov x16, x17
-; NOFPAC-NEXT:    b Lresign_end_5
+; NOFPAC-NEXT:    b [[L]]resign_end_5
 ; NOFPAC-NEXT:  Lauth_success_5:
 ; NOFPAC-NEXT:    pacda x16, x2
 ; NOFPAC-NEXT:  Lresign_end_5:
@@ -252,7 +254,7 @@ define i64 @test_resign_db_da(i64 %arg, i64 %arg1, i64 %arg2) {
 ; NOFPAC-NEXT:    ret
 ;
 ; FPAC-LABEL: test_resign_db_da:
-; FPAC:       ; %bb.0:
+; FPAC:       %bb.0:
 ; FPAC-NEXT:    mov x16, x0
 ; FPAC-NEXT:    autdb x16, x1
 ; FPAC-NEXT:    pacda x16, x2
@@ -264,15 +266,15 @@ define i64 @test_resign_db_da(i64 %arg, i64 %arg1, i64 %arg2) {
 
 define i64 @test_resign_db_db(i64 %arg, i64 %arg1, i64 %arg2) {
 ; NOFPAC-LABEL: test_resign_db_db:
-; NOFPAC:       ; %bb.0:
+; NOFPAC:       %bb.0:
 ; NOFPAC-NEXT:    mov x16, x0
 ; NOFPAC-NEXT:    autdb x16, x1
 ; NOFPAC-NEXT:    mov x17, x16
 ; NOFPAC-NEXT:    xpacd x17
 ; NOFPAC-NEXT:    cmp x16, x17
-; NOFPAC-NEXT:    b.eq Lauth_success_6
+; NOFPAC-NEXT:    b.eq [[L]]auth_success_6
 ; NOFPAC-NEXT:    mov x16, x17
-; NOFPAC-NEXT:    b Lresign_end_6
+; NOFPAC-NEXT:    b [[L]]resign_end_6
 ; NOFPAC-NEXT:  Lauth_success_6:
 ; NOFPAC-NEXT:    pacdb x16, x2
 ; NOFPAC-NEXT:  Lresign_end_6:
@@ -280,7 +282,7 @@ define i64 @test_resign_db_db(i64 %arg, i64 %arg1, i64 %arg2) {
 ; NOFPAC-NEXT:    ret
 ;
 ; FPAC-LABEL: test_resign_db_db:
-; FPAC:       ; %bb.0:
+; FPAC:       %bb.0:
 ; FPAC-NEXT:    mov x16, x0
 ; FPAC-NEXT:    autdb x16, x1
 ; FPAC-NEXT:    pacdb x16, x2
@@ -292,15 +294,15 @@ define i64 @test_resign_db_db(i64 %arg, i64 %arg1, i64 %arg2) {
 
 define i64 @test_resign_iza_db(i64 %arg, i64 %arg1, i64 %arg2) {
 ; NOFPAC-LABEL: test_resign_iza_db:
-; NOFPAC:       ; %bb.0:
+; NOFPAC:       %bb.0:
 ; NOFPAC-NEXT:    mov x16, x0
 ; NOFPAC-NEXT:    autiza x16
 ; NOFPAC-NEXT:    mov x17, x16
 ; NOFPAC-NEXT:    xpaci x17
 ; NOFPAC-NEXT:    cmp x16, x17
-; NOFPAC-NEXT:    b.eq Lauth_success_7
+; NOFPAC-NEXT:    b.eq [[L]]auth_success_7
 ; NOFPAC-NEXT:    mov x16, x17
-; NOFPAC-NEXT:    b Lresign_end_7
+; NOFPAC-NEXT:    b [[L]]resign_end_7
 ; NOFPAC-NEXT:  Lauth_success_7:
 ; NOFPAC-NEXT:    pacdb x16, x2
 ; NOFPAC-NEXT:  Lresign_end_7:
@@ -308,7 +310,7 @@ define i64 @test_resign_iza_db(i64 %arg, i64 %arg1, i64 %arg2) {
 ; NOFPAC-NEXT:    ret
 ;
 ; FPAC-LABEL: test_resign_iza_db:
-; FPAC:       ; %bb.0:
+; FPAC:       %bb.0:
 ; FPAC-NEXT:    mov x16, x0
 ; FPAC-NEXT:    autiza x16
 ; FPAC-NEXT:    pacdb x16, x2
@@ -320,15 +322,15 @@ define i64 @test_resign_iza_db(i64 %arg, i64 %arg1, i64 %arg2) {
 
 define i64 @test_resign_da_dzb(i64 %arg, i64 %arg1, i64 %arg2) {
 ; NOFPAC-LABEL: test_resign_da_dzb:
-; NOFPAC:       ; %bb.0:
+; NOFPAC:       %bb.0:
 ; NOFPAC-NEXT:    mov x16, x0
 ; NOFPAC-NEXT:    autda x16, x1
 ; NOFPAC-NEXT:    mov x17, x16
 ; NOFPAC-NEXT:    xpacd x17
 ; NOFPAC-NEXT:    cmp x16, x17
-; NOFPAC-NEXT:    b.eq Lauth_success_8
+; NOFPAC-NEXT:    b.eq [[L]]auth_success_8
 ; NOFPAC-NEXT:    mov x16, x17
-; NOFPAC-NEXT:    b Lresign_end_8
+; NOFPAC-NEXT:    b [[L]]resign_end_8
 ; NOFPAC-NEXT:  Lauth_success_8:
 ; NOFPAC-NEXT:    pacdzb x16
 ; NOFPAC-NEXT:  Lresign_end_8:
@@ -336,7 +338,7 @@ define i64 @test_resign_da_dzb(i64 %arg, i64 %arg1, i64 %arg2) {
 ; NOFPAC-NEXT:    ret
 ;
 ; FPAC-LABEL: test_resign_da_dzb:
-; FPAC:       ; %bb.0:
+; FPAC:       %bb.0:
 ; FPAC-NEXT:    mov x16, x0
 ; FPAC-NEXT:    autda x16, x1
 ; FPAC-NEXT:    pacdzb x16
@@ -348,20 +350,20 @@ define i64 @test_resign_da_dzb(i64 %arg, i64 %arg1, i64 %arg2) {
 
 define i64 @test_auth_trap_attribute(i64 %arg, i64 %arg1) "ptrauth-auth-traps" {
 ; NOFPAC-LABEL: test_auth_trap_attribute:
-; NOFPAC:       ; %bb.0:
+; NOFPAC:       %bb.0:
 ; NOFPAC-NEXT:    mov x16, x0
 ; NOFPAC-NEXT:    autia x16, x1
 ; NOFPAC-NEXT:    mov x17, x16
 ; NOFPAC-NEXT:    xpaci x17
 ; NOFPAC-NEXT:    cmp x16, x17
-; NOFPAC-NEXT:    b.eq Lauth_success_9
+; NOFPAC-NEXT:    b.eq [[L]]auth_success_9
 ; NOFPAC-NEXT:    brk #0xc470
 ; NOFPAC-NEXT:  Lauth_success_9:
 ; NOFPAC-NEXT:    mov x0, x16
 ; NOFPAC-NEXT:    ret
 ;
 ; FPAC-LABEL: test_auth_trap_attribute:
-; FPAC:       ; %bb.0:
+; FPAC:       %bb.0:
 ; FPAC-NEXT:    mov x16, x0
 ; FPAC-NEXT:    autia x16, x1
 ; FPAC-NEXT:    mov x0, x16
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-intrinsic-auth-resign-with-blend.ll b/llvm/test/CodeGen/AArch64/ptrauth-intrinsic-auth-resign-with-blend.ll
index 3b93acd8e46f7..74d2370c74c54 100644
--- a/llvm/test/CodeGen/AArch64/ptrauth-intrinsic-auth-resign-with-blend.ll
+++ b/llvm/test/CodeGen/AArch64/ptrauth-intrinsic-auth-resign-with-blend.ll
@@ -1,24 +1,39 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple arm64e-apple-darwin -global-isel=0                    -verify-machineinstrs \
-; RUN:   -aarch64-ptrauth-auth-checks=none | FileCheck %s --check-prefix=UNCHECKED
+; RUN:   -aarch64-ptrauth-auth-checks=none | FileCheck %s -DL="L" --check-prefixes=UNCHECKED,UNCHECKED-DARWIN
 ; RUN: llc < %s -mtriple arm64e-apple-darwin -global-isel -global-isel-abort=1 -verify-machineinstrs \
-; RUN:   -aarch64-ptrauth-auth-checks=none | FileCheck %s --check-prefix=UNCHECKED
+; RUN:   -aarch64-ptrauth-auth-checks=none | FileCheck %s -DL="L" --check-prefixes=UNCHECKED,UNCHECKED-DARWIN
 
 ; RUN: llc < %s -mtriple arm64e-apple-darwin -global-isel=0                    -verify-machineinstrs \
-; RUN:                                     | FileCheck %s --check-prefix=CHECKED
+; RUN:                                     | FileCheck %s -DL="L" --check-prefixes=CHECKED,CHECKED-DARWIN
 ; RUN: llc < %s -mtriple arm64e-apple-darwin -global-isel -global-isel-abort=1 -verify-machineinstrs \
-; RUN:                                     | FileCheck %s --check-prefix=CHECKED
+; RUN:                                     | FileCheck %s -DL="L" --check-prefixes=CHECKED,CHECKED-DARWIN
 
 ; RUN: llc < %s -mtriple arm64e-apple-darwin -global-isel=0                    -verify-machineinstrs \
-; RUN:   -aarch64-ptrauth-auth-checks=trap | FileCheck %s --check-prefix=TRAP
+; RUN:   -aarch64-ptrauth-auth-checks=trap | FileCheck %s -DL="L" --check-prefixes=TRAP,TRAP-DARWIN
 ; RUN: llc < %s -mtriple arm64e-apple-darwin -global-isel -global-isel-abort=1 -verify-machineinstrs \
-; RUN:   -aarch64-ptrauth-auth-checks=trap | FileCheck %s --check-prefix=TRAP
+; RUN:   -aarch64-ptrauth-auth-checks=trap | FileCheck %s -DL="L" --check-prefixes=TRAP,TRAP-DARWIN
+
+; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -global-isel=0                    -verify-machineinstrs \
+; RUN:   -aarch64-ptrauth-auth-checks=none | FileCheck %s -DL=".L" --check-prefixes=UNCHECKED,UNCHECKED-ELF
+; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -global-isel -global-isel-abort=1 -verify-machineinstrs \
+; RUN:   -aarch64-ptrauth-auth-checks=none | FileCheck %s -DL=".L" --check-prefixes=UNCHECKED,UNCHECKED-ELF
+
+; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -global-isel=0                    -verify-machineinstrs \
+; RUN:                                     | FileCheck %s -DL=".L" --check-prefixes=CHECKED,CHECKED-ELF
+; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -global-isel -global-isel-abort=1 -verify-machineinstrs \
+; RUN:                                     | FileCheck %s -DL=".L" --check-prefixes=CHECKED,CHECKED-ELF
+
+; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -global-isel=0                    -verify-machineinstrs \
+; RUN:   -aarch64-ptrauth-auth-checks=trap | FileCheck %s -DL=".L" --check-prefixes=TRAP,TRAP-ELF
+; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -global-isel -global-isel-abort=1 -verify-machineinstrs \
+; RUN:   -aarch64-ptrauth-auth-checks=trap | FileCheck %s -DL=".L" --check-prefixes=TRAP,TRAP-ELF
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 define i64 @test_auth_blend(i64 %arg, i64 %arg1) {
 ; UNCHECKED-LABEL: test_auth_blend:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    mov x17, x1
 ; UNCHECKED-NEXT:    movk x17, #65535, lsl #48
@@ -27,7 +42,7 @@ define i64 @test_auth_blend(i64 %arg, i64 %arg1) {
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_auth_blend:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    mov x17, x1
 ; CHECKED-NEXT:    movk x17, #65535, lsl #48
@@ -36,7 +51,7 @@ define i64 @test_auth_blend(i64 %arg, i64 %arg1) {
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_auth_blend:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    mov x17, x1
 ; TRAP-NEXT:    movk x17, #65535, lsl #48
@@ -44,7 +59,7 @@ define i64 @test_auth_blend(i64 %arg, i64 %arg1) {
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpacd x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_0
+; TRAP-NEXT:    b.eq [[L]]auth_success_0
 ; TRAP-NEXT:    brk #0xc472
 ; TRAP-NEXT:  Lauth_success_0:
 ; TRAP-NEXT:    mov x0, x16
@@ -56,7 +71,7 @@ define i64 @test_auth_blend(i64 %arg, i64 %arg1) {
 
 define i64 @test_resign_blend(i64 %arg, i64 %arg1, i64 %arg2) {
 ; UNCHECKED-LABEL: test_resign_blend:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    mov x17, x1
 ; UNCHECKED-NEXT:    movk x17, #12345, lsl #48
@@ -68,7 +83,7 @@ define i64 @test_resign_blend(i64 %arg, i64 %arg1, i64 %arg2) {
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_resign_blend:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    mov x17, x1
 ; CHECKED-NEXT:    movk x17, #12345, lsl #48
@@ -76,9 +91,9 @@ define i64 @test_resign_blend(i64 %arg, i64 %arg1, i64 %arg2) {
 ; CHECKED-NEXT:    mov x17, x16
 ; CHECKED-NEXT:    xpacd x17
 ; CHECKED-NEXT:    cmp x16, x17
-; CHECKED-NEXT:    b.eq Lauth_success_0
+; CHECKED-NEXT:    b.eq [[L]]auth_success_0
 ; CHECKED-NEXT:    mov x16, x17
-; CHECKED-NEXT:    b Lresign_end_0
+; CHECKED-NEXT:    b [[L]]resign_end_0
 ; CHECKED-NEXT:  Lauth_success_0:
 ; CHECKED-NEXT:    mov x17, x2
 ; CHECKED-NEXT:    movk x17, #56789, lsl #48
@@ -88,7 +103,7 @@ define i64 @test_resign_blend(i64 %arg, i64 %arg1, i64 %arg2) {
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_resign_blend:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    mov x17, x1
 ; TRAP-NEXT:    movk x17, #12345, lsl #48
@@ -96,7 +111,7 @@ define i64 @test_resign_blend(i64 %arg, i64 %arg1, i64 %arg2) {
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpacd x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_1
+; TRAP-NEXT:    b.eq [[L]]auth_success_1
 ; TRAP-NEXT:    brk #0xc472
 ; TRAP-NEXT:  Lauth_success_1:
 ; TRAP-NEXT:    mov x17, x2
@@ -112,18 +127,18 @@ define i64 @test_resign_blend(i64 %arg, i64 %arg1, i64 %arg2) {
 
 define i64 @test_resign_blend_and_const(i64 %arg, i64 %arg1) {
 ; UNCHECKED-LABEL: test_resign_blend_and_const:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    mov x17, x1
 ; UNCHECKED-NEXT:    movk x17, #12345, lsl #48
 ; UNCHECKED-NEXT:    autda x16, x17
-; UNCHECKED-NEXT:    mov x17, #56789 ; =0xddd5
+; UNCHECKED-NEXT:    mov x17, #56789
 ; UNCHECKED-NEXT:    pacdb x16, x17
 ; UNCHECKED-NEXT:    mov x0, x16
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_resign_blend_and_const:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    mov x17, x1
 ; CHECKED-NEXT:    movk x17, #12345, lsl #48
@@ -131,18 +146,18 @@ define i64 @test_resign_blend_and_const(i64 %arg, i64 %arg1) {
 ; CHECKED-NEXT:    mov x17, x16
 ; CHECKED-NEXT:    xpacd x17
 ; CHECKED-NEXT:    cmp x16, x17
-; CHECKED-NEXT:    b.eq Lauth_success_1
+; CHECKED-NEXT:    b.eq [[L]]auth_success_1
 ; CHECKED-NEXT:    mov x16, x17
-; CHECKED-NEXT:    b Lresign_end_1
+; CHECKED-NEXT:    b [[L]]resign_end_1
 ; CHECKED-NEXT:  Lauth_success_1:
-; CHECKED-NEXT:    mov x17, #56789 ; =0xddd5
+; CHECKED-NEXT:    mov x17, #56789
 ; CHECKED-NEXT:    pacdb x16, x17
 ; CHECKED-NEXT:  Lresign_end_1:
 ; CHECKED-NEXT:    mov x0, x16
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_resign_blend_and_const:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    mov x17, x1
 ; TRAP-NEXT:    movk x17, #12345, lsl #48
@@ -150,10 +165,10 @@ define i64 @test_resign_blend_and_const(i64 %arg, i64 %arg1) {
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpacd x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_2
+; TRAP-NEXT:    b.eq [[L]]auth_success_2
 ; TRAP-NEXT:    brk #0xc472
 ; TRAP-NEXT:  Lauth_success_2:
-; TRAP-NEXT:    mov x17, #56789 ; =0xddd5
+; TRAP-NEXT:    mov x17, #56789
 ; TRAP-NEXT:    pacdb x16, x17
 ; TRAP-NEXT:    mov x0, x16
 ; TRAP-NEXT:    ret
@@ -164,7 +179,7 @@ define i64 @test_resign_blend_and_const(i64 %arg, i64 %arg1) {
 
 define i64 @test_resign_blend_and_addr(i64 %arg, i64 %arg1, i64 %arg2) {
 ; UNCHECKED-LABEL: test_resign_blend_and_addr:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    mov x17, x1
 ; UNCHECKED-NEXT:    movk x17, #12345, lsl #48
@@ -174,7 +189,7 @@ define i64 @test_resign_blend_and_addr(i64 %arg, i64 %arg1, i64 %arg2) {
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_resign_blend_and_addr:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    mov x17, x1
 ; CHECKED-NEXT:    movk x17, #12345, lsl #48
@@ -182,9 +197,9 @@ define i64 @test_resign_blend_and_addr(i64 %arg, i64 %arg1, i64 %arg2) {
 ; CHECKED-NEXT:    mov x17, x16
 ; CHECKED-NEXT:    xpacd x17
 ; CHECKED-NEXT:    cmp x16, x17
-; CHECKED-NEXT:    b.eq Lauth_success_2
+; CHECKED-NEXT:    b.eq [[L]]auth_success_2
 ; CHECKED-NEXT:    mov x16, x17
-; CHECKED-NEXT:    b Lresign_end_2
+; CHECKED-NEXT:    b [[L]]resign_end_2
 ; CHECKED-NEXT:  Lauth_success_2:
 ; CHECKED-NEXT:    pacdb x16, x2
 ; CHECKED-NEXT:  Lresign_end_2:
@@ -192,7 +207,7 @@ define i64 @test_resign_blend_and_addr(i64 %arg, i64 %arg1, i64 %arg2) {
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_resign_blend_and_addr:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    mov x17, x1
 ; TRAP-NEXT:    movk x17, #12345, lsl #48
@@ -200,7 +215,7 @@ define i64 @test_resign_blend_and_addr(i64 %arg, i64 %arg1, i64 %arg2) {
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpacd x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_3
+; TRAP-NEXT:    b.eq [[L]]auth_success_3
 ; TRAP-NEXT:    brk #0xc472
 ; TRAP-NEXT:  Lauth_success_3:
 ; TRAP-NEXT:    pacdb x16, x2
@@ -212,38 +227,44 @@ define i64 @test_resign_blend_and_addr(i64 %arg, i64 %arg1, i64 %arg2) {
 }
 
 define i64 @test_auth_too_large_discriminator(i64 %arg, i64 %arg1) {
-; UNCHECKED-LABEL: test_auth_too_large_discriminator:
-; UNCHECKED:       ; %bb.0:
-; UNCHECKED-NEXT:    mov w8, #65536 ; =0x10000
-; UNCHECKED-NEXT:    bfi x1, x8, #48, #16
-; UNCHECKED-NEXT:    mov x16, x0
-; UNCHECKED-NEXT:    autda x16, x1
-; UNCHECKED-NEXT:    mov x0, x16
-; UNCHECKED-NEXT:    ret
+; UNCHECKED-LABEL:     test_auth_too_large_discriminator:
+; UNCHECKED:           %bb.0:
+; UNCHECKED-NEXT:        mov w8, #65536
+; UNCHECKED-DARWIN-NEXT: bfi x1, x8, #48, #16
+; UNCHECKED-DARWIN-NEXT: mov x16, x0
+; UNCHECKED-ELF-NEXT:    mov x16, x0
+; UNCHECKED-ELF-NEXT:    bfi x1, x8, #48, #16
+; UNCHECKED-NEXT:        autda x16, x1
+; UNCHECKED-NEXT:        mov x0, x16
+; UNCHECKED-NEXT:        ret
 ;
 ; CHECKED-LABEL: test_auth_too_large_discriminator:
-; CHECKED:       ; %bb.0:
-; CHECKED-NEXT:    mov w8, #65536 ; =0x10000
-; CHECKED-NEXT:    bfi x1, x8, #48, #16
-; CHECKED-NEXT:    mov x16, x0
-; CHECKED-NEXT:    autda x16, x1
-; CHECKED-NEXT:    mov x0, x16
-; CHECKED-NEXT:    ret
+; CHECKED:           %bb.0:
+; CHECKED-NEXT:        mov w8, #65536
+; CHECKED-DARWIN-NEXT: bfi x1, x8, #48, #16
+; CHECKED-DARWIN-NEXT: mov x16, x0
+; CHECKED-ELF-NEXT:    mov x16, x0
+; CHECKED-ELF-NEXT:    bfi x1, x8, #48, #16
+; CHECKED-NEXT:        autda x16, x1
+; CHECKED-NEXT:        mov x0, x16
+; CHECKED-NEXT:        ret
 ;
 ; TRAP-LABEL: test_auth_too_large_discriminator:
-; TRAP:       ; %bb.0:
-; TRAP-NEXT:    mov w8, #65536 ; =0x10000
-; TRAP-NEXT:    bfi x1, x8, #48, #16
-; TRAP-NEXT:    mov x16, x0
-; TRAP-NEXT:    autda x16, x1
-; TRAP-NEXT:    mov x17, x16
-; TRAP-NEXT:    xpacd x17
-; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_4
-; TRAP-NEXT:    brk #0xc472
-; TRAP-NEXT:  Lauth_success_4:
-; TRAP-NEXT:    mov x0, x16
-; TRAP-NEXT:    ret
+; TRAP:           %bb.0:
+; TRAP-NEXT:        mov w8, #65536
+; TRAP-DARWIN-NEXT: bfi x1, x8, #48, #16
+; TRAP-DARWIN-NEXT: mov x16, x0
+; TRAP-ELF-NEXT:    mov x16, x0
+; TRAP-ELF-NEXT:    bfi x1, x8, #48, #16
+; TRAP-NEXT:        autda x16, x1
+; TRAP-NEXT:        mov x17, x16
+; TRAP-NEXT:        xpacd x17
+; TRAP-NEXT:        cmp x16, x17
+; TRAP-NEXT:        b.eq [[L]]auth_success_4
+; TRAP-NEXT:        brk #0xc472
+; TRAP-NEXT:      Lauth_success_4:
+; TRAP-NEXT:        mov x0, x16
+; TRAP-NEXT:        ret
   %tmp0 = call i64 @llvm.ptrauth.blend(i64 %arg1, i64 65536)
   %tmp1 = call i64 @llvm.ptrauth.auth(i64 %arg, i32 2, i64 %tmp0)
   ret i64 %tmp1
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-intrinsic-auth-resign.ll b/llvm/test/CodeGen/AArch64/ptrauth-intrinsic-auth-resign.ll
index 62c9fba853adb..fdd5ae29f35ea 100644
--- a/llvm/test/CodeGen/AArch64/ptrauth-intrinsic-auth-resign.ll
+++ b/llvm/test/CodeGen/AArch64/ptrauth-intrinsic-auth-resign.ll
@@ -1,44 +1,59 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple arm64e-apple-darwin -global-isel=0                    -verify-machineinstrs \
-; RUN:   -aarch64-ptrauth-auth-checks=none | FileCheck %s --check-prefix=UNCHECKED
+; RUN:   -aarch64-ptrauth-auth-checks=none | FileCheck %s -DL="L" --check-prefix=UNCHECKED
 ; RUN: llc < %s -mtriple arm64e-apple-darwin -global-isel -global-isel-abort=1 -verify-machineinstrs \
-; RUN:   -aarch64-ptrauth-auth-checks=none | FileCheck %s --check-prefix=UNCHECKED
+; RUN:   -aarch64-ptrauth-auth-checks=none | FileCheck %s -DL="L" --check-prefix=UNCHECKED
 
 ; RUN: llc < %s -mtriple arm64e-apple-darwin -global-isel=0                    -verify-machineinstrs \
-; RUN:                                     | FileCheck %s --check-prefix=CHECKED
+; RUN:                                     | FileCheck %s -DL="L" --check-prefix=CHECKED
 ; RUN: llc < %s -mtriple arm64e-apple-darwin -global-isel -global-isel-abort=1 -verify-machineinstrs \
-; RUN:                                     | FileCheck %s --check-prefix=CHECKED
+; RUN:                                     | FileCheck %s -DL="L" --check-prefix=CHECKED
 
 ; RUN: llc < %s -mtriple arm64e-apple-darwin -global-isel=0                    -verify-machineinstrs \
-; RUN:   -aarch64-ptrauth-auth-checks=trap | FileCheck %s --check-prefix=TRAP
+; RUN:   -aarch64-ptrauth-auth-checks=trap | FileCheck %s -DL="L" --check-prefix=TRAP
 ; RUN: llc < %s -mtriple arm64e-apple-darwin -global-isel -global-isel-abort=1 -verify-machineinstrs \
-; RUN:   -aarch64-ptrauth-auth-checks=trap | FileCheck %s --check-prefix=TRAP
+; RUN:   -aarch64-ptrauth-auth-checks=trap | FileCheck %s -DL="L" --check-prefix=TRAP
+
+; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -global-isel=0                    -verify-machineinstrs \
+; RUN:   -aarch64-ptrauth-auth-checks=none | FileCheck %s -DL=".L" --check-prefix=UNCHECKED
+; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -global-isel -global-isel-abort=1 -verify-machineinstrs \
+; RUN:   -aarch64-ptrauth-auth-checks=none | FileCheck %s -DL=".L" --check-prefix=UNCHECKED
+
+; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -global-isel=0                    -verify-machineinstrs \
+; RUN:                                     | FileCheck %s -DL=".L" --check-prefix=CHECKED
+; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -global-isel -global-isel-abort=1 -verify-machineinstrs \
+; RUN:                                     | FileCheck %s -DL=".L" --check-prefix=CHECKED
+
+; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -global-isel=0                    -verify-machineinstrs \
+; RUN:   -aarch64-ptrauth-auth-checks=trap | FileCheck %s -DL=".L" --check-prefix=TRAP
+; RUN: llc < %s -mtriple aarch64-linux-gnu -mattr=+pauth -global-isel -global-isel-abort=1 -verify-machineinstrs \
+; RUN:   -aarch64-ptrauth-auth-checks=trap | FileCheck %s -DL=".L" --check-prefix=TRAP
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 define i64 @test_auth_ia(i64 %arg, i64 %arg1) {
 ; UNCHECKED-LABEL: test_auth_ia:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    autia x16, x1
 ; UNCHECKED-NEXT:    mov x0, x16
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_auth_ia:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    autia x16, x1
 ; CHECKED-NEXT:    mov x0, x16
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_auth_ia:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    autia x16, x1
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpaci x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_0
+; TRAP-NEXT:    b.eq [[L]]auth_success_0
 ; TRAP-NEXT:    brk #0xc470
 ; TRAP-NEXT:  Lauth_success_0:
 ; TRAP-NEXT:    mov x0, x16
@@ -49,27 +64,27 @@ define i64 @test_auth_ia(i64 %arg, i64 %arg1) {
 
 define i64 @test_auth_ia_zero(i64 %arg) {
 ; UNCHECKED-LABEL: test_auth_ia_zero:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    autiza x16
 ; UNCHECKED-NEXT:    mov x0, x16
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_auth_ia_zero:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    autiza x16
 ; CHECKED-NEXT:    mov x0, x16
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_auth_ia_zero:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    autiza x16
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpaci x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_1
+; TRAP-NEXT:    b.eq [[L]]auth_success_1
 ; TRAP-NEXT:    brk #0xc470
 ; TRAP-NEXT:  Lauth_success_1:
 ; TRAP-NEXT:    mov x0, x16
@@ -80,27 +95,27 @@ define i64 @test_auth_ia_zero(i64 %arg) {
 
 define i64 @test_auth_ib(i64 %arg, i64 %arg1) {
 ; UNCHECKED-LABEL: test_auth_ib:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    autib x16, x1
 ; UNCHECKED-NEXT:    mov x0, x16
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_auth_ib:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    autib x16, x1
 ; CHECKED-NEXT:    mov x0, x16
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_auth_ib:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    autib x16, x1
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpaci x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_2
+; TRAP-NEXT:    b.eq [[L]]auth_success_2
 ; TRAP-NEXT:    brk #0xc471
 ; TRAP-NEXT:  Lauth_success_2:
 ; TRAP-NEXT:    mov x0, x16
@@ -111,27 +126,27 @@ define i64 @test_auth_ib(i64 %arg, i64 %arg1) {
 
 define i64 @test_auth_ib_zero(i64 %arg) {
 ; UNCHECKED-LABEL: test_auth_ib_zero:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    autizb x16
 ; UNCHECKED-NEXT:    mov x0, x16
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_auth_ib_zero:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    autizb x16
 ; CHECKED-NEXT:    mov x0, x16
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_auth_ib_zero:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    autizb x16
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpaci x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_3
+; TRAP-NEXT:    b.eq [[L]]auth_success_3
 ; TRAP-NEXT:    brk #0xc471
 ; TRAP-NEXT:  Lauth_success_3:
 ; TRAP-NEXT:    mov x0, x16
@@ -142,27 +157,27 @@ define i64 @test_auth_ib_zero(i64 %arg) {
 
 define i64 @test_auth_da(i64 %arg, i64 %arg1) {
 ; UNCHECKED-LABEL: test_auth_da:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    autda x16, x1
 ; UNCHECKED-NEXT:    mov x0, x16
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_auth_da:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    autda x16, x1
 ; CHECKED-NEXT:    mov x0, x16
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_auth_da:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    autda x16, x1
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpacd x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_4
+; TRAP-NEXT:    b.eq [[L]]auth_success_4
 ; TRAP-NEXT:    brk #0xc472
 ; TRAP-NEXT:  Lauth_success_4:
 ; TRAP-NEXT:    mov x0, x16
@@ -173,27 +188,27 @@ define i64 @test_auth_da(i64 %arg, i64 %arg1) {
 
 define i64 @test_auth_da_zero(i64 %arg) {
 ; UNCHECKED-LABEL: test_auth_da_zero:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    autdza x16
 ; UNCHECKED-NEXT:    mov x0, x16
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_auth_da_zero:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    autdza x16
 ; CHECKED-NEXT:    mov x0, x16
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_auth_da_zero:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    autdza x16
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpacd x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_5
+; TRAP-NEXT:    b.eq [[L]]auth_success_5
 ; TRAP-NEXT:    brk #0xc472
 ; TRAP-NEXT:  Lauth_success_5:
 ; TRAP-NEXT:    mov x0, x16
@@ -204,27 +219,27 @@ define i64 @test_auth_da_zero(i64 %arg) {
 
 define i64 @test_auth_db(i64 %arg, i64 %arg1) {
 ; UNCHECKED-LABEL: test_auth_db:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    autdb x16, x1
 ; UNCHECKED-NEXT:    mov x0, x16
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_auth_db:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    autdb x16, x1
 ; CHECKED-NEXT:    mov x0, x16
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_auth_db:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    autdb x16, x1
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpacd x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_6
+; TRAP-NEXT:    b.eq [[L]]auth_success_6
 ; TRAP-NEXT:    brk #0xc473
 ; TRAP-NEXT:  Lauth_success_6:
 ; TRAP-NEXT:    mov x0, x16
@@ -235,27 +250,27 @@ define i64 @test_auth_db(i64 %arg, i64 %arg1) {
 
 define i64 @test_auth_db_zero(i64 %arg) {
 ; UNCHECKED-LABEL: test_auth_db_zero:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    autdzb x16
 ; UNCHECKED-NEXT:    mov x0, x16
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_auth_db_zero:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    autdzb x16
 ; CHECKED-NEXT:    mov x0, x16
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_auth_db_zero:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    autdzb x16
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpacd x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_7
+; TRAP-NEXT:    b.eq [[L]]auth_success_7
 ; TRAP-NEXT:    brk #0xc473
 ; TRAP-NEXT:  Lauth_success_7:
 ; TRAP-NEXT:    mov x0, x16
@@ -268,7 +283,7 @@ define i64 @test_auth_db_zero(i64 %arg) {
 ;; the validity of a signature.
 define i64 @test_resign_ia_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 ; UNCHECKED-LABEL: test_resign_ia_ia:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    autia x16, x1
 ; UNCHECKED-NEXT:    pacia x16, x2
@@ -276,15 +291,15 @@ define i64 @test_resign_ia_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_resign_ia_ia:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    autia x16, x1
 ; CHECKED-NEXT:    mov x17, x16
 ; CHECKED-NEXT:    xpaci x17
 ; CHECKED-NEXT:    cmp x16, x17
-; CHECKED-NEXT:    b.eq Lauth_success_0
+; CHECKED-NEXT:    b.eq [[L]]auth_success_0
 ; CHECKED-NEXT:    mov x16, x17
-; CHECKED-NEXT:    b Lresign_end_0
+; CHECKED-NEXT:    b [[L]]resign_end_0
 ; CHECKED-NEXT:  Lauth_success_0:
 ; CHECKED-NEXT:    pacia x16, x2
 ; CHECKED-NEXT:  Lresign_end_0:
@@ -292,13 +307,13 @@ define i64 @test_resign_ia_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_resign_ia_ia:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    autia x16, x1
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpaci x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_8
+; TRAP-NEXT:    b.eq [[L]]auth_success_8
 ; TRAP-NEXT:    brk #0xc470
 ; TRAP-NEXT:  Lauth_success_8:
 ; TRAP-NEXT:    pacia x16, x2
@@ -310,7 +325,7 @@ define i64 @test_resign_ia_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 
 define i64 @test_resign_ib_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 ; UNCHECKED-LABEL: test_resign_ib_ia:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    autib x16, x1
 ; UNCHECKED-NEXT:    pacia x16, x2
@@ -318,15 +333,15 @@ define i64 @test_resign_ib_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_resign_ib_ia:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    autib x16, x1
 ; CHECKED-NEXT:    mov x17, x16
 ; CHECKED-NEXT:    xpaci x17
 ; CHECKED-NEXT:    cmp x16, x17
-; CHECKED-NEXT:    b.eq Lauth_success_1
+; CHECKED-NEXT:    b.eq [[L]]auth_success_1
 ; CHECKED-NEXT:    mov x16, x17
-; CHECKED-NEXT:    b Lresign_end_1
+; CHECKED-NEXT:    b [[L]]resign_end_1
 ; CHECKED-NEXT:  Lauth_success_1:
 ; CHECKED-NEXT:    pacia x16, x2
 ; CHECKED-NEXT:  Lresign_end_1:
@@ -334,13 +349,13 @@ define i64 @test_resign_ib_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_resign_ib_ia:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    autib x16, x1
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpaci x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_9
+; TRAP-NEXT:    b.eq [[L]]auth_success_9
 ; TRAP-NEXT:    brk #0xc471
 ; TRAP-NEXT:  Lauth_success_9:
 ; TRAP-NEXT:    pacia x16, x2
@@ -352,7 +367,7 @@ define i64 @test_resign_ib_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 
 define i64 @test_resign_da_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 ; UNCHECKED-LABEL: test_resign_da_ia:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    autda x16, x1
 ; UNCHECKED-NEXT:    pacia x16, x2
@@ -360,15 +375,15 @@ define i64 @test_resign_da_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_resign_da_ia:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    autda x16, x1
 ; CHECKED-NEXT:    mov x17, x16
 ; CHECKED-NEXT:    xpacd x17
 ; CHECKED-NEXT:    cmp x16, x17
-; CHECKED-NEXT:    b.eq Lauth_success_2
+; CHECKED-NEXT:    b.eq [[L]]auth_success_2
 ; CHECKED-NEXT:    mov x16, x17
-; CHECKED-NEXT:    b Lresign_end_2
+; CHECKED-NEXT:    b [[L]]resign_end_2
 ; CHECKED-NEXT:  Lauth_success_2:
 ; CHECKED-NEXT:    pacia x16, x2
 ; CHECKED-NEXT:  Lresign_end_2:
@@ -376,13 +391,13 @@ define i64 @test_resign_da_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_resign_da_ia:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    autda x16, x1
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpacd x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_10
+; TRAP-NEXT:    b.eq [[L]]auth_success_10
 ; TRAP-NEXT:    brk #0xc472
 ; TRAP-NEXT:  Lauth_success_10:
 ; TRAP-NEXT:    pacia x16, x2
@@ -394,7 +409,7 @@ define i64 @test_resign_da_ia(i64 %arg, i64 %arg1, i64 %arg2) {
 
 define i64 @test_resign_db_da(i64 %arg, i64 %arg1, i64 %arg2) {
 ; UNCHECKED-LABEL: test_resign_db_da:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    autdb x16, x1
 ; UNCHECKED-NEXT:    pacda x16, x2
@@ -402,15 +417,15 @@ define i64 @test_resign_db_da(i64 %arg, i64 %arg1, i64 %arg2) {
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_resign_db_da:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    autdb x16, x1
 ; CHECKED-NEXT:    mov x17, x16
 ; CHECKED-NEXT:    xpacd x17
 ; CHECKED-NEXT:    cmp x16, x17
-; CHECKED-NEXT:    b.eq Lauth_success_3
+; CHECKED-NEXT:    b.eq [[L]]auth_success_3
 ; CHECKED-NEXT:    mov x16, x17
-; CHECKED-NEXT:    b Lresign_end_3
+; CHECKED-NEXT:    b [[L]]resign_end_3
 ; CHECKED-NEXT:  Lauth_success_3:
 ; CHECKED-NEXT:    pacda x16, x2
 ; CHECKED-NEXT:  Lresign_end_3:
@@ -418,13 +433,13 @@ define i64 @test_resign_db_da(i64 %arg, i64 %arg1, i64 %arg2) {
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_resign_db_da:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    autdb x16, x1
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpacd x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_11
+; TRAP-NEXT:    b.eq [[L]]auth_success_11
 ; TRAP-NEXT:    brk #0xc473
 ; TRAP-NEXT:  Lauth_success_11:
 ; TRAP-NEXT:    pacda x16, x2
@@ -436,7 +451,7 @@ define i64 @test_resign_db_da(i64 %arg, i64 %arg1, i64 %arg2) {
 
 define i64 @test_resign_iza_db(i64 %arg, i64 %arg1, i64 %arg2) {
 ; UNCHECKED-LABEL: test_resign_iza_db:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    autiza x16
 ; UNCHECKED-NEXT:    pacdb x16, x2
@@ -444,15 +459,15 @@ define i64 @test_resign_iza_db(i64 %arg, i64 %arg1, i64 %arg2) {
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_resign_iza_db:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    autiza x16
 ; CHECKED-NEXT:    mov x17, x16
 ; CHECKED-NEXT:    xpaci x17
 ; CHECKED-NEXT:    cmp x16, x17
-; CHECKED-NEXT:    b.eq Lauth_success_4
+; CHECKED-NEXT:    b.eq [[L]]auth_success_4
 ; CHECKED-NEXT:    mov x16, x17
-; CHECKED-NEXT:    b Lresign_end_4
+; CHECKED-NEXT:    b [[L]]resign_end_4
 ; CHECKED-NEXT:  Lauth_success_4:
 ; CHECKED-NEXT:    pacdb x16, x2
 ; CHECKED-NEXT:  Lresign_end_4:
@@ -460,13 +475,13 @@ define i64 @test_resign_iza_db(i64 %arg, i64 %arg1, i64 %arg2) {
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_resign_iza_db:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    autiza x16
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpaci x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_12
+; TRAP-NEXT:    b.eq [[L]]auth_success_12
 ; TRAP-NEXT:    brk #0xc470
 ; TRAP-NEXT:  Lauth_success_12:
 ; TRAP-NEXT:    pacdb x16, x2
@@ -478,7 +493,7 @@ define i64 @test_resign_iza_db(i64 %arg, i64 %arg1, i64 %arg2) {
 
 define i64 @test_resign_da_dzb(i64 %arg, i64 %arg1, i64 %arg2) {
 ; UNCHECKED-LABEL: test_resign_da_dzb:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    autda x16, x1
 ; UNCHECKED-NEXT:    pacdzb x16
@@ -486,15 +501,15 @@ define i64 @test_resign_da_dzb(i64 %arg, i64 %arg1, i64 %arg2) {
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_resign_da_dzb:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    autda x16, x1
 ; CHECKED-NEXT:    mov x17, x16
 ; CHECKED-NEXT:    xpacd x17
 ; CHECKED-NEXT:    cmp x16, x17
-; CHECKED-NEXT:    b.eq Lauth_success_5
+; CHECKED-NEXT:    b.eq [[L]]auth_success_5
 ; CHECKED-NEXT:    mov x16, x17
-; CHECKED-NEXT:    b Lresign_end_5
+; CHECKED-NEXT:    b [[L]]resign_end_5
 ; CHECKED-NEXT:  Lauth_success_5:
 ; CHECKED-NEXT:    pacdzb x16
 ; CHECKED-NEXT:  Lresign_end_5:
@@ -502,13 +517,13 @@ define i64 @test_resign_da_dzb(i64 %arg, i64 %arg1, i64 %arg2) {
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_resign_da_dzb:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    autda x16, x1
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpacd x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_13
+; TRAP-NEXT:    b.eq [[L]]auth_success_13
 ; TRAP-NEXT:    brk #0xc472
 ; TRAP-NEXT:  Lauth_success_13:
 ; TRAP-NEXT:    pacdzb x16
@@ -520,33 +535,33 @@ define i64 @test_resign_da_dzb(i64 %arg, i64 %arg1, i64 %arg2) {
 
 define i64 @test_auth_trap_attribute(i64 %arg, i64 %arg1) "ptrauth-auth-traps" {
 ; UNCHECKED-LABEL: test_auth_trap_attribute:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    autia x16, x1
 ; UNCHECKED-NEXT:    mov x0, x16
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_auth_trap_attribute:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    autia x16, x1
 ; CHECKED-NEXT:    mov x17, x16
 ; CHECKED-NEXT:    xpaci x17
 ; CHECKED-NEXT:    cmp x16, x17
-; CHECKED-NEXT:    b.eq Lauth_success_6
+; CHECKED-NEXT:    b.eq [[L]]auth_success_6
 ; CHECKED-NEXT:    brk #0xc470
 ; CHECKED-NEXT:  Lauth_success_6:
 ; CHECKED-NEXT:    mov x0, x16
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_auth_trap_attribute:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    autia x16, x1
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpaci x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_14
+; TRAP-NEXT:    b.eq [[L]]auth_success_14
 ; TRAP-NEXT:    brk #0xc470
 ; TRAP-NEXT:  Lauth_success_14:
 ; TRAP-NEXT:    mov x0, x16
@@ -557,30 +572,30 @@ define i64 @test_auth_trap_attribute(i64 %arg, i64 %arg1) "ptrauth-auth-traps" {
 
 define i64 @test_auth_ia_constdisc(i64 %arg) {
 ; UNCHECKED-LABEL: test_auth_ia_constdisc:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
-; UNCHECKED-NEXT:    mov x17, #256 ; =0x100
+; UNCHECKED-NEXT:    mov x17, #256
 ; UNCHECKED-NEXT:    autia x16, x17
 ; UNCHECKED-NEXT:    mov x0, x16
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_auth_ia_constdisc:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
-; CHECKED-NEXT:    mov x17, #256 ; =0x100
+; CHECKED-NEXT:    mov x17, #256
 ; CHECKED-NEXT:    autia x16, x17
 ; CHECKED-NEXT:    mov x0, x16
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_auth_ia_constdisc:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
-; TRAP-NEXT:    mov x17, #256 ; =0x100
+; TRAP-NEXT:    mov x17, #256
 ; TRAP-NEXT:    autia x16, x17
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpaci x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_15
+; TRAP-NEXT:    b.eq [[L]]auth_success_15
 ; TRAP-NEXT:    brk #0xc470
 ; TRAP-NEXT:  Lauth_success_15:
 ; TRAP-NEXT:    mov x0, x16
@@ -591,42 +606,42 @@ define i64 @test_auth_ia_constdisc(i64 %arg) {
 
 define i64 @test_resign_da_constdisc(i64 %arg, i64 %arg1) {
 ; UNCHECKED-LABEL: test_resign_da_constdisc:
-; UNCHECKED:       ; %bb.0:
+; UNCHECKED:       %bb.0:
 ; UNCHECKED-NEXT:    mov x16, x0
 ; UNCHECKED-NEXT:    autda x16, x1
-; UNCHECKED-NEXT:    mov x17, #256 ; =0x100
+; UNCHECKED-NEXT:    mov x17, #256
 ; UNCHECKED-NEXT:    pacda x16, x17
 ; UNCHECKED-NEXT:    mov x0, x16
 ; UNCHECKED-NEXT:    ret
 ;
 ; CHECKED-LABEL: test_resign_da_constdisc:
-; CHECKED:       ; %bb.0:
+; CHECKED:       %bb.0:
 ; CHECKED-NEXT:    mov x16, x0
 ; CHECKED-NEXT:    autda x16, x1
 ; CHECKED-NEXT:    mov x17, x16
 ; CHECKED-NEXT:    xpacd x17
 ; CHECKED-NEXT:    cmp x16, x17
-; CHECKED-NEXT:    b.eq Lauth_success_7
+; CHECKED-NEXT:    b.eq [[L]]auth_success_7
 ; CHECKED-NEXT:    mov x16, x17
-; CHECKED-NEXT:    b Lresign_end_6
+; CHECKED-NEXT:    b [[L]]resign_end_6
 ; CHECKED-NEXT:  Lauth_success_7:
-; CHECKED-NEXT:    mov x17, #256 ; =0x100
+; CHECKED-NEXT:    mov x17, #256
 ; CHECKED-NEXT:    pacda x16, x17
 ; CHECKED-NEXT:  Lresign_end_6:
 ; CHECKED-NEXT:    mov x0, x16
 ; CHECKED-NEXT:    ret
 ;
 ; TRAP-LABEL: test_resign_da_constdisc:
-; TRAP:       ; %bb.0:
+; TRAP:       %bb.0:
 ; TRAP-NEXT:    mov x16, x0
 ; TRAP-NEXT:    autda x16, x1
 ; TRAP-NEXT:    mov x17, x16
 ; TRAP-NEXT:    xpacd x17
 ; TRAP-NEXT:    cmp x16, x17
-; TRAP-NEXT:    b.eq Lauth_success_16
+; TRAP-NEXT:    b.eq [[L]]auth_success_16
 ; TRAP-NEXT:    brk #0xc472
 ; TRAP-NEXT:  Lauth_success_16:
-; TRAP-NEXT:    mov x17, #256 ; =0x100
+; TRAP-NEXT:    mov x17, #256
 ; TRAP-NEXT:    pacda x16, x17
 ; TRAP-NEXT:    mov x0, x16
 ; TRAP-NEXT:    ret

From 1af23c548197ba8325c35e1edd6fa1be456af57f Mon Sep 17 00:00:00 2001
From: Daniil Kovalev <dkovalev@accesssoftek.com>
Date: Thu, 25 Jul 2024 00:24:50 +0300
Subject: [PATCH 067/427] [PAC][clang][test] Implement missing tests for some
 PAuth features (#100206)

Implement tests for the following PAuth-related features:

- driver, preprocessor and ELF codegen tests for type_info vtable
pointer discrimination #99726;

- driver, preprocessor, and ELF codegen (emitting function attributes) +
sema (emitting errors) tests for indirect gotos signing #97647;

- ELF codegen tests for ubsan type checks + auth #99590;

- ELF codegen tests for constant global init with polymorphic MI #99741;

- ELF codegen tests for C++ member function pointers auth #99576.

(cherry picked from commit 70c6e79e6d3e897418f3556a25e22e66ff018dc4)
---
 clang/lib/Driver/ToolChains/Clang.cpp         |  3 +
 .../CodeGen/ptrauth-function-attributes.c     |  5 +-
 clang/test/CodeGen/ubsan-function.cpp         |  7 +-
 .../ptrauth-global-constant-initializers.cpp  | 77 +++++++++++--------
 .../ptrauth-member-function-pointer.cpp       | 55 +++++++------
 .../CodeGenCXX/ptrauth-type-info-vtable.cpp   | 17 +++-
 clang/test/Driver/aarch64-ptrauth.c           |  9 ++-
 clang/test/Preprocessor/ptrauth_feature.c     | 36 +++++++--
 clang/test/Sema/ptrauth-indirect-goto.c       |  1 +
 9 files changed, 137 insertions(+), 73 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 5de29f1eca614..366b147a052bf 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1847,6 +1847,9 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args,
   Args.addOptInFlag(
       CmdArgs, options::OPT_fptrauth_vtable_pointer_type_discrimination,
       options::OPT_fno_ptrauth_vtable_pointer_type_discrimination);
+  Args.addOptInFlag(
+      CmdArgs, options::OPT_fptrauth_type_info_vtable_pointer_discrimination,
+      options::OPT_fno_ptrauth_type_info_vtable_pointer_discrimination);
   Args.addOptInFlag(CmdArgs, options::OPT_fptrauth_init_fini,
                     options::OPT_fno_ptrauth_init_fini);
   Args.addOptInFlag(
diff --git a/clang/test/CodeGen/ptrauth-function-attributes.c b/clang/test/CodeGen/ptrauth-function-attributes.c
index 7f93ccc7c4bce..6a09cd37bf485 100644
--- a/clang/test/CodeGen/ptrauth-function-attributes.c
+++ b/clang/test/CodeGen/ptrauth-function-attributes.c
@@ -4,8 +4,9 @@
 // RUN: %clang_cc1 -triple arm64-apple-ios  -fptrauth-calls   -emit-llvm %s  -o - | FileCheck %s --check-prefixes=ALL,CALLS
 // RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls  -emit-llvm %s  -o - | FileCheck %s --check-prefixes=ALL,CALLS
 
-// RUN: %clang_cc1 -triple arm64-apple-ios  -fptrauth-indirect-gotos -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,GOTOS
-// RUN: %clang_cc1 -triple arm64e-apple-ios -fptrauth-indirect-gotos -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,GOTOS
+// RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-indirect-gotos -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,GOTOS
+// RUN: %clang_cc1 -triple arm64e-apple-ios  -fptrauth-indirect-gotos -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,GOTOS
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-indirect-gotos -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,GOTOS
 
 // ALL: define {{(dso_local )?}}void @test() #0
 void test() {
diff --git a/clang/test/CodeGen/ubsan-function.cpp b/clang/test/CodeGen/ubsan-function.cpp
index 8478f05a10b78..76d4237383f83 100644
--- a/clang/test/CodeGen/ubsan-function.cpp
+++ b/clang/test/CodeGen/ubsan-function.cpp
@@ -4,7 +4,8 @@
 // RUN: %clang_cc1 -triple aarch64_be-linux-gnu -emit-llvm -o - %s -fsanitize=function -fno-sanitize-recover=all | FileCheck %s --check-prefixes=CHECK,GNU,64
 // RUN: %clang_cc1 -triple arm-none-eabi -emit-llvm -o - %s -fsanitize=function -fno-sanitize-recover=all | FileCheck %s --check-prefixes=CHECK,ARM,GNU,32
 
-// RUN: %clang_cc1 -triple arm64e-apple-ios -emit-llvm -o - %s -fsanitize=function -fno-sanitize-recover=all -fptrauth-calls | FileCheck %s --check-prefixes=CHECK,GNU,64,64e
+// RUN: %clang_cc1 -triple arm64e-apple-ios  -emit-llvm -o - %s -fsanitize=function -fno-sanitize-recover=all -fptrauth-calls | FileCheck %s --check-prefixes=CHECK,GNU,64,AUTH
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm -o - %s -fsanitize=function -fno-sanitize-recover=all -fptrauth-calls | FileCheck %s --check-prefixes=CHECK,GNU,64,AUTH
 
 // GNU:  define{{.*}} void @_Z3funv() #0 !func_sanitize ![[FUNCSAN:.*]] {
 // MSVC: define{{.*}} void @"?fun@@YAXXZ"() #0 !func_sanitize ![[FUNCSAN:.*]] {
@@ -15,8 +16,8 @@ void fun() {}
 // ARM:   ptrtoint ptr {{.*}} to i32, !nosanitize !5
 // ARM:   and i32 {{.*}}, -2, !nosanitize !5
 // ARM:   inttoptr i32 {{.*}} to ptr, !nosanitize !5
-// 64e:   %[[STRIPPED:.*]] = ptrtoint ptr {{.*}} to i64, !nosanitize
-// 64e:   call i64 @llvm.ptrauth.auth(i64 %[[STRIPPED]], i32 0, i64 0), !nosanitize
+// AUTH:  %[[STRIPPED:.*]] = ptrtoint ptr {{.*}} to i64, !nosanitize
+// AUTH:  call i64 @llvm.ptrauth.auth(i64 %[[STRIPPED]], i32 0, i64 0), !nosanitize
 // CHECK: getelementptr <{ i32, i32 }>, ptr {{.*}}, i32 -1, i32 0, !nosanitize
 // CHECK: load i32, ptr {{.*}}, align {{.*}}, !nosanitize
 // CHECK: icmp eq i32 {{.*}}, -1056584962, !nosanitize
diff --git a/clang/test/CodeGenCXX/ptrauth-global-constant-initializers.cpp b/clang/test/CodeGenCXX/ptrauth-global-constant-initializers.cpp
index f0c3ea83d8958..9ce9def6156ef 100644
--- a/clang/test/CodeGenCXX/ptrauth-global-constant-initializers.cpp
+++ b/clang/test/CodeGenCXX/ptrauth-global-constant-initializers.cpp
@@ -1,4 +1,7 @@
-// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -fno-rtti -fptrauth-vtable-pointer-type-discrimination -fptrauth-vtable-pointer-address-discrimination -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-calls -fno-rtti -fptrauth-vtable-pointer-type-discrimination \
+// RUN:   -fptrauth-vtable-pointer-address-discrimination -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,DARWIN
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -fno-rtti -fptrauth-vtable-pointer-type-discrimination \
+// RUN:   -fptrauth-vtable-pointer-address-discrimination -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,ELF
 
 // CHECK: %struct.Base1 = type { ptr }
 // CHECK: %struct.Base2 = type { ptr }
@@ -6,27 +9,27 @@
 // CHECK: %struct.Derived2 = type { %struct.Base2, %struct.Base1 }
 // CHECK: %struct.Derived3 = type { %struct.Base1, %struct.Base2 }
 
-// CHECK: @_ZTV5Base1 = linkonce_odr unnamed_addr constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base11aEv, i32 0, i64 [[BASE1_A_DISC:38871]], ptr getelementptr inbounds ({ [3 x ptr] }, ptr @_ZTV5Base1, i32 0, i32 0, i32 2))] }, align 8
-// CHECK: @g_b1 = global %struct.Base1 { ptr ptrauth (ptr getelementptr inbounds inrange(-16, 8) ({ [3 x ptr] }, ptr @_ZTV5Base1, i32 0, i32 0, i32 2), i32 2, i64 [[BASE1_VTABLE_DISC:6511]], ptr @g_b1) }, align 8
-// CHECK: @_ZTV5Base2 = linkonce_odr unnamed_addr constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base21bEv, i32 0, i64 [[BASE2_B_DISC:27651]], ptr getelementptr inbounds ({ [3 x ptr] }, ptr @_ZTV5Base2, i32 0, i32 0, i32 2))] }, align 8
-// CHECK: @g_b2 = global %struct.Base2 { ptr ptrauth (ptr getelementptr inbounds inrange(-16, 8) ({ [3 x ptr] }, ptr @_ZTV5Base2, i32 0, i32 0, i32 2), i32 2, i64 [[BASE2_VTABLE_DISC:63631]], ptr @g_b2) }, align 8
-// CHECK: @_ZTV8Derived1 = linkonce_odr unnamed_addr constant { [5 x ptr], [3 x ptr] } { [5 x ptr] [ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base11aEv, i32 0, i64 [[BASE1_A_DISC]], ptr getelementptr inbounds ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived1, i32 0, i32 0, i32 2)), ptr ptrauth (ptr @_ZN8Derived11cEv, i32 0, i64 [[DERIVED1_C_DISC:54092]], ptr getelementptr inbounds ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived1, i32 0, i32 0, i32 3)), ptr ptrauth (ptr @_ZN8Derived11dEv, i32 0, i64 [[DERIVED1_D_DISC:37391]], ptr getelementptr inbounds ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived1, i32 0, i32 0, i32 4))], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr ptrauth (ptr @_ZN5Base21bEv, i32 0, i64 [[BASE2_B_DISC]], ptr getelementptr inbounds ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived1, i32 0, i32 1, i32 2))] }, align 8
-// CHECK: @g_d1 = global { ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds inrange(-16, 24) ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived1, i32 0, i32 0, i32 2), i32 2, i64 [[BASE1_VTABLE_DISC]], ptr @g_d1), ptr ptrauth (ptr getelementptr inbounds inrange(-16, 8) ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived1, i32 0, i32 1, i32 2), i32 2, i64 [[BASE2_VTABLE_DISC]], ptr getelementptr inbounds ({ ptr, ptr }, ptr @g_d1, i32 0, i32 1)) }, align 8
-// CHECK: @_ZTV8Derived2 = linkonce_odr unnamed_addr constant { [5 x ptr], [3 x ptr] } { [5 x ptr] [ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base21bEv, i32 0, i64 [[BASE2_B_DISC]], ptr getelementptr inbounds ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived2, i32 0, i32 0, i32 2)), ptr ptrauth (ptr @_ZN8Derived21cEv, i32 0, i64 [[DERIVED2_C_DISC:15537]], ptr getelementptr inbounds ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived2, i32 0, i32 0, i32 3)), ptr ptrauth (ptr @_ZN8Derived21eEv, i32 0, i64 209, ptr getelementptr inbounds ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived2, i32 0, i32 0, i32 4))], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr ptrauth (ptr @_ZN5Base11aEv, i32 0, i64 [[BASE1_A_DISC]], ptr getelementptr inbounds ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived2, i32 0, i32 1, i32 2))] }, align 8
-// CHECK: @g_d2 = global { ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds inrange(-16, 24) ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived2, i32 0, i32 0, i32 2), i32 2, i64 [[BASE2_VTABLE_DISC]], ptr @g_d2), ptr ptrauth (ptr getelementptr inbounds inrange(-16, 8) ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived2, i32 0, i32 1, i32 2), i32 2, i64 [[BASE1_VTABLE_DISC]], ptr getelementptr inbounds ({ ptr, ptr }, ptr @g_d2, i32 0, i32 1)) }, align 8
-// CHECK: @_ZTV8Derived3 = linkonce_odr unnamed_addr constant { [4 x ptr], [3 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base11aEv, i32 0, i64 [[BASE1_A_DISC]], ptr getelementptr inbounds ({ [4 x ptr], [3 x ptr] }, ptr @_ZTV8Derived3, i32 0, i32 0, i32 2)), ptr ptrauth (ptr @_ZN8Derived31iEv, i32 0, i64 [[DERIVED3_I_DISC:19084]], ptr getelementptr inbounds ({ [4 x ptr], [3 x ptr] }, ptr @_ZTV8Derived3, i32 0, i32 0, i32 3))], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr ptrauth (ptr @_ZN5Base21bEv, i32 0, i64 [[BASE2_B_DISC]], ptr getelementptr inbounds ({ [4 x ptr], [3 x ptr] }, ptr @_ZTV8Derived3, i32 0, i32 1, i32 2))] }, align 8
-// CHECK: @g_d3 = global { ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds inrange(-16, 16) ({ [4 x ptr], [3 x ptr] }, ptr @_ZTV8Derived3, i32 0, i32 0, i32 2), i32 2, i64 [[BASE1_VTABLE_DISC]], ptr @g_d3), ptr ptrauth (ptr getelementptr inbounds inrange(-16, 8) ({ [4 x ptr], [3 x ptr] }, ptr @_ZTV8Derived3, i32 0, i32 1, i32 2), i32 2, i64 [[BASE2_VTABLE_DISC]], ptr getelementptr inbounds ({ ptr, ptr }, ptr @g_d3, i32 0, i32 1)) }, align 8
-// CHECK: @g_vb1 = global %struct.VirtualBase1 zeroinitializer, align 8
-// CHECK: @g_vb2 = global %struct.VirtualBase2 zeroinitializer, align 8
-// CHECK: @g_d4 = global %struct.Derived4 zeroinitializer, align 8
-// CHECK: @_ZTV12VirtualBase1 = linkonce_odr unnamed_addr constant { [6 x ptr] } { [6 x ptr] [ptr null, ptr null, ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base11aEv, i32 0, i64 [[BASE1_A_DISC]], ptr getelementptr inbounds ({ [6 x ptr] }, ptr @_ZTV12VirtualBase1, i32 0, i32 0, i32 4)), ptr ptrauth (ptr @_ZN12VirtualBase11fEv, i32 0, i64 [[VIRTUALBASE1_F_DISC:7987]], ptr getelementptr inbounds ({ [6 x ptr] }, ptr @_ZTV12VirtualBase1, i32 0, i32 0, i32 5))] }, align 8
-// CHECK: @_ZTT12VirtualBase1 = linkonce_odr unnamed_addr constant [2 x ptr] [ptr ptrauth (ptr getelementptr inbounds inrange(-32, 16) ({ [6 x ptr] }, ptr @_ZTV12VirtualBase1, i32 0, i32 0, i32 4), i32 2), ptr ptrauth (ptr getelementptr inbounds inrange(-32, 16) ({ [6 x ptr] }, ptr @_ZTV12VirtualBase1, i32 0, i32 0, i32 4), i32 2)], align 8
-// CHECK: @_ZTV12VirtualBase2 = linkonce_odr unnamed_addr constant { [5 x ptr], [4 x ptr] } { [5 x ptr] [ptr inttoptr (i64 8 to ptr), ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base21bEv, i32 0, i64 [[BASE2_B_DISC]], ptr getelementptr inbounds ({ [5 x ptr], [4 x ptr] }, ptr @_ZTV12VirtualBase2, i32 0, i32 0, i32 3)), ptr ptrauth (ptr @_ZN12VirtualBase21gEv, i32 0, i64 [[VIRTUALBASE2_G_DISC:51224]], ptr getelementptr inbounds ({ [5 x ptr], [4 x ptr] }, ptr @_ZTV12VirtualBase2, i32 0, i32 0, i32 4))], [4 x ptr] [ptr null, ptr inttoptr (i64 -8 to ptr), ptr null, ptr ptrauth (ptr @_ZN5Base11aEv, i32 0, i64 [[BASE1_A_DISC]], ptr getelementptr inbounds ({ [5 x ptr], [4 x ptr] }, ptr @_ZTV12VirtualBase2, i32 0, i32 1, i32 3))] }, align 8
-// CHECK: @_ZTT12VirtualBase2 = linkonce_odr unnamed_addr constant [2 x ptr] [ptr ptrauth (ptr getelementptr inbounds inrange(-24, 16) ({ [5 x ptr], [4 x ptr] }, ptr @_ZTV12VirtualBase2, i32 0, i32 0, i32 3), i32 2), ptr ptrauth (ptr getelementptr inbounds inrange(-24, 8) ({ [5 x ptr], [4 x ptr] }, ptr @_ZTV12VirtualBase2, i32 0, i32 1, i32 3), i32 2)], align 8
-// CHECK: @_ZTV8Derived4 = linkonce_odr unnamed_addr constant { [7 x ptr], [5 x ptr] } { [7 x ptr] [ptr null, ptr null, ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base11aEv, i32 0, i64 [[BASE1_A_DISC]], ptr getelementptr inbounds ({ [7 x ptr], [5 x ptr] }, ptr @_ZTV8Derived4, i32 0, i32 0, i32 4)), ptr ptrauth (ptr @_ZN12VirtualBase11fEv, i32 0, i64 [[VIRTUALBASE1_F_DISC]], ptr getelementptr inbounds ({ [7 x ptr], [5 x ptr] }, ptr @_ZTV8Derived4, i32 0, i32 0, i32 5)), ptr ptrauth (ptr @_ZN8Derived41hEv, i32 0, i64 [[DERIVED4_H_DISC:31844]], ptr getelementptr inbounds ({ [7 x ptr], [5 x ptr] }, ptr @_ZTV8Derived4, i32 0, i32 0, i32 6))], [5 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr inttoptr (i64 -8 to ptr), ptr null, ptr ptrauth (ptr @_ZN5Base21bEv, i32 0, i64 [[BASE2_B_DISC]], ptr getelementptr inbounds ({ [7 x ptr], [5 x ptr] }, ptr @_ZTV8Derived4, i32 0, i32 1, i32 3)), ptr ptrauth (ptr @_ZN12VirtualBase21gEv, i32 0, i64 [[VIRTUALBASE2_G_DISC]], ptr getelementptr inbounds ({ [7 x ptr], [5 x ptr] }, ptr @_ZTV8Derived4, i32 0, i32 1, i32 4))] }, align 8
-// CHECK: @_ZTT8Derived4 = linkonce_odr unnamed_addr constant [7 x ptr] [ptr ptrauth (ptr getelementptr inbounds inrange(-32, 24) ({ [7 x ptr], [5 x ptr] }, ptr @_ZTV8Derived4, i32 0, i32 0, i32 4), i32 2), ptr ptrauth (ptr getelementptr inbounds inrange(-32, 16) ({ [6 x ptr] }, ptr @_ZTC8Derived40_12VirtualBase1, i32 0, i32 0, i32 4), i32 2), ptr ptrauth (ptr getelementptr inbounds inrange(-32, 16) ({ [6 x ptr] }, ptr @_ZTC8Derived40_12VirtualBase1, i32 0, i32 0, i32 4), i32 2), ptr ptrauth (ptr getelementptr inbounds inrange(-24, 16) ({ [5 x ptr], [4 x ptr] }, ptr @_ZTC8Derived48_12VirtualBase2, i32 0, i32 0, i32 3), i32 2), ptr ptrauth (ptr getelementptr inbounds inrange(-24, 8) ({ [5 x ptr], [4 x ptr] }, ptr @_ZTC8Derived48_12VirtualBase2, i32 0, i32 1, i32 3), i32 2), ptr ptrauth (ptr getelementptr inbounds inrange(-32, 24) ({ [7 x ptr], [5 x ptr] }, ptr @_ZTV8Derived4, i32 0, i32 0, i32 4), i32 2), ptr ptrauth (ptr getelementptr inbounds inrange(-24, 16) ({ [7 x ptr], [5 x ptr] }, ptr @_ZTV8Derived4, i32 0, i32 1, i32 3), i32 2)], align 8
-// CHECK: @_ZTC8Derived40_12VirtualBase1 = linkonce_odr unnamed_addr constant { [6 x ptr] } { [6 x ptr] [ptr null, ptr null, ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base11aEv, i32 0, i64 [[BASE1_A_DISC]], ptr getelementptr inbounds ({ [6 x ptr] }, ptr @_ZTC8Derived40_12VirtualBase1, i32 0, i32 0, i32 4)), ptr ptrauth (ptr @_ZN12VirtualBase11fEv, i32 0, i64 [[VIRTUALBASE1_F_DISC]], ptr getelementptr inbounds ({ [6 x ptr] }, ptr @_ZTC8Derived40_12VirtualBase1, i32 0, i32 0, i32 5))] }, align 8
-// CHECK: @_ZTC8Derived48_12VirtualBase2 = linkonce_odr unnamed_addr constant { [5 x ptr], [4 x ptr] } { [5 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base21bEv, i32 0, i64 [[BASE2_B_DISC]], ptr getelementptr inbounds ({ [5 x ptr], [4 x ptr] }, ptr @_ZTC8Derived48_12VirtualBase2, i32 0, i32 0, i32 3)), ptr ptrauth (ptr @_ZN12VirtualBase21gEv, i32 0, i64 [[VIRTUALBASE2_G_DISC]], ptr getelementptr inbounds ({ [5 x ptr], [4 x ptr] }, ptr @_ZTC8Derived48_12VirtualBase2, i32 0, i32 0, i32 4))], [4 x ptr] [ptr null, ptr inttoptr (i64 8 to ptr), ptr null, ptr ptrauth (ptr @_ZN5Base11aEv, i32 0, i64 [[BASE1_A_DISC]], ptr getelementptr inbounds ({ [5 x ptr], [4 x ptr] }, ptr @_ZTC8Derived48_12VirtualBase2, i32 0, i32 1, i32 3))] }, align 8
+// CHECK: @_ZTV5Base1 = linkonce_odr unnamed_addr constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base11aEv, i32 0, i64 [[BASE1_A_DISC:38871]], ptr getelementptr inbounds ({ [3 x ptr] }, ptr @_ZTV5Base1, i32 0, i32 0, i32 2))] },{{.*}} align 8
+// CHECK: @g_b1 = global %struct.Base1 { ptr ptrauth (ptr getelementptr inbounds inrange(-16, 8) ({ [3 x ptr] }, ptr @_ZTV5Base1, i32 0, i32 0, i32 2), i32 2, i64 [[BASE1_VTABLE_DISC:6511]], ptr @g_b1) },{{.*}} align 8
+// CHECK: @_ZTV5Base2 = linkonce_odr unnamed_addr constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base21bEv, i32 0, i64 [[BASE2_B_DISC:27651]], ptr getelementptr inbounds ({ [3 x ptr] }, ptr @_ZTV5Base2, i32 0, i32 0, i32 2))] },{{.*}} align 8
+// CHECK: @g_b2 = global %struct.Base2 { ptr ptrauth (ptr getelementptr inbounds inrange(-16, 8) ({ [3 x ptr] }, ptr @_ZTV5Base2, i32 0, i32 0, i32 2), i32 2, i64 [[BASE2_VTABLE_DISC:63631]], ptr @g_b2) },{{.*}} align 8
+// CHECK: @_ZTV8Derived1 = linkonce_odr unnamed_addr constant { [5 x ptr], [3 x ptr] } { [5 x ptr] [ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base11aEv, i32 0, i64 [[BASE1_A_DISC]], ptr getelementptr inbounds ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived1, i32 0, i32 0, i32 2)), ptr ptrauth (ptr @_ZN8Derived11cEv, i32 0, i64 [[DERIVED1_C_DISC:54092]], ptr getelementptr inbounds ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived1, i32 0, i32 0, i32 3)), ptr ptrauth (ptr @_ZN8Derived11dEv, i32 0, i64 [[DERIVED1_D_DISC:37391]], ptr getelementptr inbounds ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived1, i32 0, i32 0, i32 4))], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr ptrauth (ptr @_ZN5Base21bEv, i32 0, i64 [[BASE2_B_DISC]], ptr getelementptr inbounds ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived1, i32 0, i32 1, i32 2))] },{{.*}} align 8
+// CHECK: @g_d1 = global { ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds inrange(-16, 24) ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived1, i32 0, i32 0, i32 2), i32 2, i64 [[BASE1_VTABLE_DISC]], ptr @g_d1), ptr ptrauth (ptr getelementptr inbounds inrange(-16, 8) ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived1, i32 0, i32 1, i32 2), i32 2, i64 [[BASE2_VTABLE_DISC]], ptr getelementptr inbounds ({ ptr, ptr }, ptr @g_d1, i32 0, i32 1)) },{{.*}} align 8
+// CHECK: @_ZTV8Derived2 = linkonce_odr unnamed_addr constant { [5 x ptr], [3 x ptr] } { [5 x ptr] [ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base21bEv, i32 0, i64 [[BASE2_B_DISC]], ptr getelementptr inbounds ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived2, i32 0, i32 0, i32 2)), ptr ptrauth (ptr @_ZN8Derived21cEv, i32 0, i64 [[DERIVED2_C_DISC:15537]], ptr getelementptr inbounds ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived2, i32 0, i32 0, i32 3)), ptr ptrauth (ptr @_ZN8Derived21eEv, i32 0, i64 209, ptr getelementptr inbounds ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived2, i32 0, i32 0, i32 4))], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr ptrauth (ptr @_ZN5Base11aEv, i32 0, i64 [[BASE1_A_DISC]], ptr getelementptr inbounds ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived2, i32 0, i32 1, i32 2))] },{{.*}} align 8
+// CHECK: @g_d2 = global { ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds inrange(-16, 24) ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived2, i32 0, i32 0, i32 2), i32 2, i64 [[BASE2_VTABLE_DISC]], ptr @g_d2), ptr ptrauth (ptr getelementptr inbounds inrange(-16, 8) ({ [5 x ptr], [3 x ptr] }, ptr @_ZTV8Derived2, i32 0, i32 1, i32 2), i32 2, i64 [[BASE1_VTABLE_DISC]], ptr getelementptr inbounds ({ ptr, ptr }, ptr @g_d2, i32 0, i32 1)) },{{.*}} align 8
+// CHECK: @_ZTV8Derived3 = linkonce_odr unnamed_addr constant { [4 x ptr], [3 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base11aEv, i32 0, i64 [[BASE1_A_DISC]], ptr getelementptr inbounds ({ [4 x ptr], [3 x ptr] }, ptr @_ZTV8Derived3, i32 0, i32 0, i32 2)), ptr ptrauth (ptr @_ZN8Derived31iEv, i32 0, i64 [[DERIVED3_I_DISC:19084]], ptr getelementptr inbounds ({ [4 x ptr], [3 x ptr] }, ptr @_ZTV8Derived3, i32 0, i32 0, i32 3))], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr ptrauth (ptr @_ZN5Base21bEv, i32 0, i64 [[BASE2_B_DISC]], ptr getelementptr inbounds ({ [4 x ptr], [3 x ptr] }, ptr @_ZTV8Derived3, i32 0, i32 1, i32 2))] },{{.*}} align 8
+// CHECK: @g_d3 = global { ptr, ptr } { ptr ptrauth (ptr getelementptr inbounds inrange(-16, 16) ({ [4 x ptr], [3 x ptr] }, ptr @_ZTV8Derived3, i32 0, i32 0, i32 2), i32 2, i64 [[BASE1_VTABLE_DISC]], ptr @g_d3), ptr ptrauth (ptr getelementptr inbounds inrange(-16, 8) ({ [4 x ptr], [3 x ptr] }, ptr @_ZTV8Derived3, i32 0, i32 1, i32 2), i32 2, i64 [[BASE2_VTABLE_DISC]], ptr getelementptr inbounds ({ ptr, ptr }, ptr @g_d3, i32 0, i32 1)) },{{.*}} align 8
+// CHECK: @g_vb1 = global %struct.VirtualBase1 zeroinitializer,{{.*}} align 8
+// CHECK: @g_vb2 = global %struct.VirtualBase2 zeroinitializer,{{.*}} align 8
+// CHECK: @g_d4 = global %struct.Derived4 zeroinitializer,{{.*}} align 8
+// CHECK: @_ZTV12VirtualBase1 = linkonce_odr unnamed_addr constant { [6 x ptr] } { [6 x ptr] [ptr null, ptr null, ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base11aEv, i32 0, i64 [[BASE1_A_DISC]], ptr getelementptr inbounds ({ [6 x ptr] }, ptr @_ZTV12VirtualBase1, i32 0, i32 0, i32 4)), ptr ptrauth (ptr @_ZN12VirtualBase11fEv, i32 0, i64 [[VIRTUALBASE1_F_DISC:7987]], ptr getelementptr inbounds ({ [6 x ptr] }, ptr @_ZTV12VirtualBase1, i32 0, i32 0, i32 5))] },{{.*}} align 8
+// CHECK: @_ZTT12VirtualBase1 = linkonce_odr unnamed_addr constant [2 x ptr] [ptr ptrauth (ptr getelementptr inbounds inrange(-32, 16) ({ [6 x ptr] }, ptr @_ZTV12VirtualBase1, i32 0, i32 0, i32 4), i32 2), ptr ptrauth (ptr getelementptr inbounds inrange(-32, 16) ({ [6 x ptr] }, ptr @_ZTV12VirtualBase1, i32 0, i32 0, i32 4), i32 2)],{{.*}} align 8
+// CHECK: @_ZTV12VirtualBase2 = linkonce_odr unnamed_addr constant { [5 x ptr], [4 x ptr] } { [5 x ptr] [ptr inttoptr (i64 8 to ptr), ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base21bEv, i32 0, i64 [[BASE2_B_DISC]], ptr getelementptr inbounds ({ [5 x ptr], [4 x ptr] }, ptr @_ZTV12VirtualBase2, i32 0, i32 0, i32 3)), ptr ptrauth (ptr @_ZN12VirtualBase21gEv, i32 0, i64 [[VIRTUALBASE2_G_DISC:51224]], ptr getelementptr inbounds ({ [5 x ptr], [4 x ptr] }, ptr @_ZTV12VirtualBase2, i32 0, i32 0, i32 4))], [4 x ptr] [ptr null, ptr inttoptr (i64 -8 to ptr), ptr null, ptr ptrauth (ptr @_ZN5Base11aEv, i32 0, i64 [[BASE1_A_DISC]], ptr getelementptr inbounds ({ [5 x ptr], [4 x ptr] }, ptr @_ZTV12VirtualBase2, i32 0, i32 1, i32 3))] },{{.*}} align 8
+// CHECK: @_ZTT12VirtualBase2 = linkonce_odr unnamed_addr constant [2 x ptr] [ptr ptrauth (ptr getelementptr inbounds inrange(-24, 16) ({ [5 x ptr], [4 x ptr] }, ptr @_ZTV12VirtualBase2, i32 0, i32 0, i32 3), i32 2), ptr ptrauth (ptr getelementptr inbounds inrange(-24, 8) ({ [5 x ptr], [4 x ptr] }, ptr @_ZTV12VirtualBase2, i32 0, i32 1, i32 3), i32 2)],{{.*}} align 8
+// CHECK: @_ZTV8Derived4 = linkonce_odr unnamed_addr constant { [7 x ptr], [5 x ptr] } { [7 x ptr] [ptr null, ptr null, ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base11aEv, i32 0, i64 [[BASE1_A_DISC]], ptr getelementptr inbounds ({ [7 x ptr], [5 x ptr] }, ptr @_ZTV8Derived4, i32 0, i32 0, i32 4)), ptr ptrauth (ptr @_ZN12VirtualBase11fEv, i32 0, i64 [[VIRTUALBASE1_F_DISC]], ptr getelementptr inbounds ({ [7 x ptr], [5 x ptr] }, ptr @_ZTV8Derived4, i32 0, i32 0, i32 5)), ptr ptrauth (ptr @_ZN8Derived41hEv, i32 0, i64 [[DERIVED4_H_DISC:31844]], ptr getelementptr inbounds ({ [7 x ptr], [5 x ptr] }, ptr @_ZTV8Derived4, i32 0, i32 0, i32 6))], [5 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr inttoptr (i64 -8 to ptr), ptr null, ptr ptrauth (ptr @_ZN5Base21bEv, i32 0, i64 [[BASE2_B_DISC]], ptr getelementptr inbounds ({ [7 x ptr], [5 x ptr] }, ptr @_ZTV8Derived4, i32 0, i32 1, i32 3)), ptr ptrauth (ptr @_ZN12VirtualBase21gEv, i32 0, i64 [[VIRTUALBASE2_G_DISC]], ptr getelementptr inbounds ({ [7 x ptr], [5 x ptr] }, ptr @_ZTV8Derived4, i32 0, i32 1, i32 4))] },{{.*}} align 8
+// CHECK: @_ZTT8Derived4 = linkonce_odr unnamed_addr constant [7 x ptr] [ptr ptrauth (ptr getelementptr inbounds inrange(-32, 24) ({ [7 x ptr], [5 x ptr] }, ptr @_ZTV8Derived4, i32 0, i32 0, i32 4), i32 2), ptr ptrauth (ptr getelementptr inbounds inrange(-32, 16) ({ [6 x ptr] }, ptr @_ZTC8Derived40_12VirtualBase1, i32 0, i32 0, i32 4), i32 2), ptr ptrauth (ptr getelementptr inbounds inrange(-32, 16) ({ [6 x ptr] }, ptr @_ZTC8Derived40_12VirtualBase1, i32 0, i32 0, i32 4), i32 2), ptr ptrauth (ptr getelementptr inbounds inrange(-24, 16) ({ [5 x ptr], [4 x ptr] }, ptr @_ZTC8Derived48_12VirtualBase2, i32 0, i32 0, i32 3), i32 2), ptr ptrauth (ptr getelementptr inbounds inrange(-24, 8) ({ [5 x ptr], [4 x ptr] }, ptr @_ZTC8Derived48_12VirtualBase2, i32 0, i32 1, i32 3), i32 2), ptr ptrauth (ptr getelementptr inbounds inrange(-32, 24) ({ [7 x ptr], [5 x ptr] }, ptr @_ZTV8Derived4, i32 0, i32 0, i32 4), i32 2), ptr ptrauth (ptr getelementptr inbounds inrange(-24, 16) ({ [7 x ptr], [5 x ptr] }, ptr @_ZTV8Derived4, i32 0, i32 1, i32 3), i32 2)],{{.*}} align 8
+// CHECK: @_ZTC8Derived40_12VirtualBase1 = linkonce_odr unnamed_addr constant { [6 x ptr] } { [6 x ptr] [ptr null, ptr null, ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base11aEv, i32 0, i64 [[BASE1_A_DISC]], ptr getelementptr inbounds ({ [6 x ptr] }, ptr @_ZTC8Derived40_12VirtualBase1, i32 0, i32 0, i32 4)), ptr ptrauth (ptr @_ZN12VirtualBase11fEv, i32 0, i64 [[VIRTUALBASE1_F_DISC]], ptr getelementptr inbounds ({ [6 x ptr] }, ptr @_ZTC8Derived40_12VirtualBase1, i32 0, i32 0, i32 5))] },{{.*}} align 8
+// CHECK: @_ZTC8Derived48_12VirtualBase2 = linkonce_odr unnamed_addr constant { [5 x ptr], [4 x ptr] } { [5 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr null, ptr ptrauth (ptr @_ZN5Base21bEv, i32 0, i64 [[BASE2_B_DISC]], ptr getelementptr inbounds ({ [5 x ptr], [4 x ptr] }, ptr @_ZTC8Derived48_12VirtualBase2, i32 0, i32 0, i32 3)), ptr ptrauth (ptr @_ZN12VirtualBase21gEv, i32 0, i64 [[VIRTUALBASE2_G_DISC]], ptr getelementptr inbounds ({ [5 x ptr], [4 x ptr] }, ptr @_ZTC8Derived48_12VirtualBase2, i32 0, i32 0, i32 4))], [4 x ptr] [ptr null, ptr inttoptr (i64 8 to ptr), ptr null, ptr ptrauth (ptr @_ZN5Base11aEv, i32 0, i64 [[BASE1_A_DISC]], ptr getelementptr inbounds ({ [5 x ptr], [4 x ptr] }, ptr @_ZTC8Derived48_12VirtualBase2, i32 0, i32 1, i32 3))] },{{.*}} align 8
 
 struct Base1 { virtual void a() {} };
 struct Base2 { virtual void b() {} };
@@ -73,20 +76,24 @@ struct Derived5 : VirtualBase2, VirtualBase1 {
   virtual void h() {}
 };
 
-// CHECK-LABEL: define {{.*}} ptr @_ZN12VirtualBase1C1Ev
+// DARWIN-LABEL: define {{.*}} ptr @_ZN12VirtualBase1C1Ev
+// ELF-LABEL:    define {{.*}} void @_ZN12VirtualBase1C1Ev
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE1_VTABLE_DISC]])
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE1_VTABLE_DISC]])
 
-// CHECK-LABEL: define {{.*}} ptr @_ZN12VirtualBase2C1Ev
+// DARWIN-LABEL: define {{.*}} ptr @_ZN12VirtualBase2C1Ev
+// ELF-LABEL:    define {{.*}} void @_ZN12VirtualBase2C1Ev
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE2_VTABLE_DISC]])
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE1_VTABLE_DISC]])
 
-// CHECK-LABEL: define {{.*}} ptr @_ZN8Derived4C1Ev
+// DARWIN-LABEL: define {{.*}} ptr @_ZN8Derived4C1Ev
+// ELF-LABEL:    define {{.*}} void @_ZN8Derived4C1Ev
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE1_VTABLE_DISC]])
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE1_VTABLE_DISC]])
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE2_VTABLE_DISC]])
 
-// CHECK-LABEL: define {{.*}} ptr @_ZN8Derived5C1Ev
+// DARWIN-LABEL: define {{.*}} ptr @_ZN8Derived5C1Ev
+// ELF-LABEL:    define {{.*}} void @_ZN8Derived5C1Ev
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE2_VTABLE_DISC]])
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE1_VTABLE_DISC]])
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE1_VTABLE_DISC]])
@@ -155,7 +162,7 @@ extern "C" void cross_check_vtables(Base1 *b1,
   d5->h();
 }
 
-// CHECK-LABEL: define void @cross_check_vtables(
+// CHECK-LABEL: define{{.*}} void @cross_check_vtables(
 // CHECK: "; b1->a()",
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE1_VTABLE_DISC]])
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE1_A_DISC]])
@@ -214,21 +221,25 @@ extern "C" void cross_check_vtables(Base1 *b1,
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE1_VTABLE_DISC]])
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[DERIVED4_H_DISC]])
 
-// CHECK-LABEL: define {{.*}} ptr @_ZN5Base1C2Ev
+// DARWIN-LABEL: define {{.*}} ptr @_ZN5Base1C2Ev
+// ELF-LABEL:    define {{.*}} void @_ZN5Base1C2Ev
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE1_VTABLE_DISC]])
 
-// CHECK-LABEL: define {{.*}} ptr @_ZN5Base2C2Ev
+// DARWIN-LABEL: define {{.*}} ptr @_ZN5Base2C2Ev
+// ELF-LABEL:    define {{.*}} void @_ZN5Base2C2Ev
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE2_VTABLE_DISC]])
 
-// CHECK-LABEL: define {{.*}} ptr @_ZN8Derived1C2Ev
+// DARWIN-LABEL: define {{.*}} ptr @_ZN8Derived1C2Ev
+// ELF-LABEL:    define {{.*}} void @_ZN8Derived1C2Ev
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE1_VTABLE_DISC]])
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE2_VTABLE_DISC]])
 
-// CHECK-LABEL: define {{.*}} ptr @_ZN8Derived2C2Ev
+// DARWIN-LABEL: define {{.*}} ptr @_ZN8Derived2C2Ev
+// ELF-LABEL:    define {{.*}} void @_ZN8Derived2C2Ev
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE2_VTABLE_DISC]])
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE1_VTABLE_DISC]])
 
-// CHECK-LABEL: define {{.*}} ptr @_ZN8Derived3C2Ev
+// DARWIN-LABEL: define {{.*}} ptr @_ZN8Derived3C2Ev
+// ELF-LABEL:    define {{.*}} void @_ZN8Derived3C2Ev
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE1_VTABLE_DISC]])
 // CHECK: call i64 @llvm.ptrauth.blend(i64 {{%.*}}, i64 [[BASE2_VTABLE_DISC]])
-
diff --git a/clang/test/CodeGenCXX/ptrauth-member-function-pointer.cpp b/clang/test/CodeGenCXX/ptrauth-member-function-pointer.cpp
index 5e84e3e7bc5e9..0a9ac3fa510f5 100644
--- a/clang/test/CodeGenCXX/ptrauth-member-function-pointer.cpp
+++ b/clang/test/CodeGenCXX/ptrauth-member-function-pointer.cpp
@@ -1,8 +1,14 @@
-// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -o - %s | FileCheck -check-prefixes=CHECK,NODEBUG %s
-// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -debug-info-kind=limited -o - %s | FileCheck -check-prefixes=CHECK %s
-// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -stack-protector 1 -o - %s | FileCheck %s -check-prefix=STACK-PROT
-// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -stack-protector 2 -o - %s | FileCheck %s -check-prefix=STACK-PROT
-// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -stack-protector 3 -o - %s | FileCheck %s -check-prefix=STACK-PROT
+// RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -o - %s | FileCheck -check-prefixes=CHECK,NODEBUG,DARWIN %s
+// RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -debug-info-kind=limited -o - %s | FileCheck -check-prefixes=CHECK,DARWIN %s
+// RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -stack-protector 1 -o - %s | FileCheck %s -check-prefix=STACK-PROT
+// RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -stack-protector 2 -o - %s | FileCheck %s -check-prefix=STACK-PROT
+// RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -stack-protector 3 -o - %s | FileCheck %s -check-prefix=STACK-PROT
+
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -o - %s | FileCheck -check-prefixes=CHECK,NODEBUG,ELF %s
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -debug-info-kind=limited -o - %s | FileCheck -check-prefixes=CHECK,ELF %s
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -stack-protector 1 -o - %s | FileCheck %s -check-prefix=STACK-PROT
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -stack-protector 2 -o - %s | FileCheck %s -check-prefix=STACK-PROT
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -fptrauth-intrinsics -emit-llvm -std=c++11 -O1 -disable-llvm-passes -stack-protector 3 -o - %s | FileCheck %s -check-prefix=STACK-PROT
 
 
 // CHECK: @gmethod0 = global { i64, i64 } { i64 ptrtoint (ptr ptrauth (ptr @_ZN5Base011nonvirtual0Ev, i32 0, i64 [[TYPEDISC1:35591]]) to i64), i64 0 }, align 8
@@ -78,9 +84,9 @@ struct Class0 {
   MethodTy1 m0;
 };
 
-// CHECK: define void @_ZN5Base08virtual1Ev(
+// CHECK: define{{.*}} void @_ZN5Base08virtual1Ev(
 
-// CHECK: define void @_Z5test0v()
+// CHECK: define{{.*}} void @_Z5test0v()
 // CHECK: %[[METHOD0:.*]] = alloca { i64, i64 }, align 8
 // CHECK-NEXT: %[[VARMETHOD1:.*]] = alloca { i64, i64 }, align 8
 // CHECK-NEXT: %[[METHOD2:.*]] = alloca { i64, i64 }, align 8
@@ -246,7 +252,7 @@ void test0() {
   method7 = &Derived1::virtual1;
 }
 
-// CHECK: define void @_Z5test1P5Base0MS_FvvE(ptr noundef %[[A0:.*]], [2 x i64] %[[A1_COERCE:.*]])
+// CHECK: define{{.*}} void @_Z5test1P5Base0MS_FvvE(ptr noundef %[[A0:.*]], [2 x i64] %[[A1_COERCE:.*]])
 // CHECK: %[[A1:.*]] = alloca { i64, i64 }, align 8
 // CHECK: %[[A0_ADDR:.*]] = alloca ptr, align 8
 // CHECK: %[[A1_ADDR:.*]] = alloca { i64, i64 }, align 8
@@ -264,15 +270,16 @@ void test0() {
 // CHECK: %[[MEMPTR_ISVIRTUAL:.*]] = icmp ne i64 %[[V5]], 0
 // CHECK: br i1 %[[MEMPTR_ISVIRTUAL]]
 
-// CHECK: %[[VTABLE:.*]] = load ptr, ptr %[[V4]], align 8
-// CHECK: %[[V7:.*]] = ptrtoint ptr %[[VTABLE]] to i64
-// CHECK: %[[V8:.*]] = call i64 @llvm.ptrauth.auth(i64 %[[V7]], i32 2, i64 0)
-// CHECK: %[[V9:.*]] = inttoptr i64 %[[V8]] to ptr
-// CHECK: %[[V10:.*]] = trunc i64 %[[MEMPTR_PTR]] to i32
-// CHECK: %[[V11:.*]] = zext i32 %[[V10]] to i64
-// CHECK: %[[V12:.*]] = getelementptr i8, ptr %[[V9]], i64 %[[V11]]
-// CHECK: %[[MEMPTR_VIRTUALFN:.*]] = load ptr, ptr %[[V12]], align 8
-// CHECK: br
+// CHECK:  %[[VTABLE:.*]] = load ptr, ptr %[[V4]], align 8
+// CHECK:  %[[V7:.*]] = ptrtoint ptr %[[VTABLE]] to i64
+// CHECK:  %[[V8:.*]] = call i64 @llvm.ptrauth.auth(i64 %[[V7]], i32 2, i64 0)
+// CHECK:  %[[V9:.*]] = inttoptr i64 %[[V8]] to ptr
+// DARWIN: %[[V10:.*]] = trunc i64 %[[MEMPTR_PTR]] to i32
+// DARWIN: %[[V11:.*]] = zext i32 %[[V10]] to i64
+// DARWIN: %[[V12:.*]] = getelementptr i8, ptr %[[V9]], i64 %[[V11]]
+// ELF:    %[[V12:.*]] = getelementptr i8, ptr %[[V9]], i64 %[[MEMPTR_PTR]]
+// CHECK:  %[[MEMPTR_VIRTUALFN:.*]] = load ptr, ptr %[[V12]], align 8
+// CHECK:  br
 
 // CHECK: %[[MEMPTR_NONVIRTUALFN:.*]] = inttoptr i64 %[[MEMPTR_PTR]] to ptr
 // CHECK: br
@@ -286,7 +293,7 @@ void test1(Base0 *a0, MethodTy0 a1) {
   (a0->*a1)();
 }
 
-// CHECK: define void @_Z15testConversion0M5Base0FvvEM8Derived0FvvE([2 x i64] %[[METHOD0_COERCE:.*]], [2 x i64] %[[METHOD1_COERCE:.*]])
+// CHECK: define{{.*}} void @_Z15testConversion0M5Base0FvvEM8Derived0FvvE([2 x i64] %[[METHOD0_COERCE:.*]], [2 x i64] %[[METHOD1_COERCE:.*]])
 // CHECK: %[[METHOD0:.*]] = alloca { i64, i64 }, align 8
 // CHECK: %[[METHOD1:.*]] = alloca { i64, i64 }, align 8
 // CHECK: %[[METHOD0_ADDR:.*]] = alloca { i64, i64 }, align 8
@@ -326,21 +333,21 @@ void testConversion0(MethodTy0 method0, MethodTy1 method1) {
   method1 = method0;
 }
 
-// CHECK: define void @_Z15testConversion1M5Base0FvvE(
+// CHECK: define{{.*}} void @_Z15testConversion1M5Base0FvvE(
 // CHECK: call i64 @llvm.ptrauth.resign(i64 %{{.*}}, i32 0, i64 [[TYPEDISC0]], i32 0, i64 [[TYPEDISC1]])
 
 void testConversion1(MethodTy0 method0) {
   MethodTy1 method1 = reinterpret_cast<MethodTy1>(method0);
 }
 
-// CHECK: define void @_Z15testConversion2M8Derived0FvvE(
+// CHECK: define{{.*}} void @_Z15testConversion2M8Derived0FvvE(
 // CHECK: call i64 @llvm.ptrauth.resign(i64 %{{.*}}, i32 0, i64 [[TYPEDISC1]], i32 0, i64 [[TYPEDISC0]])
 
 void testConversion2(MethodTy1 method1) {
   MethodTy0 method0 = static_cast<MethodTy0>(method1);
 }
 
-// CHECK: define void @_Z15testConversion3M8Derived0FvvE(
+// CHECK: define{{.*}} void @_Z15testConversion3M8Derived0FvvE(
 // CHECK: call i64 @llvm.ptrauth.resign(i64 %{{.*}}, i32 0, i64 [[TYPEDISC1]], i32 0, i64 [[TYPEDISC0]])
 
 void testConversion3(MethodTy1 method1) {
@@ -350,7 +357,7 @@ void testConversion3(MethodTy1 method1) {
 // No need to call @llvm.ptrauth.resign if the source member function
 // pointer is a constant.
 
-// CHECK: define void @_Z15testConversion4v(
+// CHECK: define{{.*}} void @_Z15testConversion4v(
 // CHECK: %[[METHOD0:.*]] = alloca { i64, i64 }, align 8
 // CHECK: store { i64, i64 } { i64 ptrtoint (ptr ptrauth (ptr @_ZN5Base08virtual1Ev_vfpthunk_, i32 0, i64 [[TYPEDISC0]]) to i64), i64 0 }, ptr %[[METHOD0]], align 8
 // CHECK: ret void
@@ -396,7 +403,7 @@ MethodTy1 gmethod0 = reinterpret_cast<MethodTy1>(&Base0::nonvirtual0);
 MethodTy0 gmethod1 = reinterpret_cast<MethodTy0>(&Derived0::nonvirtual5);
 MethodTy0 gmethod2 = reinterpret_cast<MethodTy0>(&Derived0::virtual1);
 
-// CHECK-LABEL: define void @_Z13testArrayInitv()
+// CHECK-LABEL: define{{.*}} void @_Z13testArrayInitv()
 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %p0, ptr align 8 @__const._Z13testArrayInitv.p0, i64 16, i1 false)
 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %p1, ptr align 8 @__const._Z13testArrayInitv.p1, i64 16, i1 false)
 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %c0, ptr align 8 @__const._Z13testArrayInitv.c0, i64 16, i1 false)
@@ -424,7 +431,7 @@ void testArrayInit() {
 // STACK-PROT-NOT: sspreq
 // STACK-PROT-NEXT: attributes
 
-// CHECK: define void @_Z15testConvertNullv(
+// CHECK: define{{.*}} void @_Z15testConvertNullv(
 // CHECK: %[[T:.*]] = alloca { i64, i64 },
 // store { i64, i64 } zeroinitializer, { i64, i64 }* %[[T]],
 
diff --git a/clang/test/CodeGenCXX/ptrauth-type-info-vtable.cpp b/clang/test/CodeGenCXX/ptrauth-type-info-vtable.cpp
index d5f69e0485140..174aeda89d175 100644
--- a/clang/test/CodeGenCXX/ptrauth-type-info-vtable.cpp
+++ b/clang/test/CodeGenCXX/ptrauth-type-info-vtable.cpp
@@ -4,6 +4,12 @@
 // RUN:   -fptrauth-vtable-pointer-address-discrimination \
 // RUN:   %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,NODISC
 
+// RUN: %clang_cc1 -DENABLE_TID=0 -I%S -std=c++11 -triple=aarch64-linux-gnu \
+// RUN:   -fptrauth-calls -fptrauth-intrinsics \
+// RUN:   -fptrauth-vtable-pointer-type-discrimination \
+// RUN:   -fptrauth-vtable-pointer-address-discrimination \
+// RUN:   %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,NODISC
+
 // RUN: %clang_cc1 -DENABLE_TID=1 -I%S -std=c++11 -triple=arm64e-apple-darwin \
 // RUN:   -fptrauth-calls -fptrauth-intrinsics \
 // RUN:   -fptrauth-vtable-pointer-type-discrimination \
@@ -11,6 +17,13 @@
 // RUN:   -fptrauth-type-info-vtable-pointer-discrimination \
 // RUN:   %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,DISC
 
+// RUN: %clang_cc1 -DENABLE_TID=1 -I%S -std=c++11 -triple=aarch64-linux-gnu \
+// RUN:   -fptrauth-calls -fptrauth-intrinsics \
+// RUN:   -fptrauth-vtable-pointer-type-discrimination \
+// RUN:   -fptrauth-vtable-pointer-address-discrimination \
+// RUN:   -fptrauth-type-info-vtable-pointer-discrimination \
+// RUN:   %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,DISC
+
 // copied from typeinfo
 namespace std {
 
@@ -64,7 +77,7 @@ TestStruct::~TestStruct(){}
 extern "C" void test_vtable(std::type_info* t) {
   t->test_method();
 }
-// NODISC: define void @test_vtable(ptr noundef %t)
+// NODISC: define{{.*}} void @test_vtable(ptr noundef %t)
 // NODISC: [[T_ADDR:%.*]] = alloca ptr, align 8
 // NODISC: store ptr %t, ptr [[T_ADDR]], align 8
 // NODISC: [[T:%.*]] = load ptr, ptr [[T_ADDR]], align 8
@@ -72,7 +85,7 @@ extern "C" void test_vtable(std::type_info* t) {
 // NODISC: [[CAST_VPTR:%.*]] = ptrtoint ptr [[VPTR]] to i64
 // NODISC: [[AUTHED:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[CAST_VPTR]], i32 2, i64 0)
 
-// DISC: define void @test_vtable(ptr noundef %t)
+// DISC: define{{.*}} void @test_vtable(ptr noundef %t)
 // DISC: [[T_ADDR:%.*]] = alloca ptr, align 8
 // DISC: store ptr %t, ptr [[T_ADDR]], align 8
 // DISC: [[T:%.*]] = load ptr, ptr [[T_ADDR]], align 8
diff --git a/clang/test/Driver/aarch64-ptrauth.c b/clang/test/Driver/aarch64-ptrauth.c
index eeb9500792d75..c8e3aeef1640a 100644
--- a/clang/test/Driver/aarch64-ptrauth.c
+++ b/clang/test/Driver/aarch64-ptrauth.c
@@ -11,9 +11,11 @@
 // RUN:   -fno-ptrauth-auth-traps -fptrauth-auth-traps \
 // RUN:   -fno-ptrauth-vtable-pointer-address-discrimination -fptrauth-vtable-pointer-address-discrimination \
 // RUN:   -fno-ptrauth-vtable-pointer-type-discrimination -fptrauth-vtable-pointer-type-discrimination \
+// RUN:   -fno-ptrauth-type-info-vtable-pointer-discrimination -fptrauth-type-info-vtable-pointer-discrimination \
 // RUN:   -fno-ptrauth-init-fini -fptrauth-init-fini \
+// RUN:   -fno-ptrauth-indirect-gotos -fptrauth-indirect-gotos \
 // RUN:   %s 2>&1 | FileCheck %s --check-prefix=ALL
-// ALL: "-cc1"{{.*}} "-fptrauth-intrinsics" "-fptrauth-calls" "-fptrauth-returns" "-fptrauth-auth-traps" "-fptrauth-vtable-pointer-address-discrimination" "-fptrauth-vtable-pointer-type-discrimination" "-fptrauth-init-fini"
+// ALL: "-cc1"{{.*}} "-fptrauth-intrinsics" "-fptrauth-calls" "-fptrauth-returns" "-fptrauth-auth-traps" "-fptrauth-vtable-pointer-address-discrimination" "-fptrauth-vtable-pointer-type-discrimination" "-fptrauth-type-info-vtable-pointer-discrimination" "-fptrauth-init-fini" "-fptrauth-indirect-gotos"
 
 // RUN: %clang -### -c --target=aarch64-linux -mabi=pauthtest %s 2>&1 | FileCheck %s --check-prefix=PAUTHABI1
 // RUN: %clang -### -c --target=aarch64-linux-pauthtest %s 2>&1 | FileCheck %s --check-prefix=PAUTHABI1
@@ -34,13 +36,16 @@
 
 // RUN: not %clang -### -c --target=x86_64 -fptrauth-intrinsics -fptrauth-calls -fptrauth-returns -fptrauth-auth-traps \
 // RUN:   -fptrauth-vtable-pointer-address-discrimination -fptrauth-vtable-pointer-type-discrimination \
-// RUN:   -fptrauth-init-fini %s 2>&1 | FileCheck %s --check-prefix=ERR1
+// RUN:   -fptrauth-type-info-vtable-pointer-discrimination -fptrauth-indirect-gotos -fptrauth-init-fini %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=ERR1
 // ERR1:      error: unsupported option '-fptrauth-intrinsics' for target '{{.*}}'
 // ERR1-NEXT: error: unsupported option '-fptrauth-calls' for target '{{.*}}'
 // ERR1-NEXT: error: unsupported option '-fptrauth-returns' for target '{{.*}}'
 // ERR1-NEXT: error: unsupported option '-fptrauth-auth-traps' for target '{{.*}}'
 // ERR1-NEXT: error: unsupported option '-fptrauth-vtable-pointer-address-discrimination' for target '{{.*}}'
 // ERR1-NEXT: error: unsupported option '-fptrauth-vtable-pointer-type-discrimination' for target '{{.*}}'
+// ERR1-NEXT: error: unsupported option '-fptrauth-type-info-vtable-pointer-discrimination' for target '{{.*}}'
+// ERR1-NEXT: error: unsupported option '-fptrauth-indirect-gotos' for target '{{.*}}'
 // ERR1-NEXT: error: unsupported option '-fptrauth-init-fini' for target '{{.*}}'
 
 //// Only support PAuth ABI for Linux as for now.
diff --git a/clang/test/Preprocessor/ptrauth_feature.c b/clang/test/Preprocessor/ptrauth_feature.c
index 1330ad10b4b47..14059f827b94c 100644
--- a/clang/test/Preprocessor/ptrauth_feature.c
+++ b/clang/test/Preprocessor/ptrauth_feature.c
@@ -2,25 +2,31 @@
 //// For example, -fptrauth-init-fini will not affect codegen without -fptrauth-calls, but the preprocessor feature would be set anyway.
 
 // RUN: %clang_cc1 -E %s -triple=aarch64 -fptrauth-intrinsics | \
-// RUN:   FileCheck %s --check-prefixes=INTRIN,NOCALLS,NORETS,NOVPTR_ADDR_DISCR,NOVPTR_TYPE_DISCR,NOFUNC,NOINITFINI
+// RUN:   FileCheck %s --check-prefixes=INTRIN,NOCALLS,NORETS,NOVPTR_ADDR_DISCR,NOVPTR_TYPE_DISCR,NOTYPE_INFO_DISCR,NOFUNC,NOINITFINI,NOGOTOS
 
 // RUN: %clang_cc1 -E %s -triple=aarch64 -fptrauth-calls | \
-// RUN:   FileCheck %s --check-prefixes=NOINTRIN,CALLS,NORETS,NOVPTR_ADDR_DISCR,NOVPTR_TYPE_DISCR,NOFUNC,NOINITFINI
+// RUN:   FileCheck %s --check-prefixes=NOINTRIN,CALLS,NORETS,NOVPTR_ADDR_DISCR,NOVPTR_TYPE_DISCR,NOTYPE_INFO_DISCR,NOFUNC,NOINITFINI,NOGOTOS
 
 // RUN: %clang_cc1 -E %s -triple=aarch64 -fptrauth-returns | \
-// RUN:   FileCheck %s --check-prefixes=NOINTRIN,NOCALLS,RETS,NOVPTR_ADDR_DISCR,NOVPTR_TYPE_DISCR,NOFUNC,NOINITFINI
+// RUN:   FileCheck %s --check-prefixes=NOINTRIN,NOCALLS,RETS,NOVPTR_ADDR_DISCR,NOVPTR_TYPE_DISCR,NOTYPE_INFO_DISCR,NOFUNC,NOINITFINI,NOGOTOS
 
 // RUN: %clang_cc1 -E %s -triple=aarch64 -fptrauth-vtable-pointer-address-discrimination | \
-// RUN:   FileCheck %s --check-prefixes=NOINTRIN,NOCALLS,NORETS,VPTR_ADDR_DISCR,NOVPTR_TYPE_DISCR,NOFUNC,NOINITFINI
+// RUN:   FileCheck %s --check-prefixes=NOINTRIN,NOCALLS,NORETS,VPTR_ADDR_DISCR,NOVPTR_TYPE_DISCR,NOTYPE_INFO_DISCR,NOFUNC,NOINITFINI,NOGOTOS
 
 // RUN: %clang_cc1 -E %s -triple=aarch64 -fptrauth-vtable-pointer-type-discrimination | \
-// RUN:   FileCheck %s --check-prefixes=NOINTRIN,NOCALLS,NORETS,NOVPTR_ADDR_DISCR,VPTR_TYPE_DISCR,NOFUNC,NOINITFINI
+// RUN:   FileCheck %s --check-prefixes=NOINTRIN,NOCALLS,NORETS,NOVPTR_ADDR_DISCR,VPTR_TYPE_DISCR,NOTYPE_INFO_DISCR,NOFUNC,NOINITFINI,NOGOTOS
+
+// RUN: %clang_cc1 -E %s -triple=aarch64 -fptrauth-type-info-vtable-pointer-discrimination | \
+// RUN:   FileCheck %s --check-prefixes=NOINTRIN,NOCALLS,NORETS,NOVPTR_ADDR_DISCR,NOVPTR_TYPE_DISCR,TYPE_INFO_DISCR,NOFUNC,NOINITFINI,NOGOTOS
 
 // RUN: %clang_cc1 -E %s -triple=aarch64 -fptrauth-function-pointer-type-discrimination | \
-// RUN:   FileCheck %s --check-prefixes=NOINTRIN,NOCALLS,NORETS,NOVPTR_ADDR_DISCR,NOVPTR_TYPE_DISCR,FUNC,NOINITFINI
+// RUN:   FileCheck %s --check-prefixes=NOINTRIN,NOCALLS,NORETS,NOVPTR_ADDR_DISCR,NOVPTR_TYPE_DISCR,NOTYPE_INFO_DISCR,FUNC,NOINITFINI,NOGOTOS
 
 // RUN: %clang_cc1 -E %s -triple=aarch64 -fptrauth-init-fini | \
-// RUN:   FileCheck %s --check-prefixes=NOINTRIN,NOCALLS,NORETS,NOVPTR_ADDR_DISCR,NOVPTR_TYPE_DISCR,NOFUNC,INITFINI
+// RUN:   FileCheck %s --check-prefixes=NOINTRIN,NOCALLS,NORETS,NOVPTR_ADDR_DISCR,NOVPTR_TYPE_DISCR,NOTYPE_INFO_DISCR,NOFUNC,INITFINI,NOGOTOS
+
+// RUN: %clang_cc1 -E %s -triple=aarch64 -fptrauth-indirect-gotos | \
+// RUN:   FileCheck %s --check-prefixes=NOINTRIN,NOCALLS,NORETS,NOVPTR_ADDR_DISCR,NOVPTR_TYPE_DISCR,NOTYPE_INFO_DISCR,NOFUNC,NOINITFINI,GOTOS
 
 #if __has_feature(ptrauth_intrinsics)
 // INTRIN: has_ptrauth_intrinsics
@@ -71,6 +77,14 @@ void has_ptrauth_vtable_pointer_type_discrimination() {}
 void no_ptrauth_vtable_pointer_type_discrimination() {}
 #endif
 
+#if __has_feature(ptrauth_type_info_vtable_pointer_discrimination)
+// TYPE_INFO_DISCR: has_ptrauth_type_info_vtable_pointer_discrimination
+void has_ptrauth_type_info_vtable_pointer_discrimination() {}
+#else
+// NOTYPE_INFO_DISCR: no_ptrauth_type_info_vtable_pointer_discrimination
+void no_ptrauth_type_info_vtable_pointer_discrimination() {}
+#endif
+
 #if __has_feature(ptrauth_function_pointer_type_discrimination)
 // FUNC: has_ptrauth_function_pointer_type_discrimination
 void has_ptrauth_function_pointer_type_discrimination() {}
@@ -86,3 +100,11 @@ void has_ptrauth_init_fini() {}
 // NOINITFINI: no_ptrauth_init_fini
 void no_ptrauth_init_fini() {}
 #endif
+
+#if __has_feature(ptrauth_indirect_gotos)
+// GOTOS: has_ptrauth_indirect_gotos
+void has_ptrauth_indirect_gotos() {}
+#else
+// NOGOTOS: no_ptrauth_indirect_gotos
+void no_ptrauth_indirect_gotos() {}
+#endif
diff --git a/clang/test/Sema/ptrauth-indirect-goto.c b/clang/test/Sema/ptrauth-indirect-goto.c
index 47bc76738d23b..7304f5c30a117 100644
--- a/clang/test/Sema/ptrauth-indirect-goto.c
+++ b/clang/test/Sema/ptrauth-indirect-goto.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -triple arm64e-apple-darwin -fsyntax-only -verify %s -fptrauth-indirect-gotos
+// RUN: %clang_cc1 -triple aarch64-linux-gnu   -fsyntax-only -verify %s -fptrauth-indirect-gotos
 
 int f() {
   static void *addrs[] = { &&l1, &&l2 };

From 9536b026ac46c34d607c0a277c8fbdc183d53b9d Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dimitry@andric.com>
Date: Mon, 29 Jul 2024 22:00:07 +0200
Subject: [PATCH 068/427] [compiler-rt] Fix format string warnings in FreeBSD
 DumpAllRegisters (#101072)

On FreeBSD amd64 (aka x86_64), registers are always defined as
`int64_t`, which in turn is equivalent to `long`. This leads to a number
of warnings in `DumpAllRegisters()`:

compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp:2245:31: warning:
format specifies type 'unsigned long long' but the argument has type
'__register_t' (aka 'long') [-Wformat]
     2245 |   Printf("rax = 0x%016llx  ", ucontext->uc_mcontext.mc_rax);
          |                   ~~~~~~~     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
          |                   %016lx
compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp:2246:31: warning:
format specifies type 'unsigned long long' but the argument has type
'__register_t' (aka 'long') [-Wformat]
     2246 |   Printf("rbx = 0x%016llx  ", ucontext->uc_mcontext.mc_rbx);
          |                   ~~~~~~~     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
          |                   %016lx
    ... more of these ...

Fix it by using the `lx` format.

(cherry picked from commit 62bd08acedc88d8976a017f7f6818f3167dfa697)
---
 .../lib/sanitizer_common/sanitizer_linux.cpp  | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 483a1042a6238..76acf591871ab 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -2242,25 +2242,25 @@ void SignalContext::DumpAllRegisters(void *context) {
 #  elif SANITIZER_FREEBSD
 #    if defined(__x86_64__)
   Report("Register values:\n");
-  Printf("rax = 0x%016llx  ", ucontext->uc_mcontext.mc_rax);
-  Printf("rbx = 0x%016llx  ", ucontext->uc_mcontext.mc_rbx);
-  Printf("rcx = 0x%016llx  ", ucontext->uc_mcontext.mc_rcx);
-  Printf("rdx = 0x%016llx  ", ucontext->uc_mcontext.mc_rdx);
+  Printf("rax = 0x%016lx  ", ucontext->uc_mcontext.mc_rax);
+  Printf("rbx = 0x%016lx  ", ucontext->uc_mcontext.mc_rbx);
+  Printf("rcx = 0x%016lx  ", ucontext->uc_mcontext.mc_rcx);
+  Printf("rdx = 0x%016lx  ", ucontext->uc_mcontext.mc_rdx);
   Printf("\n");
-  Printf("rdi = 0x%016llx  ", ucontext->uc_mcontext.mc_rdi);
-  Printf("rsi = 0x%016llx  ", ucontext->uc_mcontext.mc_rsi);
-  Printf("rbp = 0x%016llx  ", ucontext->uc_mcontext.mc_rbp);
-  Printf("rsp = 0x%016llx  ", ucontext->uc_mcontext.mc_rsp);
+  Printf("rdi = 0x%016lx  ", ucontext->uc_mcontext.mc_rdi);
+  Printf("rsi = 0x%016lx  ", ucontext->uc_mcontext.mc_rsi);
+  Printf("rbp = 0x%016lx  ", ucontext->uc_mcontext.mc_rbp);
+  Printf("rsp = 0x%016lx  ", ucontext->uc_mcontext.mc_rsp);
   Printf("\n");
-  Printf(" r8 = 0x%016llx  ", ucontext->uc_mcontext.mc_r8);
-  Printf(" r9 = 0x%016llx  ", ucontext->uc_mcontext.mc_r9);
-  Printf("r10 = 0x%016llx  ", ucontext->uc_mcontext.mc_r10);
-  Printf("r11 = 0x%016llx  ", ucontext->uc_mcontext.mc_r11);
+  Printf(" r8 = 0x%016lx  ", ucontext->uc_mcontext.mc_r8);
+  Printf(" r9 = 0x%016lx  ", ucontext->uc_mcontext.mc_r9);
+  Printf("r10 = 0x%016lx  ", ucontext->uc_mcontext.mc_r10);
+  Printf("r11 = 0x%016lx  ", ucontext->uc_mcontext.mc_r11);
   Printf("\n");
-  Printf("r12 = 0x%016llx  ", ucontext->uc_mcontext.mc_r12);
-  Printf("r13 = 0x%016llx  ", ucontext->uc_mcontext.mc_r13);
-  Printf("r14 = 0x%016llx  ", ucontext->uc_mcontext.mc_r14);
-  Printf("r15 = 0x%016llx  ", ucontext->uc_mcontext.mc_r15);
+  Printf("r12 = 0x%016lx  ", ucontext->uc_mcontext.mc_r12);
+  Printf("r13 = 0x%016lx  ", ucontext->uc_mcontext.mc_r13);
+  Printf("r14 = 0x%016lx  ", ucontext->uc_mcontext.mc_r14);
+  Printf("r15 = 0x%016lx  ", ucontext->uc_mcontext.mc_r15);
   Printf("\n");
 #    elif defined(__i386__)
   Report("Register values:\n");

From 404746b9f21bef631eac09469bfcc35e8cfe0e63 Mon Sep 17 00:00:00 2001
From: Daniel Martinez <danielpedromartinez@duck.com>
Date: Mon, 29 Jul 2024 22:20:18 +0000
Subject: [PATCH 069/427] [nsan] Remove mallopt from nsan_interceptors
 (#101055)

Fixes a build failure on 19.1.0-rc1 when building on linux with musl as
the libc

musl does not provide mallopt, whereas glibc does. mallopt has
portability issues with other libc implementations. Just remove the use.

Co-authored-by: Daniel Martinez <danielmartinez@cock.li>
(cherry picked from commit 2c3eb8db057b9d58acd4735999f0f5d5d8d55b0d)
---
 compiler-rt/lib/nsan/nsan_interceptors.cpp | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/compiler-rt/lib/nsan/nsan_interceptors.cpp b/compiler-rt/lib/nsan/nsan_interceptors.cpp
index 544b44f53cc42..852524bd37332 100644
--- a/compiler-rt/lib/nsan/nsan_interceptors.cpp
+++ b/compiler-rt/lib/nsan/nsan_interceptors.cpp
@@ -21,10 +21,6 @@
 
 #include <wchar.h>
 
-#if SANITIZER_LINUX
-extern "C" int mallopt(int param, int value);
-#endif
-
 using namespace __sanitizer;
 using __nsan::nsan_init_is_running;
 using __nsan::nsan_initialized;
@@ -209,12 +205,6 @@ void __nsan::InitializeInterceptors() {
   static bool initialized = false;
   CHECK(!initialized);
 
-  // Instruct libc malloc to consume less memory.
-#if SANITIZER_LINUX
-  mallopt(1, 0);          // M_MXFAST
-  mallopt(-3, 32 * 1024); // M_MMAP_THRESHOLD
-#endif
-
   InitializeMallocInterceptors();
 
   INTERCEPT_FUNCTION(memset);

From 392b77d58a91049a155f3390ec16941a848aa766 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Mon, 29 Jul 2024 18:01:44 -0700
Subject: [PATCH 070/427] [clang-format] Fix misannotations of `<` in ternary
 expressions (#100980)

Fixes #100300.

(cherry picked from commit 73c961a3345c697f40e2148318f34f5f347701c1)
---
 clang/lib/Format/TokenAnnotator.cpp           | 42 ++++++++++++-------
 clang/unittests/Format/TokenAnnotatorTest.cpp | 23 ++++++++++
 2 files changed, 49 insertions(+), 16 deletions(-)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 5c11f3cb1a874..63c8699fd62d1 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -154,8 +154,8 @@ class AnnotatingParser {
     if (NonTemplateLess.count(CurrentToken->Previous) > 0)
       return false;
 
-    const FormatToken &Previous = *CurrentToken->Previous; // The '<'.
-    if (Previous.Previous) {
+    if (const auto &Previous = *CurrentToken->Previous; // The '<'.
+        Previous.Previous) {
       if (Previous.Previous->Tok.isLiteral())
         return false;
       if (Previous.Previous->is(tok::r_brace))
@@ -175,11 +175,13 @@ class AnnotatingParser {
     FormatToken *Left = CurrentToken->Previous;
     Left->ParentBracket = Contexts.back().ContextKind;
     ScopedContextCreator ContextCreator(*this, tok::less, 12);
-
     Contexts.back().IsExpression = false;
+
+    const auto *BeforeLess = Left->Previous;
+
     // If there's a template keyword before the opening angle bracket, this is a
     // template parameter, not an argument.
-    if (Left->Previous && Left->Previous->isNot(tok::kw_template))
+    if (BeforeLess && BeforeLess->isNot(tok::kw_template))
       Contexts.back().ContextType = Context::TemplateArgument;
 
     if (Style.Language == FormatStyle::LK_Java &&
@@ -187,19 +189,24 @@ class AnnotatingParser {
       next();
     }
 
-    while (CurrentToken) {
+    for (bool SeenTernaryOperator = false; CurrentToken;) {
+      const bool InExpr = Contexts[Contexts.size() - 2].IsExpression;
       if (CurrentToken->is(tok::greater)) {
+        const auto *Next = CurrentToken->Next;
         // Try to do a better job at looking for ">>" within the condition of
         // a statement. Conservatively insert spaces between consecutive ">"
         // tokens to prevent splitting right bitshift operators and potentially
         // altering program semantics. This check is overly conservative and
         // will prevent spaces from being inserted in select nested template
         // parameter cases, but should not alter program semantics.
-        if (CurrentToken->Next && CurrentToken->Next->is(tok::greater) &&
+        if (Next && Next->is(tok::greater) &&
             Left->ParentBracket != tok::less &&
             CurrentToken->getStartOfNonWhitespace() ==
-                CurrentToken->Next->getStartOfNonWhitespace().getLocWithOffset(
-                    -1)) {
+                Next->getStartOfNonWhitespace().getLocWithOffset(-1)) {
+          return false;
+        }
+        if (InExpr && SeenTernaryOperator &&
+            (!Next || !Next->isOneOf(tok::l_paren, tok::l_brace))) {
           return false;
         }
         Left->MatchingParen = CurrentToken;
@@ -210,14 +217,14 @@ class AnnotatingParser {
         //   msg: < item: data >
         // In TT_TextProto, map<key, value> does not occur.
         if (Style.Language == FormatStyle::LK_TextProto ||
-            (Style.Language == FormatStyle::LK_Proto && Left->Previous &&
-             Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
+            (Style.Language == FormatStyle::LK_Proto && BeforeLess &&
+             BeforeLess->isOneOf(TT_SelectorName, TT_DictLiteral))) {
           CurrentToken->setType(TT_DictLiteral);
         } else {
           CurrentToken->setType(TT_TemplateCloser);
           CurrentToken->Tok.setLength(1);
         }
-        if (CurrentToken->Next && CurrentToken->Next->Tok.isLiteral())
+        if (Next && Next->Tok.isLiteral())
           return false;
         next();
         return true;
@@ -229,18 +236,21 @@ class AnnotatingParser {
       }
       if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace))
         return false;
+      const auto &Prev = *CurrentToken->Previous;
       // If a && or || is found and interpreted as a binary operator, this set
       // of angles is likely part of something like "a < b && c > d". If the
       // angles are inside an expression, the ||/&& might also be a binary
       // operator that was misinterpreted because we are parsing template
       // parameters.
       // FIXME: This is getting out of hand, write a decent parser.
-      if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
-          CurrentToken->Previous->is(TT_BinaryOperator) &&
-          Contexts[Contexts.size() - 2].IsExpression &&
-          !Line.startsWith(tok::kw_template)) {
-        return false;
+      if (InExpr && !Line.startsWith(tok::kw_template) &&
+          Prev.is(TT_BinaryOperator)) {
+        const auto Precedence = Prev.getPrecedence();
+        if (Precedence > prec::Conditional && Precedence < prec::Relational)
+          return false;
       }
+      if (Prev.is(TT_ConditionalExpr))
+        SeenTernaryOperator = true;
       updateParameterCount(Left, CurrentToken);
       if (Style.Language == FormatStyle::LK_Proto) {
         if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) {
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 51810ad047a26..386649bb6679f 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -577,12 +577,20 @@ TEST_F(TokenAnnotatorTest, UnderstandsTernaryInTemplate) {
   EXPECT_TOKEN(Tokens[7], tok::greater, TT_TemplateCloser);
 
   // IsExpression = true
+
   Tokens = annotate("return foo<true ? 1 : 2>();");
   ASSERT_EQ(Tokens.size(), 13u) << Tokens;
   EXPECT_TOKEN(Tokens[2], tok::less, TT_TemplateOpener);
   EXPECT_TOKEN(Tokens[4], tok::question, TT_ConditionalExpr);
   EXPECT_TOKEN(Tokens[6], tok::colon, TT_ConditionalExpr);
   EXPECT_TOKEN(Tokens[8], tok::greater, TT_TemplateCloser);
+
+  Tokens = annotate("return foo<true ? 1 : 2>{};");
+  ASSERT_EQ(Tokens.size(), 13u) << Tokens;
+  EXPECT_TOKEN(Tokens[2], tok::less, TT_TemplateOpener);
+  EXPECT_TOKEN(Tokens[4], tok::question, TT_ConditionalExpr);
+  EXPECT_TOKEN(Tokens[6], tok::colon, TT_ConditionalExpr);
+  EXPECT_TOKEN(Tokens[8], tok::greater, TT_TemplateCloser);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandsNonTemplateAngleBrackets) {
@@ -596,6 +604,21 @@ TEST_F(TokenAnnotatorTest, UnderstandsNonTemplateAngleBrackets) {
   EXPECT_TOKEN(Tokens[1], tok::less, TT_BinaryOperator);
   EXPECT_TOKEN(Tokens[7], tok::greater, TT_BinaryOperator);
 
+  Tokens = annotate("return A < B ? true : A > B;");
+  ASSERT_EQ(Tokens.size(), 12u) << Tokens;
+  EXPECT_TOKEN(Tokens[2], tok::less, TT_BinaryOperator);
+  EXPECT_TOKEN(Tokens[8], tok::greater, TT_BinaryOperator);
+
+  Tokens = annotate("return A < B ? true : A > B ? false : false;");
+  ASSERT_EQ(Tokens.size(), 16u) << Tokens;
+  EXPECT_TOKEN(Tokens[2], tok::less, TT_BinaryOperator);
+  EXPECT_TOKEN(Tokens[8], tok::greater, TT_BinaryOperator);
+
+  Tokens = annotate("return A < B ^ A > B;");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[2], tok::less, TT_BinaryOperator);
+  EXPECT_TOKEN(Tokens[6], tok::greater, TT_BinaryOperator);
+
   Tokens = annotate("ratio{-1, 2} < ratio{-1, 3} == -1 / 3 > -1 / 2;");
   ASSERT_EQ(Tokens.size(), 27u) << Tokens;
   EXPECT_TOKEN(Tokens[7], tok::less, TT_BinaryOperator);

From 63d44ea32a28ed49e99572ca46b03eb92706433e Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Mon, 29 Jul 2024 22:46:18 +0200
Subject: [PATCH 071/427] [NVPTX] Fix DwarfFrameBase construction (#101000)

The `{0}` here was initializing the first union member `Register`,
rather than the union member used by CFA, which is `Offset`. Prior to
https://github.com/llvm/llvm-project/pull/99263 this was harmless, but
now they have different layout, leading to test failures on some
platforms (at least i686 and s390x).

(cherry picked from commit 842a332f11f53c698fa0560505e533ecdca28876)
---
 llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 10ae81e0460e3..9abe0e3186f20 100644
--- a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -93,5 +93,8 @@ MachineBasicBlock::iterator NVPTXFrameLowering::eliminateCallFramePseudoInstr(
 
 TargetFrameLowering::DwarfFrameBase
 NVPTXFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
-  return {DwarfFrameBase::CFA, {0}};
+  DwarfFrameBase FrameBase;
+  FrameBase.Kind = DwarfFrameBase::CFA;
+  FrameBase.Location.Offset = 0;
+  return FrameBase;
 }

From 146fc62f508ba12026f712d9576c80ea95fc6747 Mon Sep 17 00:00:00 2001
From: Jacek Caban <jacek@codeweavers.com>
Date: Sat, 27 Jul 2024 14:29:05 +0200
Subject: [PATCH 072/427] [clang][ARM64EC] Add support for hybrid_patchable
 attribute. (#99478)

(cherry picked from commit ea98dc8b8f508b8393651992830e5e51d3876728)
---
 clang/docs/ReleaseNotes.rst                   |  3 ++
 clang/include/clang/Basic/Attr.td             |  9 +++++
 clang/include/clang/Basic/AttrDocs.td         | 10 ++++++
 .../clang/Basic/DiagnosticSemaKinds.td        |  3 ++
 clang/lib/CodeGen/CodeGenFunction.cpp         |  3 ++
 clang/lib/Sema/SemaDecl.cpp                   |  5 +++
 clang/lib/Sema/SemaDeclAttr.cpp               |  3 ++
 clang/test/CodeGen/arm64ec-hybrid-patchable.c | 34 +++++++++++++++++++
 ...a-attribute-supported-attributes-list.test |  1 +
 9 files changed, 71 insertions(+)
 create mode 100644 clang/test/CodeGen/arm64ec-hybrid-patchable.c

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 71d615553c613..610061406a1ec 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -629,6 +629,9 @@ Attribute Changes in Clang
   The attributes declare constraints about a function's behavior pertaining to blocking and
   heap memory allocation.
 
+- The ``hybrid_patchable`` attribute is now supported on ARM64EC targets. It can be used to specify
+  that a function requires an additional x86-64 thunk, which may be patched at runtime.
+
 Improvements to Clang's diagnostics
 -----------------------------------
 - Clang now emits an error instead of a warning for ``-Wundefined-internal``
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 4825979a974d2..46d0a66d59c37 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -477,6 +477,9 @@ def TargetELF : TargetSpec {
 def TargetELFOrMachO : TargetSpec {
   let ObjectFormats = ["ELF", "MachO"];
 }
+def TargetWindowsArm64EC : TargetSpec {
+  let CustomCode = [{ Target.getTriple().isWindowsArm64EC() }];
+}
 
 def TargetSupportsInitPriority : TargetSpec {
   let CustomCode = [{ !Target.getTriple().isOSzOS() }];
@@ -4027,6 +4030,12 @@ def SelectAny : InheritableAttr {
   let SimpleHandler = 1;
 }
 
+def HybridPatchable : InheritableAttr, TargetSpecificAttr<TargetWindowsArm64EC> {
+  let Spellings = [Declspec<"hybrid_patchable">, Clang<"hybrid_patchable">];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [HybridPatchableDocs];
+}
+
 def Thread : Attr {
   let Spellings = [Declspec<"thread">];
   let LangOpts = [MicrosoftExt];
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 99738812c8157..b5d468eb5ec95 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -5985,6 +5985,16 @@ For more information see
 or `msvc documentation <https://docs.microsoft.com/pl-pl/cpp/cpp/selectany>`_.
 }]; }
 
+def HybridPatchableDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+The ``hybrid_patchable`` attribute declares an ARM64EC function with an additional
+x86-64 thunk, which may be patched at runtime.
+
+For more information see
+`ARM64EC ABI documentation <https://learn.microsoft.com/en-us/windows/arm/arm64ec-abi>`_.
+}]; }
+
 def WebAssemblyExportNameDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index eb0506e71fe3f..95ce4166ceb66 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3677,6 +3677,9 @@ def err_attribute_weak_static : Error<
   "weak declaration cannot have internal linkage">;
 def err_attribute_selectany_non_extern_data : Error<
   "'selectany' can only be applied to data items with external linkage">;
+def warn_attribute_hybrid_patchable_non_extern : Warning<
+  "'hybrid_patchable' is ignored on functions without external linkage">,
+  InGroup<IgnoredAttributes>;
 def err_declspec_thread_on_thread_variable : Error<
   "'__declspec(thread)' applied to variable that already has a "
   "thread-local storage specifier">;
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index d6078696a7d91..af201554898f3 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -991,6 +991,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
   if (D && D->hasAttr<NoProfileFunctionAttr>())
     Fn->addFnAttr(llvm::Attribute::NoProfile);
 
+  if (D && D->hasAttr<HybridPatchableAttr>())
+    Fn->addFnAttr(llvm::Attribute::HybridPatchable);
+
   if (D) {
     // Function attributes take precedence over command line flags.
     if (auto *A = D->getAttr<FunctionReturnThunksAttr>()) {
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index bb25a0b3a45ae..f60cc78be4f92 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -6890,6 +6890,11 @@ static void checkAttributesAfterMerging(Sema &S, NamedDecl &ND) {
     }
   }
 
+  if (HybridPatchableAttr *Attr = ND.getAttr<HybridPatchableAttr>()) {
+    if (!ND.isExternallyVisible())
+      S.Diag(Attr->getLocation(),
+             diag::warn_attribute_hybrid_patchable_non_extern);
+  }
   if (const InheritableAttr *Attr = getDLLAttr(&ND)) {
     auto *VD = dyn_cast<VarDecl>(&ND);
     bool IsAnonymousNS = false;
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 5fd8622c90dd8..10bacc17a07ca 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6868,6 +6868,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
   case ParsedAttr::AT_MSConstexpr:
     handleMSConstexprAttr(S, D, AL);
     break;
+  case ParsedAttr::AT_HybridPatchable:
+    handleSimpleAttribute<HybridPatchableAttr>(S, D, AL);
+    break;
 
   // HLSL attributes:
   case ParsedAttr::AT_HLSLNumThreads:
diff --git a/clang/test/CodeGen/arm64ec-hybrid-patchable.c b/clang/test/CodeGen/arm64ec-hybrid-patchable.c
new file mode 100644
index 0000000000000..4d1fa12afd2aa
--- /dev/null
+++ b/clang/test/CodeGen/arm64ec-hybrid-patchable.c
@@ -0,0 +1,34 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple arm64ec-pc-windows -fms-extensions -emit-llvm -o - %s -verify | FileCheck %s
+
+// CHECK: ;    Function Attrs: hybrid_patchable noinline nounwind optnone
+// CHECK-NEXT: define dso_local i32 @func() #0 {
+int __attribute__((hybrid_patchable)) func(void) {  return 1; }
+
+// CHECK: ;    Function Attrs: hybrid_patchable noinline nounwind optnone
+// CHECK-NEXT: define dso_local i32 @func2() #0 {
+int __declspec(hybrid_patchable) func2(void) {  return 2; }
+
+// CHECK: ;    Function Attrs: hybrid_patchable noinline nounwind optnone
+// CHECK-NEXT: define dso_local i32 @func3() #0 {
+int __declspec(hybrid_patchable) func3(void);
+int func3(void) {  return 3; }
+
+// CHECK: ;    Function Attrs: hybrid_patchable noinline nounwind optnone
+// CHECK-NEXT: define dso_local i32 @func4() #0 {
+[[clang::hybrid_patchable]] int func4(void);
+int func4(void) {  return 3; }
+
+// CHECK: ; Function Attrs: hybrid_patchable noinline nounwind optnone
+// CHECK-NEXT: define internal void @static_func() #0 {
+// expected-warning@+1 {{'hybrid_patchable' is ignored on functions without external linkage}}
+static void __declspec(hybrid_patchable) static_func(void) {}
+
+// CHECK: ;    Function Attrs: hybrid_patchable noinline nounwind optnone
+// CHECK-NEXT: define linkonce_odr dso_local i32 @func5() #0 comdat {
+int inline __declspec(hybrid_patchable) func5(void) {  return 4; }
+
+void caller(void) {
+  static_func();
+  func5();
+}
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index 33f9c2f51363c..e082db698ef0c 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -83,6 +83,7 @@
 // CHECK-NEXT: HIPManaged (SubjectMatchRule_variable)
 // CHECK-NEXT: HLSLResourceClass (SubjectMatchRule_record_not_is_union)
 // CHECK-NEXT: Hot (SubjectMatchRule_function)
+// CHECK-NEXT: HybridPatchable (SubjectMatchRule_function)
 // CHECK-NEXT: IBAction (SubjectMatchRule_objc_method_is_instance)
 // CHECK-NEXT: IFunc (SubjectMatchRule_function)
 // CHECK-NEXT: InitPriority (SubjectMatchRule_variable)

From 67f509a93be67aab643ab2ca333a2f8149f49be2 Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Tue, 30 Jul 2024 08:54:10 +0200
Subject: [PATCH 073/427] [sanitizer_common][test] Always skip select allocator
 tests on SPARC V9 (#100530)

Two allocator tests `FAIL` on Linux/sparc64:
```
  SanitizerCommon-Unit :: ./Sanitizer-sparcv9-Test/SanitizerCommon/CombinedAllocator32Compact
  SanitizerCommon-Unit :: ./Sanitizer-sparcv9-Test/SanitizerCommon/SizeClassAllocator32Iteration
```
The failure mode is the same on Solaris/sparcv9, where those tests are
already disabled since 0f69cbe2694a4740e6db5b99bd81a26746403072.
Therefore, this patch skips them on SPARC in general.

Tested on `sparc64-unknown-linux-gnu` and `sparcv9-sun-solaris2.11`.

(cherry picked from commit 3d149123f46cee5ac8d961c6bf77c5c566f1e410)
---
 .../tests/sanitizer_allocator_test.cpp              | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp
index 1a1ccce82d259..601897a64f051 100644
--- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp
+++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp
@@ -28,12 +28,13 @@
 
 using namespace __sanitizer;
 
-#if SANITIZER_SOLARIS && defined(__sparcv9)
+#if defined(__sparcv9)
 // FIXME: These tests probably fail because Solaris/sparcv9 uses the full
-// 64-bit address space.  Needs more investigation
-#define SKIP_ON_SOLARIS_SPARCV9(x) DISABLED_##x
+// 64-bit address space.  Same on Linux/sparc64, so probably a general SPARC
+// issue.  Needs more investigation
+#  define SKIP_ON_SPARCV9(x) DISABLED_##x
 #else
-#define SKIP_ON_SOLARIS_SPARCV9(x) x
+#  define SKIP_ON_SPARCV9(x) x
 #endif
 
 // On 64-bit systems with small virtual address spaces (e.g. 39-bit) we can't
@@ -781,7 +782,7 @@ TEST(SanitizerCommon, CombinedAllocator64VeryCompact) {
 }
 #endif
 
-TEST(SanitizerCommon, SKIP_ON_SOLARIS_SPARCV9(CombinedAllocator32Compact)) {
+TEST(SanitizerCommon, SKIP_ON_SPARCV9(CombinedAllocator32Compact)) {
   TestCombinedAllocator<Allocator32Compact>();
 }
 
@@ -1028,7 +1029,7 @@ TEST(SanitizerCommon, SizeClassAllocator64DynamicPremappedIteration) {
 #endif
 #endif
 
-TEST(SanitizerCommon, SKIP_ON_SOLARIS_SPARCV9(SizeClassAllocator32Iteration)) {
+TEST(SanitizerCommon, SKIP_ON_SPARCV9(SizeClassAllocator32Iteration)) {
   TestSizeClassAllocatorIteration<Allocator32Compact>();
 }
 

From 3389604cd95d4d12eb975f4057ed21828f5b53ce Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Thu, 25 Jul 2024 18:37:36 +0200
Subject: [PATCH 074/427] [libc++][spaceship] Marks P1614 as complete. (#99375)

Implements parts of:
- P1902R1 Missing feature-test macros 2017-2019

Completes:
- P1614R2 The Mothership has Landed

Fixes #100018
---
 libcxx/docs/FeatureTestMacroTable.rst              |  2 +-
 libcxx/docs/ReleaseNotes/19.rst                    |  1 +
 libcxx/docs/Status/Cxx20.rst                       |  1 +
 libcxx/docs/Status/Cxx20Papers.csv                 |  2 +-
 libcxx/docs/Status/SpaceshipPapers.csv             |  2 +-
 libcxx/include/version                             |  4 ++--
 .../compare.version.compile.pass.cpp               | 14 +++++++-------
 .../version.version.compile.pass.cpp               | 14 +++++++-------
 .../generate_feature_test_macro_components.py      |  3 +--
 9 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst
index 262da3f8937d2..a1506e115fe70 100644
--- a/libcxx/docs/FeatureTestMacroTable.rst
+++ b/libcxx/docs/FeatureTestMacroTable.rst
@@ -290,7 +290,7 @@ Status
     ---------------------------------------------------------- -----------------
     ``__cpp_lib_syncbuf``                                      ``201803L``
     ---------------------------------------------------------- -----------------
-    ``__cpp_lib_three_way_comparison``                         ``201711L``
+    ``__cpp_lib_three_way_comparison``                         ``201907L``
     ---------------------------------------------------------- -----------------
     ``__cpp_lib_to_address``                                   ``201711L``
     ---------------------------------------------------------- -----------------
diff --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst
index c2c2bfbed4ac3..92896f6b0d11e 100644
--- a/libcxx/docs/ReleaseNotes/19.rst
+++ b/libcxx/docs/ReleaseNotes/19.rst
@@ -53,6 +53,7 @@ Implemented Papers
 ------------------
 
 - P1132R8 - ``out_ptr`` - a scalable output pointer abstraction
+- P1614R2 - The Mothership has Landed
 - P2637R3 - Member ``visit``
 - P2652R2 - Disallow User Specialization of ``allocator_traits``
 - P2819R2 - Add ``tuple`` protocol to ``complex``
diff --git a/libcxx/docs/Status/Cxx20.rst b/libcxx/docs/Status/Cxx20.rst
index c00d6fb237286..b76e30fbb3712 100644
--- a/libcxx/docs/Status/Cxx20.rst
+++ b/libcxx/docs/Status/Cxx20.rst
@@ -48,6 +48,7 @@ Paper Status
    .. [#note-P0883.1] P0883: shared_ptr and floating-point changes weren't applied as they themselves aren't implemented yet.
    .. [#note-P0883.2] P0883: ``ATOMIC_FLAG_INIT`` was marked deprecated in version 14.0, but was undeprecated with the implementation of LWG3659 in version 15.0.
    .. [#note-P0660] P0660: The paper is implemented but the features are experimental and can be enabled via ``-fexperimental-library``.
+   .. [#note-P1614] P1614: ``std::strong_order(long double, long double)`` is partly implemented.
    .. [#note-P0355] P0355: The implementation status is:
 
       * ``Calendars`` mostly done in Clang 7
diff --git a/libcxx/docs/Status/Cxx20Papers.csv b/libcxx/docs/Status/Cxx20Papers.csv
index 34fc5586f74d9..4015d7ad48b06 100644
--- a/libcxx/docs/Status/Cxx20Papers.csv
+++ b/libcxx/docs/Status/Cxx20Papers.csv
@@ -123,7 +123,7 @@
 "`P1522R1 <https://wg21.link/P1522R1>`__","LWG","Iterator Difference Type and Integer Overflow","Cologne","|Complete|","15.0","|ranges|"
 "`P1523R1 <https://wg21.link/P1523R1>`__","LWG","Views and Size Types","Cologne","|Complete|","15.0","|ranges|"
 "`P1612R1 <https://wg21.link/P1612R1>`__","LWG","Relocate Endian's Specification","Cologne","|Complete|","10.0"
-"`P1614R2 <https://wg21.link/P1614R2>`__","LWG","The Mothership has Landed","Cologne","|In Progress|",""
+"`P1614R2 <https://wg21.link/P1614R2>`__","LWG","The Mothership has Landed","Cologne","|Complete| [#note-P1614]_","19.0"
 "`P1638R1 <https://wg21.link/P1638R1>`__","LWG","basic_istream_view::iterator should not be copyable","Cologne","|Complete|","16.0","|ranges|"
 "`P1643R1 <https://wg21.link/P1643R1>`__","LWG","Add wait/notify to atomic_ref","Cologne","|Complete|","19.0"
 "`P1644R0 <https://wg21.link/P1644R0>`__","LWG","Add wait/notify to atomic<shared_ptr>","Cologne","",""
diff --git a/libcxx/docs/Status/SpaceshipPapers.csv b/libcxx/docs/Status/SpaceshipPapers.csv
index 39e1f968c1754..1ab64a9caf86a 100644
--- a/libcxx/docs/Status/SpaceshipPapers.csv
+++ b/libcxx/docs/Status/SpaceshipPapers.csv
@@ -1,5 +1,5 @@
 "Number","Name","Status","First released version"
-`P1614R2 <https://wg21.link/P1614R2>`_,The Mothership has Landed,|In Progress|,
+`P1614R2 <https://wg21.link/P1614R2>`_,The Mothership has Landed,|Complete|,19.0
 `P2404R3 <https://wg21.link/P2404R3>`_,"Relaxing ``equality_comparable_with``'s, ``totally_ordered_with``'s, and ``three_way_comparable_with``'s common reference requirements to support move-only types",,
 `LWG3330 <https://wg21.link/LWG3330>`_,Include ``<compare>`` from most library headers,"|Complete|","13.0"
 `LWG3347 <https://wg21.link/LWG3347>`_,"``std::pair<T, U>`` now requires ``T`` and ``U`` to be *less-than-comparable*",|Nothing To Do|,
diff --git a/libcxx/include/version b/libcxx/include/version
index 40548098a92d6..fe64343eafbc9 100644
--- a/libcxx/include/version
+++ b/libcxx/include/version
@@ -238,7 +238,7 @@ __cpp_lib_string_view                                   202403L <string> <string
 __cpp_lib_submdspan                                     202306L <mdspan>
 __cpp_lib_syncbuf                                       201803L <syncstream>
 __cpp_lib_text_encoding                                 202306L <text_encoding>
-__cpp_lib_three_way_comparison                          201711L <compare>
+__cpp_lib_three_way_comparison                          201907L <compare>
 __cpp_lib_to_address                                    201711L <memory>
 __cpp_lib_to_array                                      201907L <array>
 __cpp_lib_to_chars                                      202306L <charconv>
@@ -446,7 +446,7 @@ __cpp_lib_void_t                                        201411L <type_traits>
 # if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_SYNCSTREAM)
 #   define __cpp_lib_syncbuf                            201803L
 # endif
-# define __cpp_lib_three_way_comparison                 201711L
+# define __cpp_lib_three_way_comparison                 201907L
 # define __cpp_lib_to_address                           201711L
 # define __cpp_lib_to_array                             201907L
 # define __cpp_lib_type_identity                        201806L
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/compare.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/compare.version.compile.pass.cpp
index aac00f20c7b45..1d61f43f9ee51 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/compare.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/compare.version.compile.pass.cpp
@@ -16,7 +16,7 @@
 // Test the feature test macros defined by <compare>
 
 /*  Constant                          Value
-    __cpp_lib_three_way_comparison    201711L [C++20]
+    __cpp_lib_three_way_comparison    201907L [C++20]
 */
 
 #include <compare>
@@ -45,8 +45,8 @@
 # ifndef __cpp_lib_three_way_comparison
 #   error "__cpp_lib_three_way_comparison should be defined in c++20"
 # endif
-# if __cpp_lib_three_way_comparison != 201711L
-#   error "__cpp_lib_three_way_comparison should have the value 201711L in c++20"
+# if __cpp_lib_three_way_comparison != 201907L
+#   error "__cpp_lib_three_way_comparison should have the value 201907L in c++20"
 # endif
 
 #elif TEST_STD_VER == 23
@@ -54,8 +54,8 @@
 # ifndef __cpp_lib_three_way_comparison
 #   error "__cpp_lib_three_way_comparison should be defined in c++23"
 # endif
-# if __cpp_lib_three_way_comparison != 201711L
-#   error "__cpp_lib_three_way_comparison should have the value 201711L in c++23"
+# if __cpp_lib_three_way_comparison != 201907L
+#   error "__cpp_lib_three_way_comparison should have the value 201907L in c++23"
 # endif
 
 #elif TEST_STD_VER > 23
@@ -63,8 +63,8 @@
 # ifndef __cpp_lib_three_way_comparison
 #   error "__cpp_lib_three_way_comparison should be defined in c++26"
 # endif
-# if __cpp_lib_three_way_comparison != 201711L
-#   error "__cpp_lib_three_way_comparison should have the value 201711L in c++26"
+# if __cpp_lib_three_way_comparison != 201907L
+#   error "__cpp_lib_three_way_comparison should have the value 201907L in c++26"
 # endif
 
 #endif // TEST_STD_VER > 23
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
index f26e7dc4b4c63..b8bad696f1bae 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
@@ -221,7 +221,7 @@
     __cpp_lib_submdspan                                     202306L [C++26]
     __cpp_lib_syncbuf                                       201803L [C++20]
     __cpp_lib_text_encoding                                 202306L [C++26]
-    __cpp_lib_three_way_comparison                          201711L [C++20]
+    __cpp_lib_three_way_comparison                          201907L [C++20]
     __cpp_lib_to_address                                    201711L [C++20]
     __cpp_lib_to_array                                      201907L [C++20]
     __cpp_lib_to_chars                                      201611L [C++17]
@@ -4438,8 +4438,8 @@
 # ifndef __cpp_lib_three_way_comparison
 #   error "__cpp_lib_three_way_comparison should be defined in c++20"
 # endif
-# if __cpp_lib_three_way_comparison != 201711L
-#   error "__cpp_lib_three_way_comparison should have the value 201711L in c++20"
+# if __cpp_lib_three_way_comparison != 201907L
+#   error "__cpp_lib_three_way_comparison should have the value 201907L in c++20"
 # endif
 
 # ifndef __cpp_lib_to_address
@@ -6037,8 +6037,8 @@
 # ifndef __cpp_lib_three_way_comparison
 #   error "__cpp_lib_three_way_comparison should be defined in c++23"
 # endif
-# if __cpp_lib_three_way_comparison != 201711L
-#   error "__cpp_lib_three_way_comparison should have the value 201711L in c++23"
+# if __cpp_lib_three_way_comparison != 201907L
+#   error "__cpp_lib_three_way_comparison should have the value 201907L in c++23"
 # endif
 
 # ifndef __cpp_lib_to_address
@@ -7960,8 +7960,8 @@
 # ifndef __cpp_lib_three_way_comparison
 #   error "__cpp_lib_three_way_comparison should be defined in c++26"
 # endif
-# if __cpp_lib_three_way_comparison != 201711L
-#   error "__cpp_lib_three_way_comparison should have the value 201711L in c++26"
+# if __cpp_lib_three_way_comparison != 201907L
+#   error "__cpp_lib_three_way_comparison should have the value 201907L in c++26"
 # endif
 
 # ifndef __cpp_lib_to_address
diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py
index a351112471295..6c42748002aee 100755
--- a/libcxx/utils/generate_feature_test_macro_components.py
+++ b/libcxx/utils/generate_feature_test_macro_components.py
@@ -1302,8 +1302,7 @@ def add_version_header(tc):
         },
         {
             "name": "__cpp_lib_three_way_comparison",
-            "values": {"c++20": 201711},
-            # {"c++20": 201907} # P1614R2 The Mothership has Landed (see P1902R1 Missing feature-test macros 2017-2019)
+            "values": {"c++20": 201907},
             "headers": ["compare"],
         },
         {

From 63cf3d4fb07a4e2c484ae44cec5df2c273fc7fff Mon Sep 17 00:00:00 2001
From: Stefan Pintilie <stefanp@ca.ibm.com>
Date: Tue, 23 Jul 2024 21:59:27 -0400
Subject: [PATCH 075/427] [RegisterCoalescer] Fix SUBREG_TO_REG handling in the
 RegisterCoalescer. (#96839)

The issue with the handling of the SUBREG_TO_REG is that we don't join
the subranges correctly when we join live ranges across the
SUBREG_TO_REG. For example when joining across this:
```
32B	  %2:gr64_nosp = SUBREG_TO_REG 0, %0:gr32, %subreg.sub_32bit
```
we want to join these live ranges:
```
%0 [16r,32r:0) 0@16r  weight:0.000000e+00
%2 [32r,112r:0) 0@32r  weight:0.000000e+00
```
Before the fix the range for the resulting merged `%2` is:
```
%2 [16r,112r:0) 0@16r  weight:0.000000e+00
```
After the fix it is now this:
```
%2 [16r,112r:0) 0@16r  L000000000000000F [16r,112r:0) 0@16r  weight:0.000000e+00
```

Two tests are added to this fix. The X86 test fails without the patch.
The PowerPC test passes with and without the patch but is added as a way
track future possible failures when register classes are changed in a
future patch.

(cherry picked from commit 26fa399012da00fbf806f50ad72a3b5f0ee63eab)
---
 llvm/lib/CodeGen/RegisterCoalescer.cpp        |  7 ++++
 .../test/CodeGen/PowerPC/subreg-coalescer.mir | 34 +++++++++++++++++
 llvm/test/CodeGen/X86/subreg-fail.mir         | 37 +++++++++++++++++++
 3 files changed, 78 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
 create mode 100644 llvm/test/CodeGen/X86/subreg-fail.mir

diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 1c35a88b4dc4a..043ea20191487 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -3673,6 +3673,13 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
 
     LHSVals.pruneSubRegValues(LHS, ShrinkMask);
     RHSVals.pruneSubRegValues(LHS, ShrinkMask);
+  } else if (TrackSubRegLiveness && !CP.getDstIdx() && CP.getSrcIdx()) {
+    LHS.createSubRangeFrom(LIS->getVNInfoAllocator(),
+                           CP.getNewRC()->getLaneMask(), LHS);
+    mergeSubRangeInto(LHS, RHS, TRI->getSubRegIndexLaneMask(CP.getSrcIdx()), CP,
+                      CP.getDstIdx());
+    LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
+    LHSVals.pruneSubRegValues(LHS, ShrinkMask);
   }
 
   // The merging algorithm in LiveInterval::join() can't handle conflicting
diff --git a/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir b/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
new file mode 100644
index 0000000000000..39eab1f562e71
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
@@ -0,0 +1,34 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple powerpc64le-unknown-linux-gnu -mcpu=pwr8 %s \
+# RUN:   -verify-coalescing --run-pass=register-coalescer -o - | FileCheck %s
+
+# Check that the register coalescer correctly handles merging live ranges over
+# SUBREG_TO_REG on PowerPC. The -verify-coalescing option will give an error if
+# this is incorrect.
+
+---
+name: check_subregs
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x3
+
+    ; CHECK-LABEL: name: check_subregs
+    ; CHECK: liveins: $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3
+    ; CHECK-NEXT: [[LFSUX:%[0-9]+]]:f8rc, dead [[LFSUX1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSUX [[COPY]], [[COPY]]
+    ; CHECK-NEXT: undef [[FRSP:%[0-9]+]].sub_64:vslrc = FRSP [[LFSUX]], implicit $rm
+    ; CHECK-NEXT: [[XVCVDPSP:%[0-9]+]]:vrrc = XVCVDPSP [[FRSP]], implicit $rm
+    ; CHECK-NEXT: $v2 = COPY [[XVCVDPSP]]
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $v2
+    %0:g8rc_and_g8rc_nox0 = COPY $x3
+    %1:f8rc, %2:g8rc_and_g8rc_nox0 = LFSUX %0, %0
+    %3:f4rc = FRSP killed %1, implicit $rm
+    %4:vslrc = SUBREG_TO_REG 1, %3, %subreg.sub_64
+    %5:vrrc = XVCVDPSP killed %4, implicit $rm
+    $v2 = COPY %5
+    BLR8 implicit $lr8, implicit $rm, implicit $v2
+...
+
diff --git a/llvm/test/CodeGen/X86/subreg-fail.mir b/llvm/test/CodeGen/X86/subreg-fail.mir
new file mode 100644
index 0000000000000..c8146f099b814
--- /dev/null
+++ b/llvm/test/CodeGen/X86/subreg-fail.mir
@@ -0,0 +1,37 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple x86_64-unknown-unknown %s \
+# RUN:   -verify-coalescing -enable-subreg-liveness \
+# RUN:   --run-pass=register-coalescer -o - | FileCheck %s
+
+# Check that the register coalescer correctly handles merging live ranges over
+# SUBREG_TO_REG on X86. The -verify-coalescing option will give an error if
+# this is incorrect.
+
+---
+name:            test1
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: test1
+    ; CHECK: undef [[MOV32rm:%[0-9]+]].sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
+    ; CHECK-NEXT: undef [[MOV32rm1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
+    ; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[MOV32rm1]], 32, implicit-def dead $eflags
+    ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = LEA64r [[MOV32rm1]], 1, [[MOV32rm]], 256, $noreg
+    ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = SHR64ri [[LEA64r]], 8, implicit-def dead $eflags
+    ; CHECK-NEXT: MOV32mr undef %10:gr64, 1, $noreg, 0, $noreg, [[LEA64r]].sub_32bit :: (volatile store (s32) into `ptr undef`)
+    ; CHECK-NEXT: RET 0, undef $eax
+    %0:gr32 = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
+    %2:gr64_nosp = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+    %3:gr32 = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
+    %5:gr64 = SUBREG_TO_REG 0, killed %3, %subreg.sub_32bit
+    %6:gr64 = COPY killed %5
+    %6:gr64 = SHL64ri %6, 32, implicit-def dead $eflags
+    %7:gr64 = LEA64r killed %6, 1, killed %2, 256, $noreg
+    %8:gr64 = COPY killed %7
+    %8:gr64 = SHR64ri %8, 8, implicit-def dead $eflags
+    %9:gr32 = COPY killed %8.sub_32bit
+    MOV32mr undef %10:gr64, 1, $noreg, 0, $noreg, killed %9 :: (volatile store (s32) into `ptr undef`)
+    RET 0, undef $eax
+
+...

From 64699d328a39d3a2cc7c043768111794782ef9f0 Mon Sep 17 00:00:00 2001
From: Xing Xue <xingxue@outlook.com>
Date: Tue, 30 Jul 2024 06:28:59 -0400
Subject: [PATCH 076/427] [libunwind][AIX] Fix the wrong traceback from signal
 handler (#101069)

Patch [llvm#92291](https://github.com/llvm/llvm-project/pull/92291)
causes wrong traceback from a signal handler for AIX because the AIX
unwinder uses the traceback table at the end of each function instead of
FDE/CIE for unwinding. This patch adds a condition to exclude traceback
table based unwinding from the code added by the patch.

(cherry picked from commit d90fa612604b49dfc81c3f42c106fab7401322ec)
---
 libunwind/src/UnwindCursor.hpp             | 3 ++-
 libunwind/test/aix_signal_unwind.pass.sh.S | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
index 2ec60e4c123d5..758557337899e 100644
--- a/libunwind/src/UnwindCursor.hpp
+++ b/libunwind/src/UnwindCursor.hpp
@@ -2589,7 +2589,8 @@ void UnwindCursor<A, R>::setInfoBasedOnIPRegister(bool isReturnAddress) {
     --pc;
 #endif
 
-#if !(defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32))
+#if !(defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32)) &&            \
+    !defined(_LIBUNWIND_SUPPORT_TBTAB_UNWIND)
   // In case of this is frame of signal handler, the IP saved in the signal
   // handler points to first non-executed instruction, while FDE/CIE expects IP
   // to be after the first non-executed instruction.
diff --git a/libunwind/test/aix_signal_unwind.pass.sh.S b/libunwind/test/aix_signal_unwind.pass.sh.S
index 9ca18e9481f4f..a666577d095b1 100644
--- a/libunwind/test/aix_signal_unwind.pass.sh.S
+++ b/libunwind/test/aix_signal_unwind.pass.sh.S
@@ -10,7 +10,7 @@
 // a correct traceback when the function raising the signal does not save
 // the link register or does not store the stack back chain.
 
-// REQUIRES: target=powerpc{{(64)?}}-ibm-aix
+// REQUIRES: target=powerpc{{(64)?}}-ibm-aix{{.*}}
 
 // Test when the function raising the signal does not save the link register
 // RUN: %{cxx} -x c++ %s -o %t.exe -DCXX_CODE %{flags} %{compile_flags}

From 843ed4b722074466d3c462b8180b5abe25b4b7c8 Mon Sep 17 00:00:00 2001
From: Jacek Caban <jacek@codeweavers.com>
Date: Tue, 30 Jul 2024 14:22:50 +0200
Subject: [PATCH 077/427] [CodeGen][ARM64EC] Use alias symbol for exporting
 hybrid_patchable functions. (#100872)

Exporting $hp_target symbol doesn't make sense, use the unmangled alias instead.
This is not compatible with MSVC, but it makes using dllexport together with
hybrid_patchable attribute possible.

(cherry picked from commit 41c0f89f5532ec110b927c3a67ceac83448c5d98)
---
 llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp | 5 +++++
 llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index 310b152ef9817..415edb189e60c 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -833,6 +833,11 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) {
                                               "EXP+" + MangledName.value())));
       A->setAliasee(&F);
 
+      if (F.hasDLLExportStorageClass()) {
+        A->setDLLStorageClass(GlobalValue::DLLExportStorageClass);
+        F.setDLLStorageClass(GlobalValue::DefaultStorageClass);
+      }
+
       FnsMap[A] = GlobalAlias::create(GlobalValue::LinkOnceODRLinkage,
                                       MangledName.value(), &F);
       PatchableFns.insert(A);
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll b/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll
index e5387d40b9c64..64fb5b36b2c62 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll
@@ -238,7 +238,7 @@ define dso_local void @caller() nounwind {
 ; CHECK-NEXT:      .symidx exp
 ; CHECK-NEXT:      .word   0
 ; CHECK-NEXT:      .section        .drectve,"yni"
-; CHECK-NEXT:      .ascii  " /EXPORT:\"#exp$hp_target,EXPORTAS,exp$hp_target\""
+; CHECK-NEXT:      .ascii  " /EXPORT:exp"
 
 ; CHECK-NEXT:      .def    func;
 ; CHECK-NEXT:      .scl    2;

From 7f1cd7866ef858bbdb2a4238c81462a0efce5562 Mon Sep 17 00:00:00 2001
From: Hubert Tong <hubert.reinterpretcast@gmail.com>
Date: Tue, 30 Jul 2024 17:56:55 -0400
Subject: [PATCH 078/427] ReleaseNotes.rst: Fix typo "my" for "may"

Replace typo for "may" with "can".
---
 clang/docs/ReleaseNotes.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 610061406a1ec..b4ef1e9672a5d 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -147,7 +147,7 @@ Clang Frontend Potentially Breaking Changes
   that ``none`` means that there is no operating system. As opposed to an unknown
   type of operating system.
 
-  This change my cause clang to not find libraries, or libraries to be built at
+  This change can cause clang to not find libraries, or libraries to be built at
   different file system locations. This can be fixed by changing your builds to
   use the new normalized triple. However, we recommend instead getting the
   normalized triple from clang itself, as this will make your builds more

From 32b786c92f0ae52201888dcfba5c3ac789afbb3a Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas@arm.com>
Date: Tue, 23 Jul 2024 19:24:41 +0100
Subject: [PATCH 079/427] [clang][FMV][AArch64] Improve streaming mode
 compatibility.

* Allow arm-streaming if all the functions versions adhere to it.
* Allow arm-streaming-compatible if all the functions versions adhere to it.
* Allow arm-locally-streaming regardless of the other functions versions.

When the caller needs to toggle the streaming mode all the function versions
of the callee must adhere to the same mode, otherwise the call will yield a
runtime error.

Imagine the versions of the callee live in separate TUs. The version that
is visible to the caller will determine the calling convention used when
generating code for the callsite. Therefore we cannot support mixing
streaming with non-streaming function versions. Imagine TU1 has a streaming
caller and calls foo._sme which is streaming-compatible. The codegen for
the callsite will not switch off the streaming mode. Then in TU2 we have
a version which is non-streaming and could potentially be called in
streaming mode. Similarly if the caller is non-streaming and the called
version is streaming-compatible the codegen for the callsite will not
switch on the streaming mode, but other versions may be streaming.
---
 .../clang/Basic/DiagnosticSemaKinds.td        |   2 -
 clang/lib/Sema/SemaDecl.cpp                   |  24 +++-
 clang/lib/Sema/SemaDeclAttr.cpp               |   7 --
 clang/test/CodeGen/aarch64-fmv-streaming.c    | 107 ++++++++++++++++++
 clang/test/Sema/aarch64-fmv-streaming.c       |  46 ++++++++
 clang/test/Sema/aarch64-sme-func-attrs.c      |  42 -------
 6 files changed, 173 insertions(+), 55 deletions(-)
 create mode 100644 clang/test/CodeGen/aarch64-fmv-streaming.c
 create mode 100644 clang/test/Sema/aarch64-fmv-streaming.c

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 95ce4166ceb66..8a00fe21a08ce 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3811,8 +3811,6 @@ def warn_sme_locally_streaming_has_vl_args_returns : Warning<
   InGroup<AArch64SMEAttributes>, DefaultIgnore;
 def err_conflicting_attributes_arm_state : Error<
   "conflicting attributes for state '%0'">;
-def err_sme_streaming_cannot_be_multiversioned : Error<
-  "streaming function cannot be multi-versioned">;
 def err_unknown_arm_state : Error<
   "unknown state '%0'">;
 def err_missing_arm_state : Error<
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index f60cc78be4f92..01231f8e385ef 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -11014,6 +11014,9 @@ static bool AttrCompatibleWithMultiVersion(attr::Kind Kind,
   switch (Kind) {
   default:
     return false;
+  case attr::ArmLocallyStreaming:
+    return MVKind == MultiVersionKind::TargetVersion ||
+           MVKind == MultiVersionKind::TargetClones;
   case attr::Used:
     return MVKind == MultiVersionKind::Target;
   case attr::NonNull:
@@ -11150,7 +11153,21 @@ bool Sema::areMultiversionVariantFunctionsCompatible(
     FunctionType::ExtInfo OldTypeInfo = OldType->getExtInfo();
     FunctionType::ExtInfo NewTypeInfo = NewType->getExtInfo();
 
-    if (OldTypeInfo.getCC() != NewTypeInfo.getCC())
+    const auto *OldFPT = OldFD->getType()->getAs<FunctionProtoType>();
+    const auto *NewFPT = NewFD->getType()->getAs<FunctionProtoType>();
+
+    bool ArmStreamingCCMismatched = false;
+    if (OldFPT && NewFPT) {
+      unsigned Diff =
+          OldFPT->getAArch64SMEAttributes() ^ NewFPT->getAArch64SMEAttributes();
+      // Arm-streaming, arm-streaming-compatible and non-streaming versions
+      // cannot be mixed.
+      if (Diff & (FunctionType::SME_PStateSMEnabledMask |
+                  FunctionType::SME_PStateSMCompatibleMask))
+        ArmStreamingCCMismatched = true;
+    }
+
+    if (OldTypeInfo.getCC() != NewTypeInfo.getCC() || ArmStreamingCCMismatched)
       return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << CallingConv;
 
     QualType OldReturnType = OldType->getReturnType();
@@ -11170,9 +11187,8 @@ bool Sema::areMultiversionVariantFunctionsCompatible(
     if (!CLinkageMayDiffer && OldFD->isExternC() != NewFD->isExternC())
       return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << LanguageLinkage;
 
-    if (CheckEquivalentExceptionSpec(
-            OldFD->getType()->getAs<FunctionProtoType>(), OldFD->getLocation(),
-            NewFD->getType()->getAs<FunctionProtoType>(), NewFD->getLocation()))
+    if (CheckEquivalentExceptionSpec(OldFPT, OldFD->getLocation(), NewFPT,
+                                     NewFD->getLocation()))
       return true;
   }
   return false;
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 10bacc17a07ca..e2eada24f9fcc 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -3024,9 +3024,6 @@ bool Sema::checkTargetVersionAttr(SourceLocation LiteralLoc, Decl *D,
       return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
              << Unsupported << None << CurFeature << TargetVersion;
   }
-  if (IsArmStreamingFunction(cast<FunctionDecl>(D),
-                             /*IncludeLocallyStreaming=*/false))
-    return Diag(LiteralLoc, diag::err_sme_streaming_cannot_be_multiversioned);
   return false;
 }
 
@@ -3123,10 +3120,6 @@ bool Sema::checkTargetClonesAttrString(
           HasNotDefault = true;
         }
       }
-      if (IsArmStreamingFunction(cast<FunctionDecl>(D),
-                                 /*IncludeLocallyStreaming=*/false))
-        return Diag(LiteralLoc,
-                    diag::err_sme_streaming_cannot_be_multiversioned);
     } else {
       // Other targets ( currently X86 )
       if (Cur.starts_with("arch=")) {
diff --git a/clang/test/CodeGen/aarch64-fmv-streaming.c b/clang/test/CodeGen/aarch64-fmv-streaming.c
new file mode 100644
index 0000000000000..e549ccda59ad8
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-fmv-streaming.c
@@ -0,0 +1,107 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -emit-llvm -o - %s | FileCheck %s
+
+
+// CHECK-LABEL: define {{[^@]+}}@n_callee._Msve
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+//
+// CHECK-LABEL: define {{[^@]+}}@n_callee._Msimd
+// CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+//
+__arm_locally_streaming __attribute__((target_clones("sve", "simd"))) void n_callee(void) {}
+// CHECK-LABEL: define {{[^@]+}}@n_callee._Msme2
+// CHECK-SAME: () #[[ATTR2:[0-9]+]] {
+//
+__attribute__((target_version("sme2"))) void n_callee(void) {}
+// CHECK-LABEL: define {{[^@]+}}@n_callee.default
+// CHECK-SAME: () #[[ATTR3:[0-9]+]] {
+//
+__attribute__((target_version("default"))) void n_callee(void) {}
+
+
+// CHECK-LABEL: define {{[^@]+}}@s_callee._Msve
+// CHECK-SAME: () #[[ATTR4:[0-9]+]] {
+//
+// CHECK-LABEL: define {{[^@]+}}@s_callee._Msimd
+// CHECK-SAME: () #[[ATTR5:[0-9]+]] {
+//
+__attribute__((target_clones("sve", "simd"))) void s_callee(void) __arm_streaming {}
+// CHECK-LABEL: define {{[^@]+}}@s_callee._Msme2
+// CHECK-SAME: () #[[ATTR6:[0-9]+]] {
+//
+__arm_locally_streaming __attribute__((target_version("sme2"))) void s_callee(void) __arm_streaming {}
+// CHECK-LABEL: define {{[^@]+}}@s_callee.default
+// CHECK-SAME: () #[[ATTR7:[0-9]+]] {
+//
+__attribute__((target_version("default"))) void s_callee(void) __arm_streaming {}
+
+
+// CHECK-LABEL: define {{[^@]+}}@sc_callee._Msve
+// CHECK-SAME: () #[[ATTR8:[0-9]+]] {
+//
+// CHECK-LABEL: define {{[^@]+}}@sc_callee._Msimd
+// CHECK-SAME: () #[[ATTR9:[0-9]+]] {
+//
+__attribute__((target_clones("sve", "simd"))) void sc_callee(void) __arm_streaming_compatible {}
+// CHECK-LABEL: define {{[^@]+}}@sc_callee._Msme2
+// CHECK-SAME: () #[[ATTR10:[0-9]+]] {
+//
+__arm_locally_streaming __attribute__((target_version("sme2"))) void sc_callee(void) __arm_streaming_compatible {}
+// CHECK-LABEL: define {{[^@]+}}@sc_callee.default
+// CHECK-SAME: () #[[ATTR11:[0-9]+]] {
+//
+__attribute__((target_version("default"))) void sc_callee(void) __arm_streaming_compatible {}
+
+
+// CHECK-LABEL: define {{[^@]+}}@n_caller
+// CHECK-SAME: () #[[ATTR3:[0-9]+]] {
+// CHECK:    call void @n_callee()
+// CHECK:    call void @s_callee() #[[ATTR12:[0-9]+]]
+// CHECK:    call void @sc_callee() #[[ATTR13:[0-9]+]]
+//
+void n_caller(void) {
+  n_callee();
+  s_callee();
+  sc_callee();
+}
+
+
+// CHECK-LABEL: define {{[^@]+}}@s_caller
+// CHECK-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK:    call void @n_callee()
+// CHECK:    call void @s_callee() #[[ATTR12]]
+// CHECK:    call void @sc_callee() #[[ATTR13]]
+//
+void s_caller(void) __arm_streaming {
+  n_callee();
+  s_callee();
+  sc_callee();
+}
+
+
+// CHECK-LABEL: define {{[^@]+}}@sc_caller
+// CHECK-SAME: () #[[ATTR11:[0-9]+]] {
+// CHECK:    call void @n_callee()
+// CHECK:    call void @s_callee() #[[ATTR12]]
+// CHECK:    call void @sc_callee() #[[ATTR13]]
+//
+void sc_caller(void) __arm_streaming_compatible {
+  n_callee();
+  s_callee();
+  sc_callee();
+}
+
+
+// CHECK: attributes #[[ATTR0:[0-9]+]] = {{.*}} "aarch64_pstate_sm_body"
+// CHECK: attributes #[[ATTR1:[0-9]+]] = {{.*}} "aarch64_pstate_sm_body"
+// CHECK: attributes #[[ATTR2:[0-9]+]] = {{.*}}
+// CHECK: attributes #[[ATTR3]] = {{.*}}
+// CHECK: attributes #[[ATTR4:[0-9]+]] = {{.*}} "aarch64_pstate_sm_enabled"
+// CHECK: attributes #[[ATTR5:[0-9]+]] = {{.*}} "aarch64_pstate_sm_enabled"
+// CHECK: attributes #[[ATTR6:[0-9]+]] = {{.*}} "aarch64_pstate_sm_body" "aarch64_pstate_sm_enabled"
+// CHECK: attributes #[[ATTR7]] = {{.*}} "aarch64_pstate_sm_enabled"
+// CHECK: attributes #[[ATTR8:[0-9]+]] = {{.*}} "aarch64_pstate_sm_compatible"
+// CHECK: attributes #[[ATTR9:[0-9]+]] = {{.*}} "aarch64_pstate_sm_compatible"
+// CHECK: attributes #[[ATTR10]] = {{.*}} "aarch64_pstate_sm_body" "aarch64_pstate_sm_compatible"
+// CHECK: attributes #[[ATTR11]] = {{.*}} "aarch64_pstate_sm_compatible"
+// CHECK: attributes #[[ATTR12]] = {{.*}} "aarch64_pstate_sm_enabled"
+// CHECK: attributes #[[ATTR13]] = {{.*}} "aarch64_pstate_sm_compatible"
diff --git a/clang/test/Sema/aarch64-fmv-streaming.c b/clang/test/Sema/aarch64-fmv-streaming.c
new file mode 100644
index 0000000000000..93b7656216c0c
--- /dev/null
+++ b/clang/test/Sema/aarch64-fmv-streaming.c
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -Waarch64-sme-attributes -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -Waarch64-sme-attributes -fsyntax-only -verify=expected-cpp -x c++ %s
+
+__attribute__((target_clones("sve", "simd"))) void ok_arm_streaming(void) __arm_streaming {}
+__arm_locally_streaming __attribute__((target_version("sme2"))) void ok_arm_streaming(void) __arm_streaming {}
+__attribute__((target_version("default"))) void ok_arm_streaming(void) __arm_streaming {}
+
+__attribute__((target_clones("sve", "simd"))) void ok_arm_streaming_compatible(void) __arm_streaming_compatible {}
+__arm_locally_streaming __attribute__((target_version("sme2"))) void ok_arm_streaming_compatible(void) __arm_streaming_compatible {}
+__attribute__((target_version("default"))) void ok_arm_streaming_compatible(void) __arm_streaming_compatible {}
+
+__arm_locally_streaming __attribute__((target_clones("sve", "simd"))) void ok_no_streaming(void) {}
+__attribute__((target_version("sme2"))) void ok_no_streaming(void) {}
+__attribute__((target_version("default"))) void ok_no_streaming(void) {}
+
+__attribute__((target_clones("sve", "simd"))) void bad_mixed_streaming(void) {}
+// expected-cpp-error@+2 {{multiversioned function declaration has a different calling convention}}
+// expected-error@+1 {{multiversioned function declaration has a different calling convention}}
+__attribute__((target_version("sme2"))) void bad_mixed_streaming(void) __arm_streaming {}
+// expected-cpp-error@+2 {{multiversioned function declaration has a different calling convention}}
+// expected-error@+1 {{multiversioned function declaration has a different calling convention}}
+__attribute__((target_version("default"))) void bad_mixed_streaming(void) __arm_streaming_compatible {}
+// expected-cpp-error@+2 {{multiversioned function declaration has a different calling convention}}
+// expected-error@+1 {{multiversioned function declaration has a different calling convention}}
+__arm_locally_streaming __attribute__((target_version("dotprod"))) void bad_mixed_streaming(void) __arm_streaming {}
+
+void n_caller(void) {
+  ok_arm_streaming();
+  ok_arm_streaming_compatible();
+  ok_no_streaming();
+  bad_mixed_streaming();
+}
+
+void s_caller(void) __arm_streaming {
+  ok_arm_streaming();
+  ok_arm_streaming_compatible();
+  ok_no_streaming();
+  bad_mixed_streaming();
+}
+
+void sc_caller(void) __arm_streaming_compatible {
+  ok_arm_streaming();
+  ok_arm_streaming_compatible();
+  ok_no_streaming();
+  bad_mixed_streaming();
+}
diff --git a/clang/test/Sema/aarch64-sme-func-attrs.c b/clang/test/Sema/aarch64-sme-func-attrs.c
index 6db39d6a71e36..0c263eb2610cf 100644
--- a/clang/test/Sema/aarch64-sme-func-attrs.c
+++ b/clang/test/Sema/aarch64-sme-func-attrs.c
@@ -455,48 +455,6 @@ void unimplemented_spill_fill_za(void (*share_zt0_only)(void) __arm_inout("zt0")
   share_zt0_only();
 }
 
-// expected-cpp-error@+2 {{streaming function cannot be multi-versioned}}
-// expected-error@+1 {{streaming function cannot be multi-versioned}}
-__attribute__((target_version("sme2")))
-void cannot_work_version(void) __arm_streaming {}
-// expected-cpp-error@+5 {{function declared 'void ()' was previously declared 'void () __arm_streaming', which has different SME function attributes}}
-// expected-cpp-note@-2 {{previous declaration is here}}
-// expected-error@+3 {{function declared 'void (void)' was previously declared 'void (void) __arm_streaming', which has different SME function attributes}}
-// expected-note@-4 {{previous declaration is here}}
-__attribute__((target_version("default")))
-void cannot_work_version(void) {}
-
-
-// expected-cpp-error@+2 {{streaming function cannot be multi-versioned}}
-// expected-error@+1 {{streaming function cannot be multi-versioned}}
-__attribute__((target_clones("sme2")))
-void cannot_work_clones(void) __arm_streaming {}
-
-
-__attribute__((target("sme2")))
-void just_fine_streaming(void) __arm_streaming {}
-__attribute__((target_version("sme2")))
-void just_fine(void) { just_fine_streaming(); }
-__attribute__((target_version("default")))
-void just_fine(void) {}
-
-
-__arm_locally_streaming
-__attribute__((target_version("sme2")))
-void incompatible_locally_streaming(void) {}
-// expected-error@-1 {{attribute 'target_version' multiversioning cannot be combined with attribute '__arm_locally_streaming'}}
-// expected-cpp-error@-2 {{attribute 'target_version' multiversioning cannot be combined with attribute '__arm_locally_streaming'}}
-__attribute__((target_version("default")))
-void incompatible_locally_streaming(void) {}
-
-
-void fmv_caller() {
-    cannot_work_version();
-    cannot_work_clones();
-    just_fine();
-    incompatible_locally_streaming();
-}
-
 void sme_streaming_with_vl_arg(__SVInt8_t a) __arm_streaming { }
 
 __SVInt8_t sme_streaming_returns_vl(void) __arm_streaming { __SVInt8_t r; return r; }

From 742576dc3b332d0f67e883b445f482a51ea1feec Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 30 Jul 2024 09:25:03 +0200
Subject: [PATCH 080/427] [Sanitizers] Avoid overload ambiguity for
 interceptors (#100986)

Since glibc 2.40 some functions like openat make use of overloads when
built with `-D_FORTIFY_SOURCE=2`, see:
https://github.com/bminor/glibc/blob/master/io/bits/fcntl2.h

This means that doing something like `(uintptr_t) openat` or `(void *)
openat` is now ambiguous, breaking the compiler-rt build on new glibc
versions.

Fix this by explicitly casting the symbol to the expected function type
before casting it to an intptr. The expected type is obtained as
`decltype(REAL(func))` so we don't have to repeat the signature from
INTERCEPTOR in the INTERCEPT_FUNTION macro.

Fixes https://github.com/llvm/llvm-project/issues/100754.

(cherry picked from commit 155b7a12820ec45095988b6aa6e057afaf2bc892)
---
 .../lib/interception/interception_linux.h        | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/compiler-rt/lib/interception/interception_linux.h b/compiler-rt/lib/interception/interception_linux.h
index 433a3d9bd7fa7..2e01ff44578c3 100644
--- a/compiler-rt/lib/interception/interception_linux.h
+++ b/compiler-rt/lib/interception/interception_linux.h
@@ -28,12 +28,14 @@ bool InterceptFunction(const char *name, const char *ver, uptr *ptr_to_real,
                        uptr func, uptr trampoline);
 }  // namespace __interception
 
-#define INTERCEPT_FUNCTION_LINUX_OR_FREEBSD(func) \
-  ::__interception::InterceptFunction(            \
-      #func,                                      \
-      (::__interception::uptr *)&REAL(func),      \
-      (::__interception::uptr)&(func),            \
-      (::__interception::uptr)&TRAMPOLINE(func))
+// Cast func to type of REAL(func) before casting to uptr in case it is an
+// overloaded function, which is the case for some glibc functions when
+// _FORTIFY_SOURCE is used. This disambiguates which overload to use.
+#define INTERCEPT_FUNCTION_LINUX_OR_FREEBSD(func)            \
+  ::__interception::InterceptFunction(                       \
+      #func, (::__interception::uptr *)&REAL(func),          \
+      (::__interception::uptr)(decltype(REAL(func)))&(func), \
+      (::__interception::uptr) &TRAMPOLINE(func))
 
 // dlvsym is a GNU extension supported by some other platforms.
 #if SANITIZER_GLIBC || SANITIZER_FREEBSD || SANITIZER_NETBSD
@@ -41,7 +43,7 @@ bool InterceptFunction(const char *name, const char *ver, uptr *ptr_to_real,
   ::__interception::InterceptFunction(                        \
       #func, symver,                                          \
       (::__interception::uptr *)&REAL(func),                  \
-      (::__interception::uptr)&(func),                        \
+      (::__interception::uptr)(decltype(REAL(func)))&(func),  \
       (::__interception::uptr)&TRAMPOLINE(func))
 #else
 #define INTERCEPT_FUNCTION_VER_LINUX_OR_FREEBSD(func, symver) \

From 03ae9f9fc62b0283505d2d363118b04dd5d947a8 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Tue, 30 Jul 2024 14:52:29 -0700
Subject: [PATCH 081/427] Revert "[MC] Compute fragment offsets eagerly"

This reverts commit 1a47f3f3db66589c11f8ddacfeaecc03fb80c510.

Fix #100283

This commit is actually a trigger of other preexisting problems:

* Size change of fill fragments does not influence the fixed-point iteration.
* The `invalid number of bytes` error is reported too early. Since
  `.zero A-B` might have temporary negative values in the first few
  iterations.

However, the problems appeared at least "benign" (did not affect the
Linux kernel builds) before this commit.

(cherry picked from commit 4eb5450f630849ee0518487de38d857fbe5b1aee)
---
 llvm/include/llvm/MC/MCAsmBackend.h           |  5 +-
 llvm/include/llvm/MC/MCAssembler.h            |  4 +-
 llvm/include/llvm/MC/MCSection.h              |  5 ++
 llvm/lib/MC/MCAssembler.cpp                   | 77 +++++++++----------
 llvm/lib/MC/MCSection.cpp                     |  4 +-
 .../MCTargetDesc/HexagonAsmBackend.cpp        |  4 +-
 .../Target/X86/MCTargetDesc/X86AsmBackend.cpp | 26 +++++--
 7 files changed, 71 insertions(+), 54 deletions(-)

diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h
index d1d1814dd8b52..3f88ac02cd92a 100644
--- a/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -217,9 +217,8 @@ class MCAsmBackend {
   virtual bool writeNopData(raw_ostream &OS, uint64_t Count,
                             const MCSubtargetInfo *STI) const = 0;
 
-  // Return true if fragment offsets have been adjusted and an extra layout
-  // iteration is needed.
-  virtual bool finishLayout(const MCAssembler &Asm) const { return false; }
+  /// Give backend an opportunity to finish layout after relaxation
+  virtual void finishLayout(MCAssembler const &Asm) const {}
 
   /// Handle any target-specific assembler flags. By default, do nothing.
   virtual void handleAssemblerFlag(MCAssemblerFlag Flag) {}
diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h
index d9752912ee66a..c6fa48128d189 100644
--- a/llvm/include/llvm/MC/MCAssembler.h
+++ b/llvm/include/llvm/MC/MCAssembler.h
@@ -111,7 +111,6 @@ class MCAssembler {
   /// Check whether the given fragment needs relaxation.
   bool fragmentNeedsRelaxation(const MCRelaxableFragment *IF) const;
 
-  void layoutSection(MCSection &Sec);
   /// Perform one layout iteration and return true if any offsets
   /// were adjusted.
   bool layoutOnce();
@@ -148,9 +147,10 @@ class MCAssembler {
   uint64_t computeFragmentSize(const MCFragment &F) const;
 
   void layoutBundle(MCFragment *Prev, MCFragment *F) const;
+  void ensureValid(MCSection &Sec) const;
 
   // Get the offset of the given fragment inside its containing section.
-  uint64_t getFragmentOffset(const MCFragment &F) const { return F.Offset; }
+  uint64_t getFragmentOffset(const MCFragment &F) const;
 
   uint64_t getSectionAddressSize(const MCSection &Sec) const;
   uint64_t getSectionFileSize(const MCSection &Sec) const;
diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h
index 1289d6f6f9f65..dcdcd094fa17b 100644
--- a/llvm/include/llvm/MC/MCSection.h
+++ b/llvm/include/llvm/MC/MCSection.h
@@ -99,6 +99,8 @@ class MCSection {
   /// Whether this section has had instructions emitted into it.
   bool HasInstructions : 1;
 
+  bool HasLayout : 1;
+
   bool IsRegistered : 1;
 
   bool IsText : 1;
@@ -167,6 +169,9 @@ class MCSection {
   bool hasInstructions() const { return HasInstructions; }
   void setHasInstructions(bool Value) { HasInstructions = Value; }
 
+  bool hasLayout() const { return HasLayout; }
+  void setHasLayout(bool Value) { HasLayout = Value; }
+
   bool isRegistered() const { return IsRegistered; }
   void setIsRegistered(bool Value) { IsRegistered = Value; }
 
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index ceeb7af0fecc4..c3da4bb5cc363 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -432,6 +432,28 @@ void MCAssembler::layoutBundle(MCFragment *Prev, MCFragment *F) const {
       DF->Offset = EF->Offset;
 }
 
+void MCAssembler::ensureValid(MCSection &Sec) const {
+  if (Sec.hasLayout())
+    return;
+  Sec.setHasLayout(true);
+  MCFragment *Prev = nullptr;
+  uint64_t Offset = 0;
+  for (MCFragment &F : Sec) {
+    F.Offset = Offset;
+    if (isBundlingEnabled() && F.hasInstructions()) {
+      layoutBundle(Prev, &F);
+      Offset = F.Offset;
+    }
+    Offset += computeFragmentSize(F);
+    Prev = &F;
+  }
+}
+
+uint64_t MCAssembler::getFragmentOffset(const MCFragment &F) const {
+  ensureValid(*F.getParent());
+  return F.Offset;
+}
+
 // Simple getSymbolOffset helper for the non-variable case.
 static bool getLabelOffset(const MCAssembler &Asm, const MCSymbol &S,
                            bool ReportError, uint64_t &Val) {
@@ -916,20 +938,22 @@ void MCAssembler::layout() {
 
   // Layout until everything fits.
   this->HasLayout = true;
-  for (MCSection &Sec : *this)
-    layoutSection(Sec);
   while (layoutOnce()) {
+    if (getContext().hadError())
+      return;
+    // Size of fragments in one section can depend on the size of fragments in
+    // another. If any fragment has changed size, we have to re-layout (and
+    // as a result possibly further relax) all.
+    for (MCSection &Sec : *this)
+      Sec.setHasLayout(false);
   }
 
   DEBUG_WITH_TYPE("mc-dump", {
       errs() << "assembler backend - post-relaxation\n--\n";
       dump(); });
 
-  // Some targets might want to adjust fragment offsets. If so, perform another
-  // layout loop.
-  if (getBackend().finishLayout(*this))
-    for (MCSection &Sec : *this)
-      layoutSection(Sec);
+  // Finalize the layout, including fragment lowering.
+  getBackend().finishLayout(*this);
 
   DEBUG_WITH_TYPE("mc-dump", {
       errs() << "assembler backend - final-layout\n--\n";
@@ -1282,42 +1306,15 @@ bool MCAssembler::relaxFragment(MCFragment &F) {
   }
 }
 
-void MCAssembler::layoutSection(MCSection &Sec) {
-  MCFragment *Prev = nullptr;
-  uint64_t Offset = 0;
-  for (MCFragment &F : Sec) {
-    F.Offset = Offset;
-    if (LLVM_UNLIKELY(isBundlingEnabled())) {
-      if (F.hasInstructions()) {
-        layoutBundle(Prev, &F);
-        Offset = F.Offset;
-      }
-      Prev = &F;
-    }
-    Offset += computeFragmentSize(F);
-  }
-}
-
 bool MCAssembler::layoutOnce() {
   ++stats::RelaxationSteps;
 
-  // Size of fragments in one section can depend on the size of fragments in
-  // another. If any fragment has changed size, we have to re-layout (and
-  // as a result possibly further relax) all.
-  bool ChangedAny = false;
-  for (MCSection &Sec : *this) {
-    for (;;) {
-      bool Changed = false;
-      for (MCFragment &F : Sec)
-        if (relaxFragment(F))
-          Changed = true;
-      ChangedAny |= Changed;
-      if (!Changed)
-        break;
-      layoutSection(Sec);
-    }
-  }
-  return ChangedAny;
+  bool Changed = false;
+  for (MCSection &Sec : *this)
+    for (MCFragment &Frag : Sec)
+      if (relaxFragment(Frag))
+        Changed = true;
+  return Changed;
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp
index 97e87a41c8ce5..8c2ee5635a49c 100644
--- a/llvm/lib/MC/MCSection.cpp
+++ b/llvm/lib/MC/MCSection.cpp
@@ -23,8 +23,8 @@ using namespace llvm;
 MCSection::MCSection(SectionVariant V, StringRef Name, bool IsText,
                      bool IsVirtual, MCSymbol *Begin)
     : Begin(Begin), BundleGroupBeforeFirstInst(false), HasInstructions(false),
-      IsRegistered(false), IsText(IsText), IsVirtual(IsVirtual), Name(Name),
-      Variant(V) {
+      HasLayout(false), IsRegistered(false), IsText(IsText),
+      IsVirtual(IsVirtual), Name(Name), Variant(V) {
   DummyFragment.setParent(this);
   // The initial subsection number is 0. Create a fragment list.
   CurFragList = &Subsections.emplace_back(0u, FragList{}).second;
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 1570493b765ca..6acc37e599f2e 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -702,7 +702,7 @@ class HexagonAsmBackend : public MCAsmBackend {
     return true;
   }
 
-  bool finishLayout(const MCAssembler &Asm) const override {
+  void finishLayout(MCAssembler const &Asm) const override {
     SmallVector<MCFragment *> Frags;
     for (MCSection &Sec : Asm) {
       Frags.clear();
@@ -747,6 +747,7 @@ class HexagonAsmBackend : public MCAsmBackend {
               //assert(!Error);
               (void)Error;
               ReplaceInstruction(Asm.getEmitter(), RF, Inst);
+              Sec.setHasLayout(false);
               Size = 0; // Only look back one instruction
               break;
             }
@@ -756,7 +757,6 @@ class HexagonAsmBackend : public MCAsmBackend {
         }
       }
     }
-    return true;
   }
 }; // class HexagonAsmBackend
 
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index fcc61d0a5e2f6..67d993a51ad97 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -201,7 +201,7 @@ class X86AsmBackend : public MCAsmBackend {
   bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;
 
-  bool finishLayout(const MCAssembler &Asm) const override;
+  void finishLayout(const MCAssembler &Asm) const override;
 
   unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;
 
@@ -856,7 +856,7 @@ bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
   return Changed;
 }
 
-bool X86AsmBackend::finishLayout(const MCAssembler &Asm) const {
+void X86AsmBackend::finishLayout(MCAssembler const &Asm) const {
   // See if we can further relax some instructions to cut down on the number of
   // nop bytes required for code alignment.  The actual win is in reducing
   // instruction count, not number of bytes.  Modern X86-64 can easily end up
@@ -864,7 +864,7 @@ bool X86AsmBackend::finishLayout(const MCAssembler &Asm) const {
   // (i.e. eliminate nops) even at the cost of increasing the size and
   // complexity of others.
   if (!X86PadForAlign && !X86PadForBranchAlign)
-    return false;
+    return;
 
   // The processed regions are delimitered by LabeledFragments. -g may have more
   // MCSymbols and therefore different relaxation results. X86PadForAlign is
@@ -911,6 +911,9 @@ bool X86AsmBackend::finishLayout(const MCAssembler &Asm) const {
         continue;
       }
 
+#ifndef NDEBUG
+      const uint64_t OrigOffset = Asm.getFragmentOffset(F);
+#endif
       const uint64_t OrigSize = Asm.computeFragmentSize(F);
 
       // To keep the effects local, prefer to relax instructions closest to
@@ -923,7 +926,8 @@ bool X86AsmBackend::finishLayout(const MCAssembler &Asm) const {
         // Give the backend a chance to play any tricks it wishes to increase
         // the encoding size of the given instruction.  Target independent code
         // will try further relaxation, but target's may play further tricks.
-        padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize);
+        if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
+          Sec.setHasLayout(false);
 
         // If we have an instruction which hasn't been fully relaxed, we can't
         // skip past it and insert bytes before it.  Changing its starting
@@ -940,6 +944,14 @@ bool X86AsmBackend::finishLayout(const MCAssembler &Asm) const {
       if (F.getKind() == MCFragment::FT_BoundaryAlign)
         cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
 
+#ifndef NDEBUG
+      const uint64_t FinalOffset = Asm.getFragmentOffset(F);
+      const uint64_t FinalSize = Asm.computeFragmentSize(F);
+      assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
+             "can't move start of next fragment!");
+      assert(FinalSize == RemainingSize && "inconsistent size computation?");
+#endif
+
       // If we're looking at a boundary align, make sure we don't try to pad
       // its target instructions for some following directive.  Doing so would
       // break the alignment of the current boundary align.
@@ -953,7 +965,11 @@ bool X86AsmBackend::finishLayout(const MCAssembler &Asm) const {
     }
   }
 
-  return true;
+  // The layout is done. Mark every fragment as valid.
+  for (MCSection &Section : Asm) {
+    Asm.getFragmentOffset(*Section.curFragList()->Tail);
+    Asm.computeFragmentSize(*Section.curFragList()->Tail);
+  }
 }
 
 unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {

From b14801954e346a3d2f89f4047f0b0bf457bb0194 Mon Sep 17 00:00:00 2001
From: Piyou Chen <piyou.chen@sifive.com>
Date: Wed, 31 Jul 2024 00:54:03 -0700
Subject: [PATCH 082/427] Revert "[compiler-rt][RISCV] Implement
 __init_riscv_feature_bits (#85790)"

This reverts commit a41a4ac78294c728fb70a51623c602ea7f3e308a.
---
 compiler-rt/lib/builtins/CMakeLists.txt       |   1 -
 compiler-rt/lib/builtins/riscv/feature_bits.c | 298 ------------------
 2 files changed, 299 deletions(-)
 delete mode 100644 compiler-rt/lib/builtins/riscv/feature_bits.c

diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 88a5998fd4610..abea8c498f7bd 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -739,7 +739,6 @@ endif()
 set(powerpc64le_SOURCES ${powerpc64_SOURCES})
 
 set(riscv_SOURCES
-  riscv/feature_bits.c
   riscv/fp_mode.c
   riscv/save.S
   riscv/restore.S
diff --git a/compiler-rt/lib/builtins/riscv/feature_bits.c b/compiler-rt/lib/builtins/riscv/feature_bits.c
deleted file mode 100644
index 77422935bd2d3..0000000000000
--- a/compiler-rt/lib/builtins/riscv/feature_bits.c
+++ /dev/null
@@ -1,298 +0,0 @@
-//=== feature_bits.c - Update RISC-V Feature Bits Structure -*- C -*-=========//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define RISCV_FEATURE_BITS_LENGTH 1
-struct {
-  unsigned length;
-  unsigned long long features[RISCV_FEATURE_BITS_LENGTH];
-} __riscv_feature_bits __attribute__((visibility("hidden"), nocommon));
-
-#define RISCV_VENDOR_FEATURE_BITS_LENGTH 1
-struct {
-  unsigned vendorID;
-  unsigned length;
-  unsigned long long features[RISCV_VENDOR_FEATURE_BITS_LENGTH];
-} __riscv_vendor_feature_bits __attribute__((visibility("hidden"), nocommon));
-
-// NOTE: Should sync-up with RISCVFeatures.td
-// TODO: Maybe generate a header from tablegen then include it.
-#define A_GROUPID 0
-#define A_BITMASK (1ULL << 0)
-#define C_GROUPID 0
-#define C_BITMASK (1ULL << 2)
-#define D_GROUPID 0
-#define D_BITMASK (1ULL << 3)
-#define F_GROUPID 0
-#define F_BITMASK (1ULL << 5)
-#define I_GROUPID 0
-#define I_BITMASK (1ULL << 8)
-#define M_GROUPID 0
-#define M_BITMASK (1ULL << 12)
-#define V_GROUPID 0
-#define V_BITMASK (1ULL << 21)
-#define ZACAS_GROUPID 0
-#define ZACAS_BITMASK (1ULL << 26)
-#define ZBA_GROUPID 0
-#define ZBA_BITMASK (1ULL << 27)
-#define ZBB_GROUPID 0
-#define ZBB_BITMASK (1ULL << 28)
-#define ZBC_GROUPID 0
-#define ZBC_BITMASK (1ULL << 29)
-#define ZBKB_GROUPID 0
-#define ZBKB_BITMASK (1ULL << 30)
-#define ZBKC_GROUPID 0
-#define ZBKC_BITMASK (1ULL << 31)
-#define ZBKX_GROUPID 0
-#define ZBKX_BITMASK (1ULL << 32)
-#define ZBS_GROUPID 0
-#define ZBS_BITMASK (1ULL << 33)
-#define ZFA_GROUPID 0
-#define ZFA_BITMASK (1ULL << 34)
-#define ZFH_GROUPID 0
-#define ZFH_BITMASK (1ULL << 35)
-#define ZFHMIN_GROUPID 0
-#define ZFHMIN_BITMASK (1ULL << 36)
-#define ZICBOZ_GROUPID 0
-#define ZICBOZ_BITMASK (1ULL << 37)
-#define ZICOND_GROUPID 0
-#define ZICOND_BITMASK (1ULL << 38)
-#define ZIHINTNTL_GROUPID 0
-#define ZIHINTNTL_BITMASK (1ULL << 39)
-#define ZIHINTPAUSE_GROUPID 0
-#define ZIHINTPAUSE_BITMASK (1ULL << 40)
-#define ZKND_GROUPID 0
-#define ZKND_BITMASK (1ULL << 41)
-#define ZKNE_GROUPID 0
-#define ZKNE_BITMASK (1ULL << 42)
-#define ZKNH_GROUPID 0
-#define ZKNH_BITMASK (1ULL << 43)
-#define ZKSED_GROUPID 0
-#define ZKSED_BITMASK (1ULL << 44)
-#define ZKSH_GROUPID 0
-#define ZKSH_BITMASK (1ULL << 45)
-#define ZKT_GROUPID 0
-#define ZKT_BITMASK (1ULL << 46)
-#define ZTSO_GROUPID 0
-#define ZTSO_BITMASK (1ULL << 47)
-#define ZVBB_GROUPID 0
-#define ZVBB_BITMASK (1ULL << 48)
-#define ZVBC_GROUPID 0
-#define ZVBC_BITMASK (1ULL << 49)
-#define ZVFH_GROUPID 0
-#define ZVFH_BITMASK (1ULL << 50)
-#define ZVFHMIN_GROUPID 0
-#define ZVFHMIN_BITMASK (1ULL << 51)
-#define ZVKB_GROUPID 0
-#define ZVKB_BITMASK (1ULL << 52)
-#define ZVKG_GROUPID 0
-#define ZVKG_BITMASK (1ULL << 53)
-#define ZVKNED_GROUPID 0
-#define ZVKNED_BITMASK (1ULL << 54)
-#define ZVKNHA_GROUPID 0
-#define ZVKNHA_BITMASK (1ULL << 55)
-#define ZVKNHB_GROUPID 0
-#define ZVKNHB_BITMASK (1ULL << 56)
-#define ZVKSED_GROUPID 0
-#define ZVKSED_BITMASK (1ULL << 57)
-#define ZVKSH_GROUPID 0
-#define ZVKSH_BITMASK (1ULL << 58)
-#define ZVKT_GROUPID 0
-#define ZVKT_BITMASK (1ULL << 59)
-
-#if defined(__linux__)
-
-static long syscall_impl_5_args(long number, long arg1, long arg2, long arg3,
-                                long arg4, long arg5) {
-  register long a7 __asm__("a7") = number;
-  register long a0 __asm__("a0") = arg1;
-  register long a1 __asm__("a1") = arg2;
-  register long a2 __asm__("a2") = arg3;
-  register long a3 __asm__("a3") = arg4;
-  register long a4 __asm__("a4") = arg5;
-  __asm__ __volatile__("ecall\n\t"
-                       : "=r"(a0)
-                       : "r"(a7), "r"(a0), "r"(a1), "r"(a2), "r"(a3), "r"(a4)
-                       : "memory");
-  return a0;
-}
-
-#define RISCV_HWPROBE_KEY_MVENDORID 0
-#define RISCV_HWPROBE_KEY_MARCHID 1
-#define RISCV_HWPROBE_KEY_MIMPID 2
-#define RISCV_HWPROBE_KEY_BASE_BEHAVIOR 3
-#define RISCV_HWPROBE_BASE_BEHAVIOR_IMA (1ULL << 0)
-#define RISCV_HWPROBE_KEY_IMA_EXT_0 4
-#define RISCV_HWPROBE_IMA_FD (1ULL << 0)
-#define RISCV_HWPROBE_IMA_C (1ULL << 1)
-#define RISCV_HWPROBE_IMA_V (1ULL << 2)
-#define RISCV_HWPROBE_EXT_ZBA (1ULL << 3)
-#define RISCV_HWPROBE_EXT_ZBB (1ULL << 4)
-#define RISCV_HWPROBE_EXT_ZBS (1ULL << 5)
-#define RISCV_HWPROBE_EXT_ZICBOZ (1ULL << 6)
-#define RISCV_HWPROBE_EXT_ZBC (1ULL << 7)
-#define RISCV_HWPROBE_EXT_ZBKB (1ULL << 8)
-#define RISCV_HWPROBE_EXT_ZBKC (1ULL << 9)
-#define RISCV_HWPROBE_EXT_ZBKX (1ULL << 10)
-#define RISCV_HWPROBE_EXT_ZKND (1ULL << 11)
-#define RISCV_HWPROBE_EXT_ZKNE (1ULL << 12)
-#define RISCV_HWPROBE_EXT_ZKNH (1ULL << 13)
-#define RISCV_HWPROBE_EXT_ZKSED (1ULL << 14)
-#define RISCV_HWPROBE_EXT_ZKSH (1ULL << 15)
-#define RISCV_HWPROBE_EXT_ZKT (1ULL << 16)
-#define RISCV_HWPROBE_EXT_ZVBB (1ULL << 17)
-#define RISCV_HWPROBE_EXT_ZVBC (1ULL << 18)
-#define RISCV_HWPROBE_EXT_ZVKB (1ULL << 19)
-#define RISCV_HWPROBE_EXT_ZVKG (1ULL << 20)
-#define RISCV_HWPROBE_EXT_ZVKNED (1ULL << 21)
-#define RISCV_HWPROBE_EXT_ZVKNHA (1ULL << 22)
-#define RISCV_HWPROBE_EXT_ZVKNHB (1ULL << 23)
-#define RISCV_HWPROBE_EXT_ZVKSED (1ULL << 24)
-#define RISCV_HWPROBE_EXT_ZVKSH (1ULL << 25)
-#define RISCV_HWPROBE_EXT_ZVKT (1ULL << 26)
-#define RISCV_HWPROBE_EXT_ZFH (1ULL << 27)
-#define RISCV_HWPROBE_EXT_ZFHMIN (1ULL << 28)
-#define RISCV_HWPROBE_EXT_ZIHINTNTL (1ULL << 29)
-#define RISCV_HWPROBE_EXT_ZVFH (1ULL << 30)
-#define RISCV_HWPROBE_EXT_ZVFHMIN (1ULL << 31)
-#define RISCV_HWPROBE_EXT_ZFA (1ULL << 32)
-#define RISCV_HWPROBE_EXT_ZTSO (1ULL << 33)
-#define RISCV_HWPROBE_EXT_ZACAS (1ULL << 34)
-#define RISCV_HWPROBE_EXT_ZICOND (1ULL << 35)
-#define RISCV_HWPROBE_EXT_ZIHINTPAUSE (1ULL << 36)
-#define RISCV_HWPROBE_KEY_CPUPERF_0 5
-#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
-#define RISCV_HWPROBE_MISALIGNED_EMULATED (1ULL << 0)
-#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0)
-#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0)
-#define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
-#define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
-#define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6
-/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
-
-struct riscv_hwprobe {
-  long long key;
-  unsigned long long value;
-};
-
-#define __NR_riscv_hwprobe 258
-static long initHwProbe(struct riscv_hwprobe *Hwprobes, int len) {
-  return syscall_impl_5_args(__NR_riscv_hwprobe, (long)Hwprobes, len, 0, 0, 0);
-}
-
-#define SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(EXTNAME)                    \
-  SET_SINGLE_IMAEXT_RISCV_FEATURE(RISCV_HWPROBE_EXT_##EXTNAME, EXTNAME)
-
-#define SET_SINGLE_IMAEXT_RISCV_FEATURE(HWPROBE_BITMASK, EXT)                  \
-  SET_SINGLE_RISCV_FEATURE(IMAEXT0Value &HWPROBE_BITMASK, EXT)
-
-#define SET_SINGLE_RISCV_FEATURE(COND, EXT)                                    \
-  if (COND) {                                                                  \
-    SET_RISCV_FEATURE(EXT);                                                    \
-  }
-
-#define SET_RISCV_FEATURE(EXT) features[EXT##_GROUPID] |= EXT##_BITMASK
-
-static void initRISCVFeature(struct riscv_hwprobe Hwprobes[]) {
-
-  // Note: If a hwprobe key is unknown to the kernel, its key field
-  // will be cleared to -1, and its value set to 0.
-  // This unsets all extension bitmask bits.
-
-  // Init vendor extension
-  __riscv_vendor_feature_bits.length = 0;
-  __riscv_vendor_feature_bits.vendorID = Hwprobes[2].value;
-
-  // Init standard extension
-  // TODO: Maybe Extension implied generate from tablegen?
-  __riscv_feature_bits.length = RISCV_FEATURE_BITS_LENGTH;
-
-  unsigned long long features[RISCV_FEATURE_BITS_LENGTH];
-  int i;
-
-  for (i = 0; i < RISCV_FEATURE_BITS_LENGTH; i++)
-    features[i] = 0;
-
-  // Check RISCV_HWPROBE_KEY_BASE_BEHAVIOR
-  unsigned long long BaseValue = Hwprobes[0].value;
-  if (BaseValue & RISCV_HWPROBE_BASE_BEHAVIOR_IMA) {
-    SET_RISCV_FEATURE(I);
-    SET_RISCV_FEATURE(M);
-    SET_RISCV_FEATURE(A);
-  }
-
-  // Check RISCV_HWPROBE_KEY_IMA_EXT_0
-  unsigned long long IMAEXT0Value = Hwprobes[1].value;
-  if (IMAEXT0Value & RISCV_HWPROBE_IMA_FD) {
-    SET_RISCV_FEATURE(F);
-    SET_RISCV_FEATURE(D);
-  }
-
-  SET_SINGLE_IMAEXT_RISCV_FEATURE(RISCV_HWPROBE_IMA_C, C);
-  SET_SINGLE_IMAEXT_RISCV_FEATURE(RISCV_HWPROBE_IMA_V, V);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBA);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBB);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBS);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZICBOZ);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBC);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBKB);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBKC);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZBKX);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZKND);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZKNE);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZKNH);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZKSED);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZKSH);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZKT);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVBB);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVBC);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKB);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKG);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKNED);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKNHA);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKNHB);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKSED);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKSH);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVKT);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZFH);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZFHMIN);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZIHINTNTL);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZIHINTPAUSE);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVFH);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZVFHMIN);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZFA);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZTSO);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZACAS);
-  SET_RISCV_HWPROBE_EXT_SINGLE_RISCV_FEATURE(ZICOND);
-
-  for (i = 0; i < RISCV_FEATURE_BITS_LENGTH; i++)
-    __riscv_feature_bits.features[i] = features[i];
-}
-
-#endif // defined(__linux__)
-
-static int FeaturesBitCached = 0;
-
-void __init_riscv_feature_bits() {
-
-  if (FeaturesBitCached)
-    return;
-
-#if defined(__linux__)
-  struct riscv_hwprobe Hwprobes[] = {
-      {RISCV_HWPROBE_KEY_BASE_BEHAVIOR, 0},
-      {RISCV_HWPROBE_KEY_IMA_EXT_0, 0},
-      {RISCV_HWPROBE_KEY_MVENDORID, 0},
-  };
-  if (initHwProbe(Hwprobes, sizeof(Hwprobes) / sizeof(Hwprobes[0])))
-    return;
-
-  initRISCVFeature(Hwprobes);
-#endif // defined(__linux__)
-
-  FeaturesBitCached = 1;
-}

From 0e615206e3b2c5f329cd612c09f3237c6060c06e Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Wed, 31 Jul 2024 10:40:14 -0400
Subject: [PATCH 083/427] [libc++] Revert "Use GCC type traits builtins for
 remove_cv and remove_cvref (#81386)"

This reverts commit 55357160d0e151c32f86e1d6683b4bddbb706aa1.
This is only being reverted from the LLVM 19 branch as a
convenience to avoid breaking some IDEs which were not ready
for that change.

Fixes #99464
---
 libcxx/include/__type_traits/remove_cv.h    | 15 +++++++++++----
 libcxx/include/__type_traits/remove_cvref.h | 15 +++++----------
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/libcxx/include/__type_traits/remove_cv.h b/libcxx/include/__type_traits/remove_cv.h
index 50e9f3e8aa78d..c4bf612794bd5 100644
--- a/libcxx/include/__type_traits/remove_cv.h
+++ b/libcxx/include/__type_traits/remove_cv.h
@@ -10,6 +10,8 @@
 #define _LIBCPP___TYPE_TRAITS_REMOVE_CV_H
 
 #include <__config>
+#include <__type_traits/remove_const.h>
+#include <__type_traits/remove_volatile.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -17,18 +19,23 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+#if __has_builtin(__remove_cv) && !defined(_LIBCPP_COMPILER_GCC)
 template <class _Tp>
 struct remove_cv {
   using type _LIBCPP_NODEBUG = __remove_cv(_Tp);
 };
 
-#if defined(_LIBCPP_COMPILER_GCC)
 template <class _Tp>
-using __remove_cv_t = typename remove_cv<_Tp>::type;
+using __remove_cv_t = __remove_cv(_Tp);
 #else
 template <class _Tp>
-using __remove_cv_t = __remove_cv(_Tp);
-#endif
+struct _LIBCPP_TEMPLATE_VIS remove_cv {
+  typedef __remove_volatile_t<__remove_const_t<_Tp> > type;
+};
+
+template <class _Tp>
+using __remove_cv_t = __remove_volatile_t<__remove_const_t<_Tp> >;
+#endif // __has_builtin(__remove_cv)
 
 #if _LIBCPP_STD_VER >= 14
 template <class _Tp>
diff --git a/libcxx/include/__type_traits/remove_cvref.h b/libcxx/include/__type_traits/remove_cvref.h
index 55f894dbd1d81..e8e8745ab0960 100644
--- a/libcxx/include/__type_traits/remove_cvref.h
+++ b/libcxx/include/__type_traits/remove_cvref.h
@@ -20,26 +20,21 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#if defined(_LIBCPP_COMPILER_GCC)
+#if __has_builtin(__remove_cvref) && !defined(_LIBCPP_COMPILER_GCC)
 template <class _Tp>
-struct __remove_cvref_gcc {
-  using type = __remove_cvref(_Tp);
-};
-
-template <class _Tp>
-using __remove_cvref_t _LIBCPP_NODEBUG = typename __remove_cvref_gcc<_Tp>::type;
+using __remove_cvref_t _LIBCPP_NODEBUG = __remove_cvref(_Tp);
 #else
 template <class _Tp>
-using __remove_cvref_t _LIBCPP_NODEBUG = __remove_cvref(_Tp);
+using __remove_cvref_t _LIBCPP_NODEBUG = __remove_cv_t<__libcpp_remove_reference_t<_Tp> >;
 #endif // __has_builtin(__remove_cvref)
 
 template <class _Tp, class _Up>
-using __is_same_uncvref = _IsSame<__remove_cvref_t<_Tp>, __remove_cvref_t<_Up> >;
+struct __is_same_uncvref : _IsSame<__remove_cvref_t<_Tp>, __remove_cvref_t<_Up> > {};
 
 #if _LIBCPP_STD_VER >= 20
 template <class _Tp>
 struct remove_cvref {
-  using type _LIBCPP_NODEBUG = __remove_cvref(_Tp);
+  using type _LIBCPP_NODEBUG = __remove_cvref_t<_Tp>;
 };
 
 template <class _Tp>

From c3004032c244cb5264790dc535437b9c3b93acb6 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <aganea@havenstudios.com>
Date: Tue, 30 Jul 2024 19:06:03 -0400
Subject: [PATCH 084/427] [Support] Silence warnings when retrieving exported
 functions (#97905)

Since functions exported from DLLs are type-erased, before this patch I
was seeing the new Clang 19 warning `-Wcast-function-type-mismatch`.

This happens when building LLVM on Windows.

Following discussion in
https://github.com/llvm/llvm-project/commit/593f708118aef792f434185547f74fedeaf51dd4#commitcomment-143905744

(cherry picked from commit 39e192b379362e9e645427631c35450d55ed517d)
---
 llvm/lib/Support/Windows/Process.inc |  3 ++-
 llvm/lib/Support/Windows/Signals.inc | 38 +++++++++++++++-------------
 2 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Support/Windows/Process.inc b/llvm/lib/Support/Windows/Process.inc
index 34d294b232c32..d525f5b16e862 100644
--- a/llvm/lib/Support/Windows/Process.inc
+++ b/llvm/lib/Support/Windows/Process.inc
@@ -482,7 +482,8 @@ static RTL_OSVERSIONINFOEXW GetWindowsVer() {
     HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll");
     assert(hMod);
 
-    auto getVer = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion");
+    auto getVer =
+        (RtlGetVersionPtr)(void *)::GetProcAddress(hMod, "RtlGetVersion");
     assert(getVer);
 
     RTL_OSVERSIONINFOEXW info{};
diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc
index 29ebf7c696e04..f11ad09f37139 100644
--- a/llvm/lib/Support/Windows/Signals.inc
+++ b/llvm/lib/Support/Windows/Signals.inc
@@ -171,23 +171,27 @@ static bool load64BitDebugHelp(void) {
   HMODULE hLib =
       ::LoadLibraryExA("Dbghelp.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
   if (hLib) {
-    fMiniDumpWriteDump =
-        (fpMiniDumpWriteDump)::GetProcAddress(hLib, "MiniDumpWriteDump");
-    fStackWalk64 = (fpStackWalk64)::GetProcAddress(hLib, "StackWalk64");
-    fSymGetModuleBase64 =
-        (fpSymGetModuleBase64)::GetProcAddress(hLib, "SymGetModuleBase64");
-    fSymGetSymFromAddr64 =
-        (fpSymGetSymFromAddr64)::GetProcAddress(hLib, "SymGetSymFromAddr64");
-    fSymGetLineFromAddr64 =
-        (fpSymGetLineFromAddr64)::GetProcAddress(hLib, "SymGetLineFromAddr64");
-    fSymGetModuleInfo64 =
-        (fpSymGetModuleInfo64)::GetProcAddress(hLib, "SymGetModuleInfo64");
-    fSymFunctionTableAccess64 = (fpSymFunctionTableAccess64)::GetProcAddress(
-        hLib, "SymFunctionTableAccess64");
-    fSymSetOptions = (fpSymSetOptions)::GetProcAddress(hLib, "SymSetOptions");
-    fSymInitialize = (fpSymInitialize)::GetProcAddress(hLib, "SymInitialize");
-    fEnumerateLoadedModules = (fpEnumerateLoadedModules)::GetProcAddress(
-        hLib, "EnumerateLoadedModules64");
+    fMiniDumpWriteDump = (fpMiniDumpWriteDump)(void *)::GetProcAddress(
+        hLib, "MiniDumpWriteDump");
+    fStackWalk64 = (fpStackWalk64)(void *)::GetProcAddress(hLib, "StackWalk64");
+    fSymGetModuleBase64 = (fpSymGetModuleBase64)(void *)::GetProcAddress(
+        hLib, "SymGetModuleBase64");
+    fSymGetSymFromAddr64 = (fpSymGetSymFromAddr64)(void *)::GetProcAddress(
+        hLib, "SymGetSymFromAddr64");
+    fSymGetLineFromAddr64 = (fpSymGetLineFromAddr64)(void *)::GetProcAddress(
+        hLib, "SymGetLineFromAddr64");
+    fSymGetModuleInfo64 = (fpSymGetModuleInfo64)(void *)::GetProcAddress(
+        hLib, "SymGetModuleInfo64");
+    fSymFunctionTableAccess64 =
+        (fpSymFunctionTableAccess64)(void *)::GetProcAddress(
+            hLib, "SymFunctionTableAccess64");
+    fSymSetOptions =
+        (fpSymSetOptions)(void *)::GetProcAddress(hLib, "SymSetOptions");
+    fSymInitialize =
+        (fpSymInitialize)(void *)::GetProcAddress(hLib, "SymInitialize");
+    fEnumerateLoadedModules =
+        (fpEnumerateLoadedModules)(void *)::GetProcAddress(
+            hLib, "EnumerateLoadedModules64");
   }
   return isDebugHelpInitialized();
 }

From 19ebcf8685f2ef010a0bc8474b4bf732024a3576 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dimitry@andric.com>
Date: Mon, 29 Jul 2024 20:34:01 +0200
Subject: [PATCH 085/427] [InstrProf] Remove duplicate definition of IntPtrT

In 16e74fd48988a (for #82711) a duplicate definition of `IntPtrT` was
added to `InstrProfiling.h`, leading to warnings:

    compiler-rt/lib/profile/InstrProfiling.h:52:15: warning: redefinition of typedef 'IntPtrT' is a C11 feature [-Wtypedef-redefinition]
       52 | typedef void *IntPtrT;
          |               ^
    compiler-rt/lib/profile/InstrProfiling.h:34:15: note: previous definition is here
       34 | typedef void *IntPtrT;
          |               ^

Fix the warnings by removing the duplicate typedef.

(cherry picked from commit 2c376fe96c83443c15e6485d043ebe321904546b)
---
 compiler-rt/lib/profile/InstrProfiling.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h
index d424a22c212c3..6906d52eacaf1 100644
--- a/compiler-rt/lib/profile/InstrProfiling.h
+++ b/compiler-rt/lib/profile/InstrProfiling.h
@@ -49,7 +49,6 @@ typedef struct ValueProfNode {
 #include "profile/InstrProfData.inc"
 } ValueProfNode;
 
-typedef void *IntPtrT;
 typedef struct COMPILER_RT_ALIGNAS(INSTR_PROF_DATA_ALIGNMENT) VTableProfData {
 #define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Initializer) Type Name;
 #include "profile/InstrProfData.inc"

From 2d7539381c278dd47c9dd6ecb9943d9685ab66f4 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Thu, 1 Aug 2024 11:23:03 -0700
Subject: [PATCH 086/427] workflows: Fix libclc-tests (#101524)

The old out-of-tree build configuration stopped working and in tree
builds are supported now, so we should use the in tree configuration.
The only downside is we can't run the tests any more, but at least we
will be able to test the build again.

(cherry picked from commit 0512ba0a435a9d693cb61f182fc9e3eb7f6dbd6a)
---
 .github/workflows/llvm-project-tests.yml | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/llvm-project-tests.yml b/.github/workflows/llvm-project-tests.yml
index 0a228c41f354e..17a54be16badc 100644
--- a/.github/workflows/llvm-project-tests.yml
+++ b/.github/workflows/llvm-project-tests.yml
@@ -131,6 +131,7 @@ jobs:
                 -DCMAKE_BUILD_TYPE=Release \
                 -DLLVM_ENABLE_ASSERTIONS=ON \
                 -DLLDB_INCLUDE_TESTS=OFF \
+                -DLIBCLC_TARGETS_TO_BUILD="amdgcn--;amdgcn--amdhsa;r600--;nvptx--;nvptx64--;nvptx--nvidiacl;nvptx64--nvidiacl" \
                 -DCMAKE_C_COMPILER_LAUNCHER=sccache \
                 -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \
                 $extra_cmake_args \
@@ -142,8 +143,6 @@ jobs:
         env:
           LLVM_BUILDDIR: ${{ steps.build-llvm.outputs.llvm-builddir }}
         run: |
-          # Make sure all of LLVM libraries that llvm-config needs are built.
+          # The libclc tests don't have a generated check target so all we can
+          # do is build it.
           ninja -C "$LLVM_BUILDDIR"
-          cmake -G Ninja -S libclc -B libclc-build -DLLVM_DIR="$LLVM_BUILDDIR"/lib/cmake/llvm -DLIBCLC_TARGETS_TO_BUILD="amdgcn--;amdgcn--amdhsa;r600--;nvptx--;nvptx64--;nvptx--nvidiacl;nvptx64--nvidiacl"
-          ninja -C libclc-build
-          ninja -C libclc-build test

From 23f3b64082ecd06fcfbfbc2098fcaa008862545b Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dimitry@andric.com>
Date: Thu, 1 Aug 2024 09:28:29 +0200
Subject: [PATCH 087/427] [lldb][FreeBSD] Fix
 NativeRegisterContextFreeBSD_{arm,mips64,powerpc} declarations (#101403)

Similar to #97796, fix the type of the `native_thread` parameter for the
arm, mips64 and powerpc variants of `NativeRegisterContextFreeBSD_*`.

Otherwise, this leads to compile errors similar to:

```
lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.cpp:85:39: error: out-of-line definition of 'NativeRegisterContextFreeBSD_powerpc' does not match any declaration in 'lldb_private::process_freebsd::NativeRegisterContextFreeBSD_powerpc'
   85 | NativeRegisterContextFreeBSD_powerpc::NativeRegisterContextFreeBSD_powerpc(
      |                                       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
```

(cherry picked from commit 7088a5ed880f29129ec844c66068e8cb61ca98bf)
---
 .../Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm.h  | 2 +-
 .../Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h       | 2 +-
 .../Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.h      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm.h b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm.h
index 89ffa617294aa..b9537e6952f6c 100644
--- a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm.h
+++ b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm.h
@@ -30,7 +30,7 @@ class NativeProcessFreeBSD;
 class NativeRegisterContextFreeBSD_arm : public NativeRegisterContextFreeBSD {
 public:
   NativeRegisterContextFreeBSD_arm(const ArchSpec &target_arch,
-                                   NativeThreadProtocol &native_thread);
+                                   NativeThreadFreeBSD &native_thread);
 
   uint32_t GetRegisterSetCount() const override;
 
diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h
index 0b4a508a7d5dd..286b4fd8d8b99 100644
--- a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h
+++ b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h
@@ -31,7 +31,7 @@ class NativeRegisterContextFreeBSD_mips64
     : public NativeRegisterContextFreeBSD {
 public:
   NativeRegisterContextFreeBSD_mips64(const ArchSpec &target_arch,
-                                      NativeThreadProtocol &native_thread);
+                                      NativeThreadFreeBSD &native_thread);
 
   uint32_t GetRegisterSetCount() const override;
 
diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.h b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.h
index 3df371036f915..420db822acc0f 100644
--- a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.h
+++ b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.h
@@ -31,7 +31,7 @@ class NativeRegisterContextFreeBSD_powerpc
     : public NativeRegisterContextFreeBSD {
 public:
   NativeRegisterContextFreeBSD_powerpc(const ArchSpec &target_arch,
-                                       NativeThreadProtocol &native_thread);
+                                       NativeThreadFreeBSD &native_thread);
 
   uint32_t GetRegisterSetCount() const override;
 

From 39e8e7797ae868e82b4184fbadf4572ff9bd3aa3 Mon Sep 17 00:00:00 2001
From: Damien L-G <dalg24@gmail.com>
Date: Thu, 1 Aug 2024 10:39:27 -0400
Subject: [PATCH 088/427] [libc++] Increase atomic_ref's required alignment for
 small types (#99654)

This patch increases the alignment requirement for std::atomic_ref
such that we can guarantee lockfree operations more often. Specifically,
we require types that are 1, 2, 4, 8, or 16 bytes in size to be aligned
to at least their size to be used with std::atomic_ref.

This is the case for most types, however a notable exception is
`long long` on x86, which is 8 bytes in length but has an alignment
of 4.

As a result of this patch, one has to be more careful about the
alignment of objects used with std::atomic_ref. Failure to provide
a properly-aligned object to std::atomic_ref is a precondition
violation and is technically UB. On the flipside, this allows us
to provide an atomic_ref that is actually lockfree more often,
which is an important QOI property.

More information in the discussion at https://github.com/llvm/llvm-project/pull/99570#issuecomment-2237668661.

Co-authored-by: Louis Dionne <ldionne.2@gmail.com>
(cherry picked from commit 59ca618e3b7aec8c32e24d781bae436dc99b2727)
---
 libcxx/include/__atomic/atomic_ref.h            | 17 +++++++++++------
 .../atomics.ref/is_always_lock_free.pass.cpp    |  2 +-
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/libcxx/include/__atomic/atomic_ref.h b/libcxx/include/__atomic/atomic_ref.h
index 2849b82e1a3dd..b0180a37ab500 100644
--- a/libcxx/include/__atomic/atomic_ref.h
+++ b/libcxx/include/__atomic/atomic_ref.h
@@ -57,11 +57,6 @@ struct __get_aligner_instance {
 
 template <class _Tp>
 struct __atomic_ref_base {
-protected:
-  _Tp* __ptr_;
-
-  _LIBCPP_HIDE_FROM_ABI __atomic_ref_base(_Tp& __obj) : __ptr_(std::addressof(__obj)) {}
-
 private:
   _LIBCPP_HIDE_FROM_ABI static _Tp* __clear_padding(_Tp& __val) noexcept {
     _Tp* __ptr = std::addressof(__val);
@@ -108,10 +103,14 @@ struct __atomic_ref_base {
 
   friend struct __atomic_waitable_traits<__atomic_ref_base<_Tp>>;
 
+  // require types that are 1, 2, 4, 8, or 16 bytes in length to be aligned to at least their size to be potentially
+  // used lock-free
+  static constexpr size_t __min_alignment = (sizeof(_Tp) & (sizeof(_Tp) - 1)) || (sizeof(_Tp) > 16) ? 0 : sizeof(_Tp);
+
 public:
   using value_type = _Tp;
 
-  static constexpr size_t required_alignment = alignof(_Tp);
+  static constexpr size_t required_alignment = alignof(_Tp) > __min_alignment ? alignof(_Tp) : __min_alignment;
 
   // The __atomic_always_lock_free builtin takes into account the alignment of the pointer if provided,
   // so we create a fake pointer with a suitable alignment when querying it. Note that we are guaranteed
@@ -218,6 +217,12 @@ struct __atomic_ref_base {
   }
   _LIBCPP_HIDE_FROM_ABI void notify_one() const noexcept { std::__atomic_notify_one(*this); }
   _LIBCPP_HIDE_FROM_ABI void notify_all() const noexcept { std::__atomic_notify_all(*this); }
+
+protected:
+  typedef _Tp _Aligned_Tp __attribute__((aligned(required_alignment)));
+  _Aligned_Tp* __ptr_;
+
+  _LIBCPP_HIDE_FROM_ABI __atomic_ref_base(_Tp& __obj) : __ptr_(std::addressof(__obj)) {}
 };
 
 template <class _Tp>
diff --git a/libcxx/test/std/atomics/atomics.ref/is_always_lock_free.pass.cpp b/libcxx/test/std/atomics/atomics.ref/is_always_lock_free.pass.cpp
index acdbf63a24d85..78e46c0397951 100644
--- a/libcxx/test/std/atomics/atomics.ref/is_always_lock_free.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.ref/is_always_lock_free.pass.cpp
@@ -54,7 +54,7 @@ void check_always_lock_free(std::atomic_ref<T> const& a) {
 #define CHECK_ALWAYS_LOCK_FREE(T)                                                                                      \
   do {                                                                                                                 \
     typedef T type;                                                                                                    \
-    type obj{};                                                                                                        \
+    alignas(std::atomic_ref<type>::required_alignment) type obj{};                                                     \
     std::atomic_ref<type> a(obj);                                                                                      \
     check_always_lock_free(a);                                                                                         \
   } while (0)

From 3ee69f240579430c0c0abdc4641ccdf85b4efe92 Mon Sep 17 00:00:00 2001
From: Xing Xue <xingxue@outlook.com>
Date: Thu, 1 Aug 2024 07:25:01 -0400
Subject: [PATCH 089/427] [NFC][libc++][libc++abi][libunwind][test] Fix/unify
 AIX triples used in LIT tests (#101196)

This patch fixes/unifies AIX target triples used in libc++, libc++abi,
and libunwind LIT tests.

(cherry picked from commit 2d3655037ccfa276cb0949c2ce0cff56985f6637)
---
 libcxx/test/libcxx/vendor/ibm/bad_function_call.pass.cpp        | 2 +-
 libcxxabi/test/vendor/ibm/aix_xlclang_nested_excp_32.pass.sh.s  | 2 +-
 libcxxabi/test/vendor/ibm/aix_xlclang_nested_excp_64.pass.sh.s  | 2 +-
 .../test/vendor/ibm/aix_xlclang_passing_excp_obj_32.pass.sh.S   | 2 +-
 .../test/vendor/ibm/aix_xlclang_passing_excp_obj_64.pass.sh.S   | 2 +-
 libcxxabi/test/vendor/ibm/cond_reg_restore.pass.cpp             | 2 +-
 libcxxabi/test/vendor/ibm/vec_reg_restore.pass.cpp              | 2 +-
 libunwind/test/aix_signal_unwind.pass.sh.S                      | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/libcxx/test/libcxx/vendor/ibm/bad_function_call.pass.cpp b/libcxx/test/libcxx/vendor/ibm/bad_function_call.pass.cpp
index 2b684465650fa..3714e4037a2dc 100644
--- a/libcxx/test/libcxx/vendor/ibm/bad_function_call.pass.cpp
+++ b/libcxx/test/libcxx/vendor/ibm/bad_function_call.pass.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-// REQUIRES: target={{powerpc.*-ibm-aix.*}}
+// REQUIRES: target={{.+}}-aix{{.*}}
 // ADDITIONAL_COMPILE_FLAGS: -fvisibility-inlines-hidden
 
 // When there is a weak hidden symbol in user code and a strong definition
diff --git a/libcxxabi/test/vendor/ibm/aix_xlclang_nested_excp_32.pass.sh.s b/libcxxabi/test/vendor/ibm/aix_xlclang_nested_excp_32.pass.sh.s
index ce90045586082..b35c999e6e50d 100644
--- a/libcxxabi/test/vendor/ibm/aix_xlclang_nested_excp_32.pass.sh.s
+++ b/libcxxabi/test/vendor/ibm/aix_xlclang_nested_excp_32.pass.sh.s
@@ -9,7 +9,7 @@
 # Test that a nested exception is thrown by a destructor inside a try-block
 # when the code is generated by the legacy AIX xlclang compiler.
 
-# REQUIRES: target=powerpc-ibm-aix
+# REQUIRES: target=powerpc-ibm-aix{{.*}}
 # UNSUPPORTED: no-exceptions
 
 # RUN: %{cxx} %{flags} %s %{link_flags} \
diff --git a/libcxxabi/test/vendor/ibm/aix_xlclang_nested_excp_64.pass.sh.s b/libcxxabi/test/vendor/ibm/aix_xlclang_nested_excp_64.pass.sh.s
index 7b0afb9ebae38..16754db2837ca 100644
--- a/libcxxabi/test/vendor/ibm/aix_xlclang_nested_excp_64.pass.sh.s
+++ b/libcxxabi/test/vendor/ibm/aix_xlclang_nested_excp_64.pass.sh.s
@@ -8,7 +8,7 @@
 # Test that a nested exception is thrown by a destructor inside a try-block
 # when the code is generated by the legacy AIX xlclang compiler.
 
-# REQUIRES: target=powerpc64-ibm-aix
+# REQUIRES: target=powerpc64-ibm-aix{{.*}}
 # UNSUPPORTED: no-exceptions
 
 # RUN: %{cxx} %{flags} %s %{link_flags} \
diff --git a/libcxxabi/test/vendor/ibm/aix_xlclang_passing_excp_obj_32.pass.sh.S b/libcxxabi/test/vendor/ibm/aix_xlclang_passing_excp_obj_32.pass.sh.S
index 71c3ab9409a81..8b92e4febf562 100644
--- a/libcxxabi/test/vendor/ibm/aix_xlclang_passing_excp_obj_32.pass.sh.S
+++ b/libcxxabi/test/vendor/ibm/aix_xlclang_passing_excp_obj_32.pass.sh.S
@@ -14,7 +14,7 @@
 // xlclang++ compiler included in this file. This file tests for the 32-bit
 // mode.
 
-# REQUIRES: target=powerpc-ibm-aix
+# REQUIRES: target=powerpc-ibm-aix{{.*}}
 # UNSUPPORTED: no-exceptions
 
 // RUN: %{cxx} -c %s -o %t1_32.o -DT1_CPP_CODE %{flags} %{compile_flags}
diff --git a/libcxxabi/test/vendor/ibm/aix_xlclang_passing_excp_obj_64.pass.sh.S b/libcxxabi/test/vendor/ibm/aix_xlclang_passing_excp_obj_64.pass.sh.S
index da413577bd38f..64d7c80e9e6dd 100644
--- a/libcxxabi/test/vendor/ibm/aix_xlclang_passing_excp_obj_64.pass.sh.S
+++ b/libcxxabi/test/vendor/ibm/aix_xlclang_passing_excp_obj_64.pass.sh.S
@@ -14,7 +14,7 @@
 // xlclang++ compiler included in this file. This file tests for the 64-bit
 // mode.
 
-# REQUIRES: target=powerpc64-ibm-aix
+# REQUIRES: target=powerpc64-ibm-aix{{.*}}
 # UNSUPPORTED: no-exceptions
 
 // RUN: %{cxx} -c %s -o %t1_64.o -DT1_CPP_CODE %{flags} %{compile_flags}
diff --git a/libcxxabi/test/vendor/ibm/cond_reg_restore.pass.cpp b/libcxxabi/test/vendor/ibm/cond_reg_restore.pass.cpp
index 63817e1b13a25..a5eb3c20534a3 100644
--- a/libcxxabi/test/vendor/ibm/cond_reg_restore.pass.cpp
+++ b/libcxxabi/test/vendor/ibm/cond_reg_restore.pass.cpp
@@ -10,7 +10,7 @@
 // on AIX. Option -O3 is required so that the compiler will re-use the value
 // in the condition register instead of re-evaluating the condition expression.
 
-// REQUIRES: target=powerpc{{(64)?}}-ibm-aix
+// REQUIRES: target={{.+}}-aix{{.*}}
 // ADDITIONAL_COMPILE_FLAGS: -O3
 // UNSUPPORTED: no-exceptions
 
diff --git a/libcxxabi/test/vendor/ibm/vec_reg_restore.pass.cpp b/libcxxabi/test/vendor/ibm/vec_reg_restore.pass.cpp
index 703c311dae392..7c31970546993 100644
--- a/libcxxabi/test/vendor/ibm/vec_reg_restore.pass.cpp
+++ b/libcxxabi/test/vendor/ibm/vec_reg_restore.pass.cpp
@@ -9,7 +9,7 @@
 // Check that the PowerPC vector registers are restored properly during
 // unwinding. Option -mabi=vec-extabi is required to compile the test case.
 
-// REQUIRES: target=powerpc{{(64)?}}-ibm-aix
+// REQUIRES: target={{.+}}-aix{{.*}}
 // ADDITIONAL_COMPILE_FLAGS: -mabi=vec-extabi
 // UNSUPPORTED: no-exceptions
 
diff --git a/libunwind/test/aix_signal_unwind.pass.sh.S b/libunwind/test/aix_signal_unwind.pass.sh.S
index a666577d095b1..2c0cf140fe267 100644
--- a/libunwind/test/aix_signal_unwind.pass.sh.S
+++ b/libunwind/test/aix_signal_unwind.pass.sh.S
@@ -10,7 +10,7 @@
 // a correct traceback when the function raising the signal does not save
 // the link register or does not store the stack back chain.
 
-// REQUIRES: target=powerpc{{(64)?}}-ibm-aix{{.*}}
+// REQUIRES: target={{.+}}-aix{{.*}}
 
 // Test when the function raising the signal does not save the link register
 // RUN: %{cxx} -x c++ %s -o %t.exe -DCXX_CODE %{flags} %{compile_flags}

From 142499d9a21309c7c5bacf34c35bb42fbffb7a8f Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Thu, 1 Aug 2024 10:22:03 -0700
Subject: [PATCH 090/427] [ELF] Support relocatable files using CREL with
 explicit addends

... using the temporary section type code 0x40000020
(`clang -c -Wa,--crel,--allow-experimental-crel`). LLVM will change the
code and break compatibility (Clang and lld of different versions are
not guaranteed to cooperate, unlike other features). CREL with implicit
addends are not supported.

---

Introduce `RelsOrRelas::crels` to iterate over SHT_CREL sections and
update users to check `crels`.

(The decoding performance is critical and error checking is difficult.
Follow `skipLeb` and `R_*LEB128` handling, do not use
`llvm::decodeULEB128`, whichs compiles to a lot of code.)

A few users (e.g. .eh_frame, LLDDwarfObj, s390x) require random access. Pass
`/*supportsCrel=*/false` to `relsOrRelas` to allocate a buffer and
convert CREL to RELA (`relas` instead of `crels` will be used). Since
allocating a buffer increases, the conversion is only performed when
absolutely necessary.

---

Non-alloc SHT_CREL sections may be created in -r and --emit-relocs
links. SHT_CREL and SHT_RELA components need reencoding since
r_offset/r_symidx/r_type/r_addend may change. (r_type may change because
relocations referencing a symbol in a discarded section are converted to
`R_*_NONE`).

* SHT_CREL components: decode with `RelsOrRelas` and re-encode (`OutputSection::finalizeNonAllocCrel`)
* SHT_RELA components: convert to CREL (`relToCrel`). An output section can only have one relocation section.
* SHT_REL components: print an error for now.

SHT_REL to SHT_CREL conversion for -r/--emit-relocs is complex and
unsupported yet.

Link: https://discourse.llvm.org/t/rfc-crel-a-compact-relocation-format-for-elf/77600

Pull Request: https://github.com/llvm/llvm-project/pull/98115

(cherry picked from commit 0af07c078798b7c427e2981377781b5cc555a568)
---
 lld/ELF/DWARF.cpp                          |   3 +-
 lld/ELF/ICF.cpp                            |   8 +-
 lld/ELF/InputFiles.cpp                     |   1 +
 lld/ELF/InputFiles.h                       |   1 +
 lld/ELF/InputSection.cpp                   |  67 ++++++++---
 lld/ELF/InputSection.h                     |  14 ++-
 lld/ELF/LinkerScript.cpp                   |   2 +
 lld/ELF/MarkLive.cpp                       |  12 +-
 lld/ELF/OutputSections.cpp                 | 132 ++++++++++++++++++++-
 lld/ELF/OutputSections.h                   |   6 +
 lld/ELF/Relocations.cpp                    |  38 ++++--
 lld/ELF/Relocations.h                      |  92 ++++++++++++++
 lld/ELF/SyntheticSections.cpp              |   6 +-
 lld/ELF/Writer.cpp                         |  13 +-
 lld/test/ELF/crel-rel-mixed.s              |  22 ++++
 lld/test/ELF/crel.s                        |  90 ++++++++++++++
 lld/test/ELF/debug-names.s                 |   2 +-
 lld/test/ELF/gc-sections.s                 |   4 +
 lld/test/ELF/icf1.s                        |   3 +
 lld/test/ELF/icf4.s                        |   2 +-
 lld/test/ELF/linkerscript/nocrossrefs.test |   4 +-
 lld/test/ELF/relocatable-crel-32.s         |  71 +++++++++++
 lld/test/ELF/relocatable-crel.s            | 107 +++++++++++++++++
 23 files changed, 656 insertions(+), 44 deletions(-)
 create mode 100644 lld/test/ELF/crel-rel-mixed.s
 create mode 100644 lld/test/ELF/crel.s
 create mode 100644 lld/test/ELF/relocatable-crel-32.s
 create mode 100644 lld/test/ELF/relocatable-crel.s

diff --git a/lld/ELF/DWARF.cpp b/lld/ELF/DWARF.cpp
index 5d58e0c60a952..517d26810a378 100644
--- a/lld/ELF/DWARF.cpp
+++ b/lld/ELF/DWARF.cpp
@@ -136,7 +136,8 @@ template <class ELFT>
 std::optional<RelocAddrEntry>
 LLDDwarfObj<ELFT>::find(const llvm::DWARFSection &s, uint64_t pos) const {
   auto &sec = static_cast<const LLDDWARFSection &>(s);
-  const RelsOrRelas<ELFT> rels = sec.sec->template relsOrRelas<ELFT>();
+  const RelsOrRelas<ELFT> rels =
+      sec.sec->template relsOrRelas<ELFT>(/*supportsCrel=*/false);
   if (rels.areRelocsRel())
     return findAux(*sec.sec, pos, rels.rels);
   return findAux(*sec.sec, pos, rels.relas);
diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp
index a6b52d78fa806..44e8a71cc6286 100644
--- a/lld/ELF/ICF.cpp
+++ b/lld/ELF/ICF.cpp
@@ -324,6 +324,8 @@ bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) {
 
   const RelsOrRelas<ELFT> ra = a->template relsOrRelas<ELFT>();
   const RelsOrRelas<ELFT> rb = b->template relsOrRelas<ELFT>();
+  if (ra.areRelocsCrel())
+    return constantEq(a, ra.crels, b, rb.crels);
   return ra.areRelocsRel() || rb.areRelocsRel()
              ? constantEq(a, ra.rels, b, rb.rels)
              : constantEq(a, ra.relas, b, rb.relas);
@@ -374,6 +376,8 @@ template <class ELFT>
 bool ICF<ELFT>::equalsVariable(const InputSection *a, const InputSection *b) {
   const RelsOrRelas<ELFT> ra = a->template relsOrRelas<ELFT>();
   const RelsOrRelas<ELFT> rb = b->template relsOrRelas<ELFT>();
+  if (ra.areRelocsCrel())
+    return variableEq(a, ra.crels, b, rb.crels);
   return ra.areRelocsRel() || rb.areRelocsRel()
              ? variableEq(a, ra.rels, b, rb.rels)
              : variableEq(a, ra.relas, b, rb.relas);
@@ -505,7 +509,9 @@ template <class ELFT> void ICF<ELFT>::run() {
   for (unsigned cnt = 0; cnt != 2; ++cnt) {
     parallelForEach(sections, [&](InputSection *s) {
       const RelsOrRelas<ELFT> rels = s->template relsOrRelas<ELFT>();
-      if (rels.areRelocsRel())
+      if (rels.areRelocsCrel())
+        combineRelocHashes(cnt, s, rels.crels);
+      else if (rels.areRelocsRel())
         combineRelocHashes(cnt, s, rels.rels);
       else
         combineRelocHashes(cnt, s, rels.relas);
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 03ff4eadfe670..f1c0eb292361b 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -834,6 +834,7 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats,
     case SHT_STRTAB:
     case SHT_REL:
     case SHT_RELA:
+    case SHT_CREL:
     case SHT_NULL:
       break;
     case SHT_PROGBITS:
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index 0617f41e1e13a..8566baf61e1ab 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -84,6 +84,7 @@ class InputFile {
     assert(fileKind == ObjKind || fileKind == BinaryKind);
     return sections;
   }
+  void cacheDecodedCrel(size_t i, InputSectionBase *s) { sections[i] = s; }
 
   // Returns object file symbols. It is a runtime error to call this
   // function on files of other types.
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 7857d857488c0..570e485455bad 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -133,21 +133,56 @@ void InputSectionBase::decompress() const {
   compressed = false;
 }
 
-template <class ELFT> RelsOrRelas<ELFT> InputSectionBase::relsOrRelas() const {
+template <class ELFT>
+RelsOrRelas<ELFT> InputSectionBase::relsOrRelas(bool supportsCrel) const {
   if (relSecIdx == 0)
     return {};
   RelsOrRelas<ELFT> ret;
-  typename ELFT::Shdr shdr =
-      cast<ELFFileBase>(file)->getELFShdrs<ELFT>()[relSecIdx];
+  auto *f = cast<ObjFile<ELFT>>(file);
+  typename ELFT::Shdr shdr = f->template getELFShdrs<ELFT>()[relSecIdx];
+  if (shdr.sh_type == SHT_CREL) {
+    // Return an iterator if supported by caller.
+    if (supportsCrel) {
+      ret.crels = Relocs<typename ELFT::Crel>(
+          (const uint8_t *)f->mb.getBufferStart() + shdr.sh_offset);
+      return ret;
+    }
+    InputSectionBase *const &relSec = f->getSections()[relSecIdx];
+    // Otherwise, allocate a buffer to hold the decoded RELA relocations. When
+    // called for the first time, relSec is null (without --emit-relocs) or an
+    // InputSection with zero eqClass[0].
+    if (!relSec || !cast<InputSection>(relSec)->eqClass[0]) {
+      auto *sec = makeThreadLocal<InputSection>(*f, shdr, name);
+      f->cacheDecodedCrel(relSecIdx, sec);
+      sec->type = SHT_RELA;
+      sec->eqClass[0] = SHT_RELA;
+
+      RelocsCrel<ELFT::Is64Bits> entries(sec->content_);
+      sec->size = entries.size() * sizeof(typename ELFT::Rela);
+      auto *relas = makeThreadLocalN<typename ELFT::Rela>(entries.size());
+      sec->content_ = reinterpret_cast<uint8_t *>(relas);
+      for (auto [i, r] : llvm::enumerate(entries)) {
+        relas[i].r_offset = r.r_offset;
+        relas[i].setSymbolAndType(r.r_symidx, r.r_type, false);
+        relas[i].r_addend = r.r_addend;
+      }
+    }
+    ret.relas = {ArrayRef(
+        reinterpret_cast<const typename ELFT::Rela *>(relSec->content_),
+        relSec->size / sizeof(typename ELFT::Rela))};
+    return ret;
+  }
+
+  const void *content = f->mb.getBufferStart() + shdr.sh_offset;
+  size_t size = shdr.sh_size;
   if (shdr.sh_type == SHT_REL) {
-    ret.rels = ArrayRef(reinterpret_cast<const typename ELFT::Rel *>(
-                            file->mb.getBufferStart() + shdr.sh_offset),
-                        shdr.sh_size / sizeof(typename ELFT::Rel));
+    ret.rels = {ArrayRef(reinterpret_cast<const typename ELFT::Rel *>(content),
+                         size / sizeof(typename ELFT::Rel))};
   } else {
     assert(shdr.sh_type == SHT_RELA);
-    ret.relas = ArrayRef(reinterpret_cast<const typename ELFT::Rela *>(
-                             file->mb.getBufferStart() + shdr.sh_offset),
-                         shdr.sh_size / sizeof(typename ELFT::Rela));
+    ret.relas = {
+        ArrayRef(reinterpret_cast<const typename ELFT::Rela *>(content),
+                 size / sizeof(typename ELFT::Rela))};
   }
   return ret;
 }
@@ -1248,7 +1283,7 @@ SyntheticSection *EhInputSection::getParent() const {
 // .eh_frame is a sequence of CIE or FDE records.
 // This function splits an input section into records and returns them.
 template <class ELFT> void EhInputSection::split() {
-  const RelsOrRelas<ELFT> rels = relsOrRelas<ELFT>();
+  const RelsOrRelas<ELFT> rels = relsOrRelas<ELFT>(/*supportsCrel=*/false);
   // getReloc expects the relocations to be sorted by r_offset. See the comment
   // in scanRelocs.
   if (rels.areRelocsRel()) {
@@ -1414,10 +1449,14 @@ template void InputSection::writeTo<ELF32BE>(uint8_t *);
 template void InputSection::writeTo<ELF64LE>(uint8_t *);
 template void InputSection::writeTo<ELF64BE>(uint8_t *);
 
-template RelsOrRelas<ELF32LE> InputSectionBase::relsOrRelas<ELF32LE>() const;
-template RelsOrRelas<ELF32BE> InputSectionBase::relsOrRelas<ELF32BE>() const;
-template RelsOrRelas<ELF64LE> InputSectionBase::relsOrRelas<ELF64LE>() const;
-template RelsOrRelas<ELF64BE> InputSectionBase::relsOrRelas<ELF64BE>() const;
+template RelsOrRelas<ELF32LE>
+InputSectionBase::relsOrRelas<ELF32LE>(bool) const;
+template RelsOrRelas<ELF32BE>
+InputSectionBase::relsOrRelas<ELF32BE>(bool) const;
+template RelsOrRelas<ELF64LE>
+InputSectionBase::relsOrRelas<ELF64LE>(bool) const;
+template RelsOrRelas<ELF64BE>
+InputSectionBase::relsOrRelas<ELF64BE>(bool) const;
 
 template MergeInputSection::MergeInputSection(ObjFile<ELF32LE> &,
                                               const ELF32LE::Shdr &, StringRef);
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index c89a545e1543f..6659530a9c9c2 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -35,17 +35,21 @@ class OutputSection;
 
 LLVM_LIBRARY_VISIBILITY extern std::vector<Partition> partitions;
 
-// Returned by InputSectionBase::relsOrRelas. At least one member is empty.
+// Returned by InputSectionBase::relsOrRelas. At most one member is empty.
 template <class ELFT> struct RelsOrRelas {
   Relocs<typename ELFT::Rel> rels;
   Relocs<typename ELFT::Rela> relas;
+  Relocs<typename ELFT::Crel> crels;
   bool areRelocsRel() const { return rels.size(); }
+  bool areRelocsCrel() const { return crels.size(); }
 };
 
 #define invokeOnRelocs(sec, f, ...)                                            \
   {                                                                            \
     const RelsOrRelas<ELFT> rs = (sec).template relsOrRelas<ELFT>();           \
-    if (rs.areRelocsRel())                                                     \
+    if (rs.areRelocsCrel())                                                    \
+      f(__VA_ARGS__, rs.crels);                                                \
+    else if (rs.areRelocsRel())                                                \
       f(__VA_ARGS__, rs.rels);                                                 \
     else                                                                       \
       f(__VA_ARGS__, rs.relas);                                                \
@@ -209,7 +213,8 @@ class InputSectionBase : public SectionBase {
   // used by --gc-sections.
   InputSectionBase *nextInSectionGroup = nullptr;
 
-  template <class ELFT> RelsOrRelas<ELFT> relsOrRelas() const;
+  template <class ELFT>
+  RelsOrRelas<ELFT> relsOrRelas(bool supportsCrel = true) const;
 
   // InputSections that are dependent on us (reverse dependency for GC)
   llvm::TinyPtrVector<InputSection *> dependentSections;
@@ -483,7 +488,8 @@ class SyntheticSection : public InputSection {
 };
 
 inline bool isStaticRelSecType(uint32_t type) {
-  return type == llvm::ELF::SHT_RELA || type == llvm::ELF::SHT_REL;
+  return type == llvm::ELF::SHT_RELA || type == llvm::ELF::SHT_CREL ||
+         type == llvm::ELF::SHT_REL;
 }
 
 inline bool isDebugSection(const InputSectionBase &sec) {
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index e2208da18dce0..055fa21d44ca6 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -61,6 +61,8 @@ static StringRef getOutputSectionName(const InputSectionBase *s) {
         assert(config->relocatable && (rel->flags & SHF_LINK_ORDER));
         return s->name;
       }
+      if (s->type == SHT_CREL)
+        return saver().save(".crel" + out->name);
       if (s->type == SHT_RELA)
         return saver().save(".rela" + out->name);
       return saver().save(".rel" + out->name);
diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 45431e44a6c8c..16e5883c2002c 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -85,6 +85,13 @@ static uint64_t getAddend(InputSectionBase &sec,
   return rel.r_addend;
 }
 
+// Currently, we assume all input CREL relocations have an explicit addend.
+template <class ELFT>
+static uint64_t getAddend(InputSectionBase &sec,
+                          const typename ELFT::Crel &rel) {
+  return rel.r_addend;
+}
+
 template <class ELFT>
 template <class RelTy>
 void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
@@ -239,7 +246,8 @@ template <class ELFT> void MarkLive<ELFT>::run() {
   // all of them. We also want to preserve personality routines and LSDA
   // referenced by .eh_frame sections, so we scan them for that here.
   for (EhInputSection *eh : ctx.ehInputSections) {
-    const RelsOrRelas<ELFT> rels = eh->template relsOrRelas<ELFT>();
+    const RelsOrRelas<ELFT> rels =
+        eh->template relsOrRelas<ELFT>(/*supportsCrel=*/false);
     if (rels.areRelocsRel())
       scanEhFrameSection(*eh, rels.rels);
     else if (rels.relas.size())
@@ -310,6 +318,8 @@ template <class ELFT> void MarkLive<ELFT>::mark() {
       resolveReloc(sec, rel, false);
     for (const typename ELFT::Rela &rel : rels.relas)
       resolveReloc(sec, rel, false);
+    for (const typename ELFT::Crel &rel : rels.crels)
+      resolveReloc(sec, rel, false);
 
     for (InputSectionBase *isec : sec.dependentSections)
       enqueue(isec, 0);
diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index 60de10061c53d..29f18f89274f3 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -18,6 +18,7 @@
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/Config/llvm-config.h" // LLVM_ENABLE_ZLIB
 #include "llvm/Support/Compression.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/Parallel.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/TimeProfiler.h"
@@ -115,7 +116,19 @@ void OutputSection::recordSection(InputSectionBase *isec) {
 // other InputSections.
 void OutputSection::commitSection(InputSection *isec) {
   if (LLVM_UNLIKELY(type != isec->type)) {
-    if (hasInputSections || typeIsSet) {
+    if (!hasInputSections && !typeIsSet) {
+      type = isec->type;
+    } else if (isStaticRelSecType(type) && isStaticRelSecType(isec->type) &&
+               (type == SHT_CREL) != (isec->type == SHT_CREL)) {
+      // Combine mixed SHT_REL[A] and SHT_CREL to SHT_CREL.
+      type = SHT_CREL;
+      if (type == SHT_REL) {
+        if (name.consume_front(".rel"))
+          name = saver().save(".crel" + name);
+      } else if (name.consume_front(".rela")) {
+        name = saver().save(".crel" + name);
+      }
+    } else {
       if (typeIsSet || !canMergeToProgbits(type) ||
           !canMergeToProgbits(isec->type)) {
         // The (NOLOAD) changes the section type to SHT_NOBITS, the intention is
@@ -133,8 +146,6 @@ void OutputSection::commitSection(InputSection *isec) {
       }
       if (!typeIsSet)
         type = SHT_PROGBITS;
-    } else {
-      type = isec->type;
     }
   }
   if (!hasInputSections) {
@@ -470,6 +481,11 @@ void OutputSection::writeTo(uint8_t *buf, parallel::TaskGroup &tg) {
   llvm::TimeTraceScope timeScope("Write sections", name);
   if (type == SHT_NOBITS)
     return;
+  if (type == SHT_CREL && !(flags & SHF_ALLOC)) {
+    buf += encodeULEB128(crelHeader, buf);
+    memcpy(buf, crelBody.data(), crelBody.size());
+    return;
+  }
 
   // If the section is compressed due to
   // --compress-debug-section/--compress-sections, the content is already known.
@@ -505,6 +521,12 @@ void OutputSection::writeTo(uint8_t *buf, parallel::TaskGroup &tg) {
   if (nonZeroFiller)
     fill(buf, sections.empty() ? size : sections[0]->outSecOff, filler);
 
+  if (type == SHT_CREL && !(flags & SHF_ALLOC)) {
+    buf += encodeULEB128(crelHeader, buf);
+    memcpy(buf, crelBody.data(), crelBody.size());
+    return;
+  }
+
   auto fn = [=](size_t begin, size_t end) {
     size_t numSections = sections.size();
     for (size_t i = begin; i != end; ++i) {
@@ -592,6 +614,103 @@ static void finalizeShtGroup(OutputSection *os, InputSection *section) {
   os->size = (1 + seen.size()) * sizeof(uint32_t);
 }
 
+template <class uint>
+LLVM_ATTRIBUTE_ALWAYS_INLINE static void
+encodeOneCrel(raw_svector_ostream &os, Elf_Crel<sizeof(uint) == 8> &out,
+              uint offset, const Symbol &sym, uint32_t type, uint addend) {
+  const auto deltaOffset = static_cast<uint64_t>(offset - out.r_offset);
+  out.r_offset = offset;
+  int64_t symidx = in.symTab->getSymbolIndex(sym);
+  if (sym.type == STT_SECTION) {
+    auto *d = dyn_cast<Defined>(&sym);
+    if (d) {
+      SectionBase *section = d->section;
+      assert(section->isLive());
+      addend = sym.getVA(addend) - section->getOutputSection()->addr;
+    } else {
+      // Encode R_*_NONE(symidx=0).
+      symidx = type = addend = 0;
+    }
+  }
+
+  // Similar to llvm::ELF::encodeCrel.
+  uint8_t b = deltaOffset * 8 + (out.r_symidx != symidx) +
+              (out.r_type != type ? 2 : 0) +
+              (uint(out.r_addend) != addend ? 4 : 0);
+  if (deltaOffset < 0x10) {
+    os << char(b);
+  } else {
+    os << char(b | 0x80);
+    encodeULEB128(deltaOffset >> 4, os);
+  }
+  if (b & 1) {
+    encodeSLEB128(static_cast<int32_t>(symidx - out.r_symidx), os);
+    out.r_symidx = symidx;
+  }
+  if (b & 2) {
+    encodeSLEB128(static_cast<int32_t>(type - out.r_type), os);
+    out.r_type = type;
+  }
+  if (b & 4) {
+    encodeSLEB128(std::make_signed_t<uint>(addend - out.r_addend), os);
+    out.r_addend = addend;
+  }
+}
+
+template <class ELFT>
+static size_t relToCrel(raw_svector_ostream &os, Elf_Crel<ELFT::Is64Bits> &out,
+                        InputSection *relSec, InputSectionBase *sec) {
+  const auto &file = *cast<ELFFileBase>(relSec->file);
+  if (relSec->type == SHT_REL) {
+    // REL conversion is complex and unsupported yet.
+    errorOrWarn(toString(relSec) + ": REL cannot be converted to CREL");
+    return 0;
+  }
+  auto rels = relSec->getDataAs<typename ELFT::Rela>();
+  for (auto rel : rels) {
+    encodeOneCrel<typename ELFT::uint>(
+        os, out, sec->getVA(rel.r_offset), file.getRelocTargetSym(rel),
+        rel.getType(config->isMips64EL), getAddend<ELFT>(rel));
+  }
+  return rels.size();
+}
+
+// Compute the content of a non-alloc CREL section due to -r or --emit-relocs.
+// Input CREL sections are decoded while REL[A] need to be converted.
+template <bool is64> void OutputSection::finalizeNonAllocCrel() {
+  using uint = typename Elf_Crel_Impl<is64>::uint;
+  raw_svector_ostream os(crelBody);
+  uint64_t totalCount = 0;
+  Elf_Crel<is64> out{};
+  assert(commands.size() == 1);
+  auto *isd = cast<InputSectionDescription>(commands[0]);
+  for (InputSection *relSec : isd->sections) {
+    const auto &file = *cast<ELFFileBase>(relSec->file);
+    InputSectionBase *sec = relSec->getRelocatedSection();
+    if (relSec->type == SHT_CREL) {
+      RelocsCrel<is64> entries(relSec->content_);
+      totalCount += entries.size();
+      for (Elf_Crel_Impl<is64> r : entries) {
+        encodeOneCrel<uint>(os, out, uint(sec->getVA(r.r_offset)),
+                            file.getSymbol(r.r_symidx), r.r_type, r.r_addend);
+      }
+      continue;
+    }
+
+    // Convert REL[A] to CREL.
+    if constexpr (is64) {
+      totalCount += config->isLE ? relToCrel<ELF64LE>(os, out, relSec, sec)
+                                 : relToCrel<ELF64BE>(os, out, relSec, sec);
+    } else {
+      totalCount += config->isLE ? relToCrel<ELF32LE>(os, out, relSec, sec)
+                                 : relToCrel<ELF32BE>(os, out, relSec, sec);
+    }
+  }
+
+  crelHeader = totalCount * 8 + 4;
+  size = getULEB128Size(crelHeader) + crelBody.size();
+}
+
 void OutputSection::finalize() {
   InputSection *first = getFirstInputSection(this);
 
@@ -628,6 +747,13 @@ void OutputSection::finalize() {
   InputSectionBase *s = first->getRelocatedSection();
   info = s->getOutputSection()->sectionIndex;
   flags |= SHF_INFO_LINK;
+  // Finalize the content of non-alloc CREL.
+  if (type == SHT_CREL) {
+    if (config->is64)
+      finalizeNonAllocCrel<true>();
+    else
+      finalizeNonAllocCrel<false>();
+  }
 }
 
 // Returns true if S is in one of the many forms the compiler driver may pass
diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h
index 78fede48a23f2..8c0c52f34ac9f 100644
--- a/lld/ELF/OutputSections.h
+++ b/lld/ELF/OutputSections.h
@@ -84,6 +84,11 @@ class OutputSection final : public SectionBase {
   Expr alignExpr;
   Expr lmaExpr;
   Expr subalignExpr;
+
+  // Used by non-alloc SHT_CREL to hold the header and content byte stream.
+  uint64_t crelHeader = 0;
+  SmallVector<char, 0> crelBody;
+
   SmallVector<SectionCommand *, 0> commands;
   SmallVector<StringRef, 0> phdrs;
   std::optional<std::array<uint8_t, 4>> filler;
@@ -106,6 +111,7 @@ class OutputSection final : public SectionBase {
   // DATA_RELRO_END.
   bool relro = false;
 
+  template <bool is64> void finalizeNonAllocCrel();
   void finalize();
   template <class ELFT>
   void writeTo(uint8_t *buf, llvm::parallel::TaskGroup &tg);
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 9a799cd286135..e19b1e6c8efb8 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1441,10 +1441,11 @@ void RelocationScanner::scanOne(typename Relocs<RelTy>::const_iterator &i) {
   uint32_t symIndex = rel.getSymbol(config->isMips64EL);
   Symbol &sym = sec->getFile<ELFT>()->getSymbol(symIndex);
   RelType type;
-  if constexpr (ELFT::Is64Bits) {
+  if constexpr (ELFT::Is64Bits || RelTy::IsCrel) {
     type = rel.getType(config->isMips64EL);
     ++i;
   } else {
+    // CREL is unsupported for MIPS N32.
     if (config->mipsN32Abi) {
       type = getMipsN32RelType(i);
     } else {
@@ -1497,15 +1498,18 @@ void RelocationScanner::scanOne(typename Relocs<RelTy>::const_iterator &i) {
 
     if ((type == R_PPC64_TLSGD && expr == R_TLSDESC_CALL) ||
         (type == R_PPC64_TLSLD && expr == R_TLSLD_HINT)) {
-      if (i == end) {
-        errorOrWarn("R_PPC64_TLSGD/R_PPC64_TLSLD may not be the last "
-                    "relocation" +
-                    getLocation(*sec, sym, offset));
-        return;
+      // Skip the error check for CREL, which does not set `end`.
+      if constexpr (!RelTy::IsCrel) {
+        if (i == end) {
+          errorOrWarn("R_PPC64_TLSGD/R_PPC64_TLSLD may not be the last "
+                      "relocation" +
+                      getLocation(*sec, sym, offset));
+          return;
+        }
       }
 
-      // Offset the 4-byte aligned R_PPC64_TLSGD by one byte in the NOTOC case,
-      // so we can discern it later from the toc-case.
+      // Offset the 4-byte aligned R_PPC64_TLSGD by one byte in the NOTOC
+      // case, so we can discern it later from the toc-case.
       if (i->getType(/*isMips64EL=*/false) == R_PPC64_REL24_NOTOC)
         ++offset;
     }
@@ -1545,7 +1549,7 @@ void RelocationScanner::scanOne(typename Relocs<RelTy>::const_iterator &i) {
 // instructions are generated by very old IBM XL compilers. Work around the
 // issue by disabling GD/LD to IE/LE relaxation.
 template <class RelTy>
-static void checkPPC64TLSRelax(InputSectionBase &sec, ArrayRef<RelTy> rels) {
+static void checkPPC64TLSRelax(InputSectionBase &sec, Relocs<RelTy> rels) {
   // Skip if sec is synthetic (sec.file is null) or if sec has been marked.
   if (!sec.file || sec.file->ppc64DisableTLSRelax)
     return;
@@ -1593,9 +1597,15 @@ void RelocationScanner::scan(Relocs<RelTy> rels) {
   if (isa<EhInputSection>(sec) || config->emachine == EM_S390)
     rels = sortRels(rels, storage);
 
-  end = static_cast<const void *>(rels.end());
-  for (auto i = rels.begin(); i != end;)
-    scanOne<ELFT, RelTy>(i);
+  if constexpr (RelTy::IsCrel) {
+    for (auto i = rels.begin(); i != rels.end();)
+      scanOne<ELFT, RelTy>(i);
+  } else {
+    // The non-CREL code path has additional check for PPC64 TLS.
+    end = static_cast<const void *>(rels.end());
+    for (auto i = rels.begin(); i != end;)
+      scanOne<ELFT, RelTy>(i);
+  }
 
   // Sort relocations by offset for more efficient searching for
   // R_RISCV_PCREL_HI20 and R_PPC64_ADDR64.
@@ -1611,7 +1621,9 @@ template <class ELFT> void RelocationScanner::scanSection(InputSectionBase &s) {
   sec = &s;
   getter = OffsetGetter(s);
   const RelsOrRelas<ELFT> rels = s.template relsOrRelas<ELFT>();
-  if (rels.areRelocsRel())
+  if (rels.areRelocsCrel())
+    scan<ELFT>(rels.crels);
+  else if (rels.areRelocsRel())
     scan<ELFT>(rels.rels);
   else
     scan<ELFT>(rels.relas);
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index 77d8d52ca3d3f..aaa4581490a28 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -12,6 +12,7 @@
 #include "lld/Common/LLVM.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Object/ELFTypes.h"
 #include <vector>
 
 namespace lld::elf {
@@ -205,11 +206,91 @@ class ThunkCreator {
   uint32_t pass = 0;
 };
 
+// Decode LEB128 without error checking. Only used by performance critical code
+// like RelocsCrel.
+inline uint64_t readLEB128(const uint8_t *&p, uint64_t leb) {
+  uint64_t acc = 0, shift = 0, byte;
+  do {
+    byte = *p++;
+    acc |= (byte - 128 * (byte >= leb)) << shift;
+    shift += 7;
+  } while (byte >= 128);
+  return acc;
+}
+inline uint64_t readULEB128(const uint8_t *&p) { return readLEB128(p, 128); }
+inline int64_t readSLEB128(const uint8_t *&p) { return readLEB128(p, 64); }
+
+// This class implements a CREL iterator that does not allocate extra memory.
+template <bool is64> struct RelocsCrel {
+  using uint = std::conditional_t<is64, uint64_t, uint32_t>;
+  struct const_iterator {
+    using iterator_category = std::forward_iterator_tag;
+    using value_type = llvm::object::Elf_Crel_Impl<is64>;
+    using difference_type = ptrdiff_t;
+    using pointer = value_type *;
+    using reference = const value_type &;
+    uint32_t count;
+    uint8_t flagBits, shift;
+    const uint8_t *p;
+    llvm::object::Elf_Crel_Impl<is64> crel{};
+    const_iterator(size_t hdr, const uint8_t *p)
+        : count(hdr / 8), flagBits(hdr & 4 ? 3 : 2), shift(hdr % 4), p(p) {
+      if (count)
+        step();
+    }
+    void step() {
+      // See object::decodeCrel.
+      const uint8_t b = *p++;
+      crel.r_offset += b >> flagBits << shift;
+      if (b >= 0x80)
+        crel.r_offset +=
+            ((readULEB128(p) << (7 - flagBits)) - (0x80 >> flagBits)) << shift;
+      if (b & 1)
+        crel.r_symidx += readSLEB128(p);
+      if (b & 2)
+        crel.r_type += readSLEB128(p);
+      if (b & 4 && flagBits == 3)
+        crel.r_addend += static_cast<uint>(readSLEB128(p));
+    }
+    llvm::object::Elf_Crel_Impl<is64> operator*() const { return crel; };
+    const llvm::object::Elf_Crel_Impl<is64> *operator->() const {
+      return &crel;
+    }
+    // For llvm::enumerate.
+    bool operator==(const const_iterator &r) const { return count == r.count; }
+    bool operator!=(const const_iterator &r) const { return count != r.count; }
+    const_iterator &operator++() {
+      if (--count)
+        step();
+      return *this;
+    }
+    // For RelocationScanner::scanOne.
+    void operator+=(size_t n) {
+      for (; n; --n)
+        operator++();
+    }
+  };
+
+  size_t hdr = 0;
+  const uint8_t *p = nullptr;
+
+  constexpr RelocsCrel() = default;
+  RelocsCrel(const uint8_t *p) : hdr(readULEB128(p)) { this->p = p; }
+  size_t size() const { return hdr / 8; }
+  const_iterator begin() const { return {hdr, p}; }
+  const_iterator end() const { return {0, nullptr}; }
+};
+
 template <class RelTy> struct Relocs : ArrayRef<RelTy> {
   Relocs() = default;
   Relocs(ArrayRef<RelTy> a) : ArrayRef<RelTy>(a) {}
 };
 
+template <bool is64>
+struct Relocs<llvm::object::Elf_Crel_Impl<is64>> : RelocsCrel<is64> {
+  using RelocsCrel<is64>::RelocsCrel;
+};
+
 // Return a int64_t to make sure we get the sign extension out of the way as
 // early as possible.
 template <class ELFT>
@@ -220,6 +301,10 @@ template <class ELFT>
 static inline int64_t getAddend(const typename ELFT::Rela &rel) {
   return rel.r_addend;
 }
+template <class ELFT>
+static inline int64_t getAddend(const typename ELFT::Crel &rel) {
+  return rel.r_addend;
+}
 
 template <typename RelTy>
 inline Relocs<RelTy> sortRels(Relocs<RelTy> rels,
@@ -235,6 +320,13 @@ inline Relocs<RelTy> sortRels(Relocs<RelTy> rels,
   return rels;
 }
 
+template <bool is64>
+inline Relocs<llvm::object::Elf_Crel_Impl<is64>>
+sortRels(Relocs<llvm::object::Elf_Crel_Impl<is64>> rels,
+         SmallVector<llvm::object::Elf_Crel_Impl<is64>, 0> &storage) {
+  return {};
+}
+
 // Returns true if Expr refers a GOT entry. Note that this function returns
 // false for TLS variables even though they need GOT, because TLS variables uses
 // GOT differently than the regular variables.
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index b40ff0bc3cb03..41053c6472751 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -455,7 +455,8 @@ template <class ELFT>
 void EhFrameSection::addSectionAux(EhInputSection *sec) {
   if (!sec->isLive())
     return;
-  const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>();
+  const RelsOrRelas<ELFT> rels =
+      sec->template relsOrRelas<ELFT>(/*supportsCrel=*/false);
   if (rels.areRelocsRel())
     addRecords<ELFT>(sec, rels.rels);
   else
@@ -489,7 +490,8 @@ void EhFrameSection::iterateFDEWithLSDA(
   DenseSet<size_t> ciesWithLSDA;
   for (EhInputSection *sec : sections) {
     ciesWithLSDA.clear();
-    const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>();
+    const RelsOrRelas<ELFT> rels =
+        sec->template relsOrRelas<ELFT>(/*supportsCrel=*/false);
     if (rels.areRelocsRel())
       iterateFDEWithLSDAAux<ELFT>(*sec, rels.rels, ciesWithLSDA, fn);
     else
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 5cffdb771a738..8e3a746a08eb2 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -401,10 +401,19 @@ template <class ELFT> static void markUsedLocalSymbols() {
       InputSection *isec = dyn_cast_or_null<InputSection>(s);
       if (!isec)
         continue;
-      if (isec->type == SHT_REL)
+      if (isec->type == SHT_REL) {
         markUsedLocalSymbolsImpl(f, isec->getDataAs<typename ELFT::Rel>());
-      else if (isec->type == SHT_RELA)
+      } else if (isec->type == SHT_RELA) {
         markUsedLocalSymbolsImpl(f, isec->getDataAs<typename ELFT::Rela>());
+      } else if (isec->type == SHT_CREL) {
+        // The is64=true variant also works with ELF32 since only the r_symidx
+        // member is used.
+        for (Elf_Crel_Impl<true> r : RelocsCrel<true>(isec->content_)) {
+          Symbol &sym = file->getSymbol(r.r_symidx);
+          if (sym.isLocal())
+            sym.used = true;
+        }
+      }
     }
   }
 }
diff --git a/lld/test/ELF/crel-rel-mixed.s b/lld/test/ELF/crel-rel-mixed.s
new file mode 100644
index 0000000000000..a69fa1c09b436
--- /dev/null
+++ b/lld/test/ELF/crel-rel-mixed.s
@@ -0,0 +1,22 @@
+# REQUIRES: arm
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=armv7a -crel a.s -o a.o
+# RUN: llvm-mc -filetype=obj -triple=armv7a b.s -o b.o
+# RUN: not ld.lld -r a.o b.o 2>&1 | FileCheck %s --check-prefix=ERR
+
+# ERR: error: b.o:(.rel.text): REL cannot be converted to CREL
+
+#--- a.s
+.global _start, foo
+_start:
+  bl foo
+  bl .text.foo
+
+.section .text.foo,"ax"
+foo:
+  nop
+
+#--- b.s
+.globl fb
+fb:
+  bl fb
diff --git a/lld/test/ELF/crel.s b/lld/test/ELF/crel.s
new file mode 100644
index 0000000000000..d7c87be9a5402
--- /dev/null
+++ b/lld/test/ELF/crel.s
@@ -0,0 +1,90 @@
+# REQUIRES: x86
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 -crel a.s -o a.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64 -crel b.s -o b.o
+# RUN: ld.lld -pie a.o b.o -o out
+# RUN: llvm-objdump -d out | FileCheck %s
+# RUN: llvm-readelf -Srs out | FileCheck %s --check-prefix=RELOC
+
+# CHECK:       <_start>:
+# CHECK-NEXT:    callq {{.*}} <foo>
+# CHECK-NEXT:    callq {{.*}} <foo>
+# CHECK-EMPTY:
+# CHECK-NEXT:  <foo>:
+# CHECK-NEXT:    leaq {{.*}}  # 0x27c
+# CHECK-NEXT:    leaq {{.*}}  # 0x278
+
+# RELOC:  .data             PROGBITS        {{0*}}[[#%x,DATA:]]
+
+# RELOC:  {{0*}}[[#DATA+8]]  0000000000000008 R_X86_64_RELATIVE [[#%x,DATA+0x8000000000000000]]
+
+# RUN: ld.lld -pie --emit-relocs a.o b.o -o out1
+# RUN: llvm-objdump -dr out1 | FileCheck %s --check-prefix=CHECKE
+# RUN: llvm-readelf -Sr out1 | FileCheck %s --check-prefix=RELOCE
+
+# CHECKE:       <_start>:
+# CHECKE-NEXT:    callq {{.*}} <foo>
+# CHECKE-NEXT:      R_X86_64_PLT32 foo-0x4
+# CHECKE-NEXT:    callq {{.*}} <foo>
+# CHECKE-NEXT:      R_X86_64_PLT32 .text+0x6
+# CHECKE-EMPTY:
+# CHECKE-NEXT:  <foo>:
+# CHECKE-NEXT:    leaq {{.*}}
+# CHECKE-NEXT:      R_X86_64_PC32 .L.str-0x4
+# CHECKE-NEXT:    leaq {{.*}}
+# CHECKE-NEXT:      R_X86_64_PC32 .L.str1-0x4
+
+# RELOCE:      .rodata             PROGBITS        {{0*}}[[#%x,RO:]]
+# RELOCE:      .eh_frame           PROGBITS        {{0*}}[[#%x,EHFRAME:]]
+# RELOCE:      .data               PROGBITS        {{0*}}[[#%x,DATA:]]
+
+# RELOCE:      Relocation section '.crel.data' at offset {{.*}} contains 2 entries:
+# RELOCE-NEXT:     Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+# RELOCE-NEXT: {{0*}}[[#DATA+8]] {{.*}}           R_X86_64_64            {{.*}}           .data - 8000000000000000
+# RELOCE-NEXT: {{0*}}[[#DATA+24]]{{.*}}           R_X86_64_64            {{.*}}           .data - 1
+# RELOCE:      Relocation section '.crel.eh_frame' at offset {{.*}} contains 2 entries:
+# RELOCE-NEXT:     Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+# RELOCE-NEXT: {{0*}}[[#EHFRAME+32]] {{.*}}       R_X86_64_PC32          {{.*}}           .text + 0
+# RELOCE-NEXT: {{0*}}[[#EHFRAME+52]] {{.*}}       R_X86_64_PC32          {{.*}}           .text + a
+# RELOCE:      Relocation section '.crel.rodata' at offset {{.*}} contains 4 entries:
+# RELOCE-NEXT:     Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+# RELOCE-NEXT: {{0*}}[[#RO+8]]   {{.*}}           R_X86_64_PC32          {{.*}}           foo + 0
+# RELOCE-NEXT: {{0*}}[[#RO+23]]  {{.*}}           R_X86_64_PC32          {{.*}}           foo + 3f
+# RELOCE-NEXT: {{0*}}[[#RO+39]]  {{.*}}           R_X86_64_PC64          {{.*}}           foo + 7f
+# RELOCE-NEXT: {{0*}}[[#RO+47]]  {{.*}}           R_X86_64_PC32          {{.*}}           _start - 1f81
+
+#--- a.s
+.global _start, foo
+_start:
+  .cfi_startproc # Test .eh_frame
+  call foo
+  call .text.foo
+  .cfi_endproc
+
+.section .text.foo,"ax"
+foo:
+  .cfi_startproc
+  leaq .L.str(%rip), %rsi
+  leaq .L.str1(%rip), %rsi
+  .cfi_endproc
+
+.section .rodata.str1.1,"aMS",@progbits,1
+.L.str:
+  .asciz  "abc"
+.L.str1:
+  .asciz  "def"
+
+.data
+.quad 0
+.quad .data - 0x8000000000000000
+.quad 0
+.quad .data - 1
+
+#--- b.s
+.section .rodata,"a"
+.long foo - .
+.space 15-4
+.long foo - . + 63  # offset+=15
+.space 16-4
+.quad foo - . + 127  # offset+=16
+.long _start - . - 8065
diff --git a/lld/test/ELF/debug-names.s b/lld/test/ELF/debug-names.s
index 888dd9007ed12..1bbb07b065e33 100644
--- a/lld/test/ELF/debug-names.s
+++ b/lld/test/ELF/debug-names.s
@@ -10,7 +10,7 @@
 
 # REQUIRES: x86
 # RUN: rm -rf %t && split-file %s %t && cd %t
-# RUN: llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64 --crel a.s -o a.o
 # RUN: llvm-mc -filetype=obj -triple=x86_64 b.s -o b.o
 
 # RUN: ld.lld --debug-names --no-debug-names a.o b.o -o out0
diff --git a/lld/test/ELF/gc-sections.s b/lld/test/ELF/gc-sections.s
index 94adc8210b4bc..31e00d495146a 100644
--- a/lld/test/ELF/gc-sections.s
+++ b/lld/test/ELF/gc-sections.s
@@ -8,6 +8,10 @@
 # RUN: ld.lld --export-dynamic --gc-sections %t -o %t2
 # RUN: llvm-readobj --sections --symbols %t2 | FileCheck -check-prefix=GC2 %s
 
+# RUN: llvm-mc -filetype=obj -triple=x86_64 --crel %s -o %t.o
+# RUN: ld.lld --gc-sections --print-gc-sections %t.o -o %t2 | FileCheck --check-prefix=GC1-DISCARD %s
+# RUN: llvm-readobj --sections --symbols %t2 | FileCheck -check-prefix=GC1 %s
+
 # NOGC: Name: .eh_frame
 # NOGC: Name: .text
 # NOGC: Name: .init
diff --git a/lld/test/ELF/icf1.s b/lld/test/ELF/icf1.s
index 5c6e667d53c78..9682b06f4606f 100644
--- a/lld/test/ELF/icf1.s
+++ b/lld/test/ELF/icf1.s
@@ -3,6 +3,9 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t
 # RUN: ld.lld %t -o /dev/null --icf=all --print-icf-sections | FileCheck %s
 
+# RUN: llvm-mc -filetype=obj -triple=x86_64 --crel %s -o %t
+# RUN: ld.lld %t -o /dev/null --icf=all --print-icf-sections | FileCheck %s
+
 # CHECK: selected section {{.*}}:(.text.f1)
 # CHECK:   removing identical section {{.*}}:(.text.f2)
 
diff --git a/lld/test/ELF/icf4.s b/lld/test/ELF/icf4.s
index ff13a7ebff3da..310577a55c0d8 100644
--- a/lld/test/ELF/icf4.s
+++ b/lld/test/ELF/icf4.s
@@ -1,6 +1,6 @@
 # REQUIRES: x86
 
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t
+# RUN: llvm-mc --crel -filetype=obj -triple=x86_64-unknown-linux %s -o %t
 # RUN: ld.lld %t -o /dev/null --icf=all --print-icf-sections | count 0
 
 .globl _start, f1, f2
diff --git a/lld/test/ELF/linkerscript/nocrossrefs.test b/lld/test/ELF/linkerscript/nocrossrefs.test
index f13d50a03be87..5eb56190fe63b 100644
--- a/lld/test/ELF/linkerscript/nocrossrefs.test
+++ b/lld/test/ELF/linkerscript/nocrossrefs.test
@@ -2,6 +2,7 @@
 # RUN: rm -rf %t && split-file %s %t && cd %t
 
 # RUN: llvm-mc --triple=x86_64 -filetype=obj a.s -o a.o
+# RUN: llvm-mc --triple=x86_64 -filetype=obj -crel a.s -o ac.o
 # RUN: llvm-mc --triple=x86_64 -filetype=obj data.s -o data.o
 # RUN: ld.lld a.o data.o -T 0.t 2>&1 | FileCheck %s --check-prefix=CHECK0 --implicit-check-not=warning:
 
@@ -9,7 +10,8 @@
 # CHECK0-NEXT: warning: 0.t:4: ignored with fewer than 2 output sections
 
 # RUN: not ld.lld a.o data.o -T 1.t 2>&1 | FileCheck %s --check-prefix=CHECK1 --implicit-check-not=error:
-# CHECK1:      error: a.o:(.text.start+0x11): prohibited cross reference from '.text' to 'data' in '.data'
+# RUN: not ld.lld ac.o data.o -T 1.t 2>&1 | FileCheck %s --check-prefix=CHECK1 --implicit-check-not=error:
+# CHECK1:      error: a{{.?}}.o:(.text.start+0x11): prohibited cross reference from '.text' to 'data' in '.data'
 
 ## .text and .text1 are in two NOCROSSREFS commands. Violations are reported twice.
 # RUN: not ld.lld --threads=1 a.o data.o -T 2.t 2>&1 | FileCheck %s --check-prefix=CHECK2 --implicit-check-not=error:
diff --git a/lld/test/ELF/relocatable-crel-32.s b/lld/test/ELF/relocatable-crel-32.s
new file mode 100644
index 0000000000000..8fbf236d77452
--- /dev/null
+++ b/lld/test/ELF/relocatable-crel-32.s
@@ -0,0 +1,71 @@
+# REQUIRES: ppc
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=powerpc -crel a.s -o a.o
+# RUN: llvm-mc -filetype=obj -triple=powerpc -crel b.s -o b.o
+# RUN: ld.lld -r b.o a.o -o out
+# RUN: llvm-readobj -r out | FileCheck %s --check-prefixes=CHECK,CRELFOO
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc a.s -o a1.o
+# RUN: ld.lld -r b.o a1.o -o out1
+# RUN: llvm-readobj -r out1 | FileCheck %s --check-prefixes=CHECK,RELAFOO
+# RUN: ld.lld -r a1.o b.o -o out2
+# RUN: llvm-readobj -r out2 | FileCheck %s --check-prefixes=CHECK2
+
+# CHECK:      Relocations [
+# CHECK-NEXT:   Section (2) .crel.text {
+# CHECK-NEXT:     0x0 R_PPC_REL24 fb 0x0
+# CHECK-NEXT:     0x4 R_PPC_REL24 foo 0x0
+# CHECK-NEXT:     0x8 R_PPC_REL24 .text.foo 0x0
+# CHECK-NEXT:     0xE R_PPC_ADDR16_HA .rodata.str1.1 0x4
+# CHECK-NEXT:     0x12 R_PPC_ADDR16_LO .rodata.str1.1 0x4
+# CHECK-NEXT:     0x16 R_PPC_ADDR16_HA .rodata.str1.1 0x0
+# CHECK-NEXT:     0x1A R_PPC_ADDR16_LO .rodata.str1.1 0x0
+# CHECK-NEXT:   }
+# CRELFOO-NEXT: Section (4) .crel.text.foo {
+# RELAFOO-NEXT: Section (4) .rela.text.foo {
+# CHECK-NEXT:     0x0 R_PPC_REL24 g 0x0
+# CHECK-NEXT:     0x4 R_PPC_REL24 g 0x0
+# CHECK-NEXT:   }
+# CHECK-NEXT: ]
+
+# CHECK2:      Relocations [
+# CHECK2-NEXT:   Section (2) .crel.text {
+# CHECK2-NEXT:     0x0 R_PPC_REL24 foo 0x0
+# CHECK2-NEXT:     0x4 R_PPC_REL24 .text.foo 0x0
+# CHECK2-NEXT:     0xA R_PPC_ADDR16_HA .rodata.str1.1 0x4
+# CHECK2-NEXT:     0xE R_PPC_ADDR16_LO .rodata.str1.1 0x4
+# CHECK2-NEXT:     0x12 R_PPC_ADDR16_HA .rodata.str1.1 0x0
+# CHECK2-NEXT:     0x16 R_PPC_ADDR16_LO .rodata.str1.1 0x0
+# CHECK2-NEXT:     0x18 R_PPC_REL24 fb 0x0
+# CHECK2-NEXT:   }
+# CHECK2-NEXT:   Section (4) .rela.text.foo {
+# CHECK2-NEXT:     0x0 R_PPC_REL24 g 0x0
+# CHECK2-NEXT:     0x4 R_PPC_REL24 g 0x0
+# CHECK2-NEXT:   }
+# CHECK2-NEXT: ]
+
+#--- a.s
+.global _start, foo
+_start:
+  bl foo
+  bl .text.foo
+  lis 3, .L.str@ha
+  la 3, .L.str@l(3)
+  lis 3, .L.str1@ha
+  la 3, .L.str1@l(3)
+
+.section .text.foo,"ax"
+foo:
+  bl g
+  bl g
+
+.section .rodata.str1.1,"aMS",@progbits,1
+.L.str:
+  .asciz  "abc"
+.L.str1:
+  .asciz  "def"
+
+#--- b.s
+.globl fb
+fb:
+  bl fb
diff --git a/lld/test/ELF/relocatable-crel.s b/lld/test/ELF/relocatable-crel.s
new file mode 100644
index 0000000000000..6e97c3e24d66c
--- /dev/null
+++ b/lld/test/ELF/relocatable-crel.s
@@ -0,0 +1,107 @@
+# REQUIRES: x86
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 -crel a.s -o a.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64 -crel b.s -o b.o
+# RUN: ld.lld -r b.o a.o -o out
+# RUN: llvm-readobj -r out | FileCheck %s --check-prefixes=CHECK,CRELFOO
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64 a.s -o a1.o
+# RUN: ld.lld -r b.o a1.o -o out1
+# RUN: llvm-readobj -r out1 | FileCheck %s --check-prefixes=CHECK,RELAFOO
+# RUN: ld.lld -r a1.o b.o -o out2
+# RUN: llvm-readobj -r out2 | FileCheck %s --check-prefixes=CHECK2
+
+# CHECK:      Relocations [
+# CHECK-NEXT:   .crel.text {
+# CHECK-NEXT:     0x1 R_X86_64_PLT32 fb 0xFFFFFFFFFFFFFFFC
+# CHECK-NEXT:     0x9 R_X86_64_PLT32 foo 0xFFFFFFFFFFFFFFFC
+# CHECK-NEXT:     0xE R_X86_64_PLT32 .text.foo 0xFFFFFFFFFFFFFFFC
+# CHECK-NEXT:   }
+# CHECK-NEXT:   .crel.rodata {
+# CHECK-NEXT:     0x0 R_X86_64_PC32 foo 0x0
+# CHECK-NEXT:     0xF R_X86_64_PC32 foo 0x3F
+# CHECK-NEXT:     0x1F R_X86_64_PC64 foo 0x7F
+# CHECK-NEXT:     0x27 R_X86_64_PC32 _start 0xFFFFFFFFFFFFE07F
+# CHECK-COUNT-12:      R_X86_64_32 _start 0x0
+# CHECK-NEXT:   }
+# CRELFOO-NEXT: .crel.text.foo {
+# RELAFOO-NEXT: .rela.text.foo {
+# CHECK-NEXT:     0x3 R_X86_64_PC32 .L.str 0xFFFFFFFFFFFFFFFC
+# CHECK-NEXT:     0xA R_X86_64_PC32 .L.str1 0xFFFFFFFFFFFFFFFC
+# CHECK-NEXT:     0xF R_X86_64_PLT32 g 0xFFFFFFFFFFFFFFFC
+# CHECK-NEXT:     0x14 R_X86_64_PLT32 g 0xFFFFFFFFFFFFFFFC
+# CHECK-NEXT:   }
+# CRELFOO-NEXT: .crel.data {
+# RELAFOO-NEXT: .rela.data {
+# CHECK-NEXT:     0x8 R_X86_64_64 _start 0x8000000000000000
+# CHECK-NEXT:     0x18 R_X86_64_64 _start 0xFFFFFFFFFFFFFFFF
+# CHECK-NEXT:   }
+# CHECK-NEXT: ]
+
+# CHECK2:      Relocations [
+# CHECK2-NEXT:   .crel.text {
+# CHECK2-NEXT:     0x1 R_X86_64_PLT32 foo 0xFFFFFFFFFFFFFFFC
+# CHECK2-NEXT:     0x6 R_X86_64_PLT32 .text.foo 0xFFFFFFFFFFFFFFFC
+# CHECK2-NEXT:     0xD R_X86_64_PLT32 fb 0xFFFFFFFFFFFFFFFC
+# CHECK2-NEXT:   }
+# CHECK2-NEXT:   .rela.text.foo {
+# CHECK2-NEXT:     0x3 R_X86_64_PC32 .L.str 0xFFFFFFFFFFFFFFFC
+# CHECK2-NEXT:     0xA R_X86_64_PC32 .L.str1 0xFFFFFFFFFFFFFFFC
+# CHECK2-NEXT:     0xF R_X86_64_PLT32 g 0xFFFFFFFFFFFFFFFC
+# CHECK2-NEXT:     0x14 R_X86_64_PLT32 g 0xFFFFFFFFFFFFFFFC
+# CHECK2-NEXT:   }
+# CHECK2-NEXT:   .rela.data {
+# CHECK2-NEXT:     0x8 R_X86_64_64 _start 0x8000000000000000
+# CHECK2-NEXT:     0x18 R_X86_64_64 _start 0xFFFFFFFFFFFFFFFF
+# CHECK2-NEXT:   }
+# CHECK2-NEXT:   .crel.rodata {
+# CHECK2-NEXT:     0x0 R_X86_64_PC32 foo 0x0
+# CHECK2-NEXT:     0xF R_X86_64_PC32 foo 0x3F
+# CHECK2-NEXT:     0x1F R_X86_64_PC64 foo 0x7F
+# CHECK2-NEXT:     0x27 R_X86_64_PC32 _start 0xFFFFFFFFFFFFE07F
+# CHECK2-COUNT-12:      R_X86_64_32 _start 0x0
+# CHECK2-NEXT:   }
+# CHECK2-NEXT: ]
+
+#--- a.s
+.global _start, foo
+_start:
+  call foo
+  call .text.foo
+
+.section .text.foo,"ax"
+foo:
+  leaq .L.str(%rip), %rsi
+  leaq .L.str1(%rip), %rsi
+  call g
+  call g
+
+.section .rodata.str1.1,"aMS",@progbits,1
+.L.str:
+  .asciz  "abc"
+.L.str1:
+  .asciz  "def"
+
+.data
+.quad 0
+.quad _start - 0x8000000000000000
+.quad 0
+.quad _start - 1
+
+#--- b.s
+.globl fb
+fb:
+  call fb
+
+.section .rodata,"a"
+.long foo - .
+.space 15-4
+.long foo - . + 63  # offset+=15
+.space 16-4
+.quad foo - . + 127  # offset+=16
+.long _start - . - 8065
+
+## Ensure .crel.rodata contains 16 relocations so that getULEB128Size(crelHeader) > 1.
+.rept 12
+.long _start
+.endr

From b2eab3486499656ec6ef30ace5033f80d4d9dfc9 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser@berlin.de>
Date: Fri, 2 Aug 2024 10:53:33 +0200
Subject: [PATCH 091/427] [Clang] Add a release note deprecating __is_nullptr

---
 clang/docs/ReleaseNotes.rst | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index b4ef1e9672a5d..c42cb9932f3f7 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -447,6 +447,10 @@ Non-comprehensive list of changes in this release
   type of the pointer was taken into account. This improves
   compatibility with GCC's libstdc++.
 
+- The type traits builtin ``__is_nullptr`` is deprecated in CLang 19 and will be
+  removed in Clang 20. ``__is_same(__remove_cv(T), decltype(nullptr))`` can be
+  used instead to check whether a type ``T`` is a ``nullptr``.
+
 New Compiler Flags
 ------------------
 - ``-fsanitize=implicit-bitfield-conversion`` checks implicit truncation and
@@ -754,7 +758,7 @@ Improvements to Clang's diagnostics
 
 - Clang now diagnoses dangling assignments for pointer-like objects (annotated with `[[gsl::Pointer]]`) under `-Wdangling-assignment-gsl` (off by default)
   Fixes #GH63310.
-  
+
 - Clang now diagnoses uses of alias templates with a deprecated attribute. (Fixes #GH18236).
 
   .. code-block:: c++

From 7fa3ba52ba4c918298b67ef74891319464017bf4 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <274595+qiongsiwu@users.noreply.github.com>
Date: Thu, 1 Aug 2024 09:51:07 -0400
Subject: [PATCH 092/427] [AIX] Turn on `#pragma mc_func` check by default
 (#101336)

https://github.com/llvm/llvm-project/pull/99888 added a check (and
corresponding options) to flag uses of `#pragma mc_func` on AIX.

This PR turns on the check by default.

(cherry picked from commit b9335176db718bf64c72d48107eb9dff28ed979e)
---
 clang/include/clang/Driver/Options.td         | 4 ++--
 clang/include/clang/Lex/PreprocessorOptions.h | 4 ++--
 clang/lib/Driver/ToolChains/AIX.cpp           | 6 ++----
 clang/test/Preprocessor/pragma_mc_func.c      | 6 ++++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 359a698ea87dd..bed6e7af9dce9 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -8087,8 +8087,8 @@ def source_date_epoch : Separate<["-"], "source-date-epoch">,
 } // let Visibility = [CC1Option]
 
 defm err_pragma_mc_func_aix : BoolFOption<"err-pragma-mc-func-aix",
-  PreprocessorOpts<"ErrorOnPragmaMcfuncOnAIX">, DefaultFalse,
-  PosFlag<SetTrue, [], [ClangOption, CC1Option],
+  PreprocessorOpts<"ErrorOnPragmaMcfuncOnAIX">, DefaultTrue,
+  PosFlag<SetTrue, [], [ClangOption],
           "Treat uses of #pragma mc_func as errors">,
   NegFlag<SetFalse,[], [ClangOption, CC1Option],
           "Ignore uses of #pragma mc_func">>;
diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h
index 3f7dd9db18ba7..f48b7ecb90e1e 100644
--- a/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/clang/include/clang/Lex/PreprocessorOptions.h
@@ -213,7 +213,7 @@ class PreprocessorOptions {
 
   /// If set, the preprocessor reports an error when processing #pragma mc_func
   /// on AIX.
-  bool ErrorOnPragmaMcfuncOnAIX = false;
+  bool ErrorOnPragmaMcfuncOnAIX = true;
 
 public:
   PreprocessorOptions() : PrecompiledPreambleBytes(0, false) {}
@@ -252,7 +252,7 @@ class PreprocessorOptions {
     PrecompiledPreambleBytes.first = 0;
     PrecompiledPreambleBytes.second = false;
     RetainExcludedConditionalBlocks = false;
-    ErrorOnPragmaMcfuncOnAIX = false;
+    ErrorOnPragmaMcfuncOnAIX = true;
   }
 };
 
diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp
index fb780fb75651d..0615a8a9e8d17 100644
--- a/clang/lib/Driver/ToolChains/AIX.cpp
+++ b/clang/lib/Driver/ToolChains/AIX.cpp
@@ -558,10 +558,8 @@ void AIX::addClangTargetOptions(
                               options::OPT_fno_sized_deallocation))
     CC1Args.push_back("-fno-sized-deallocation");
 
-  if (Args.hasFlag(options::OPT_ferr_pragma_mc_func_aix,
-                   options::OPT_fno_err_pragma_mc_func_aix, false))
-    CC1Args.push_back("-ferr-pragma-mc-func-aix");
-  else
+  if (!Args.hasFlag(options::OPT_ferr_pragma_mc_func_aix,
+                    options::OPT_fno_err_pragma_mc_func_aix, true))
     CC1Args.push_back("-fno-err-pragma-mc-func-aix");
 }
 
diff --git a/clang/test/Preprocessor/pragma_mc_func.c b/clang/test/Preprocessor/pragma_mc_func.c
index f0d3e49e5dddc..bf12f7107ff5c 100644
--- a/clang/test/Preprocessor/pragma_mc_func.c
+++ b/clang/test/Preprocessor/pragma_mc_func.c
@@ -1,5 +1,8 @@
+// RUN: not %clang --target=powerpc64-ibm-aix -fsyntax-only %s 2>&1 | FileCheck %s
 // RUN: not %clang --target=powerpc64-ibm-aix -ferr-pragma-mc-func-aix -fsyntax-only \
 // RUN:   %s 2>&1 | FileCheck %s
+// RUN: not %clang --target=powerpc64-ibm-aix -fno-err-pragma-mc-func-aix \
+// RUN:   -ferr-pragma-mc-func-aix -fsyntax-only %s 2>&1 | FileCheck %s
 #pragma mc_func asm_barrier {"60000000"}
 
 // CHECK:  error: #pragma mc_func is not supported
@@ -8,11 +11,10 @@
 // RUN: %clang --target=powerpc64-ibm-aix -fno-err-pragma-mc-func-aix -fsyntax-only %s
 // RUN: %clang --target=powerpc64-ibm-aix -ferr-pragma-mc-func-aix -fsyntax-only \
 // RUN:    -fno-err-pragma-mc-func-aix %s
-// RUN: %clang --target=powerpc64-ibm-aix -fsyntax-only %s
 // RUN: %clang --target=powerpc64-ibm-aix -Werror=unknown-pragmas \
 // RUN:   -fno-err-pragma-mc-func-aix -fsyntax-only %s
 
-// Cases where we have errors or warnings.
+// Cases on a non-AIX target.
 // RUN: not %clang --target=powerpc64le-unknown-linux-gnu \
 // RUN:   -Werror=unknown-pragmas -fno-err-pragma-mc-func-aix -fsyntax-only %s 2>&1 | \
 // RUN:   FileCheck --check-prefix=UNUSED %s

From 6b52570dcd5d34a9b44ccd0bb2b2d4ffd661d9d7 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke@igalia.com>
Date: Wed, 31 Jul 2024 00:28:52 +0800
Subject: [PATCH 093/427] [RISCV] Fix vmerge.vvm/vmv.v.v getting folded into
 ops with mismatching EEW (#101152)

As noted in
https://github.com/llvm/llvm-project/pull/100367/files#r1695448771, we
currently fold in vmerge.vvms and vmv.v.vs into their ops even if the
EEW is different which leads to an incorrect transform.

This checks the op's EEW via its simple value type for now since there
doesn't seem to be any existing information about the EEW size of
instructions. We'll probably need to encode this at some point if we
want to be able to access it at the MachineInstr level in #100367
---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   |  4 ++++
 llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll    | 14 +++++++++++++
 .../RISCV/rvv/rvv-peephole-vmerge-vops.ll     | 21 +++++++++++++++++++
 3 files changed, 39 insertions(+)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index eef6ae677ac85..db949f3476e2b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3721,6 +3721,10 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
   assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
   assert(!Glue || Glue.getValueType() == MVT::Glue);
 
+  // If the EEW of True is different from vmerge's SEW, then we can't fold.
+  if (True.getSimpleValueType() != N->getSimpleValueType(0))
+    return false;
+
   // We require that either merge and false are the same, or that merge
   // is undefined.
   if (Merge != False && !isImplicitDef(Merge))
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll b/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll
index ec03f773c7108..dfc2b2bdda026 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll
@@ -168,3 +168,17 @@ define <vscale x 2 x i32> @unfoldable_vredsum(<vscale x 2 x i32> %passthru, <vsc
   %b = call <vscale x 2 x i32> @llvm.riscv.vmv.v.v.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, iXLen 1)
   ret <vscale x 2 x i32> %b
 }
+
+define <vscale x 2 x i32> @unfoldable_mismatched_sew(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, iXLen %avl) {
+; CHECK-LABEL: unfoldable_mismatched_sew:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, iXLen %avl)
+  %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmv.v.v.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a.bitcast, iXLen %avl)
+  ret <vscale x 2 x i32> %b
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index a08bcae074b9b..259515f160048 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -1196,3 +1196,24 @@ define <vscale x 2 x i32> @true_mask_vmerge_implicit_passthru(<vscale x 2 x i32>
   )
   ret <vscale x 2 x i32> %b
 }
+
+
+define <vscale x 2 x i32> @unfoldable_mismatched_sew(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, <vscale x 2 x i1> %mask, i64 %avl) {
+; CHECK-LABEL: unfoldable_mismatched_sew:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 %avl)
+  %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
+    <vscale x 2 x i32> %passthru,
+    <vscale x 2 x i32> %passthru,
+    <vscale x 2 x i32> %a.bitcast,
+    <vscale x 2 x i1> splat (i1 true),
+    i64 %avl
+  )
+  ret <vscale x 2 x i32> %b
+}

From 18ad0209550ed258fc1a24e710613bc5e3e220af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy@ericsson.com>
Date: Fri, 2 Aug 2024 12:43:06 +0200
Subject: [PATCH 094/427] [analyzer] Restore recognition of mutex methods
 (#101511)

Before commit 705788c the checker alpha.unix.BlockInCriticalSection
"recognized" the methods `std::mutex::lock` and `std::mutex::unlock`
with an extremely trivial check that accepted any function (or method)
named lock/unlock.

To avoid matching unrelated user-defined function, this was refined to a
check that also requires the presence of "std" and "mutex" as distinct
parts of the qualified name.

However, as #99628 reported, there are standard library implementations
where some methods of `std::mutex` are inherited from an implementation
detail base class and the new code wasn't able to recognize these
methods, which led to emitting false positive reports.

As a workaround, this commit partially restores the old behavior by
omitting the check for the class name.

In the future, it would be good to replace this hack with a solution
which ensures that `CallDescription` understands inherited methods.

(cherry picked from commit 99ae2edc2592e602b0eb5a287f4d003aa3902440)
---
 .../BlockInCriticalSectionChecker.cpp         | 16 +++++++---
 .../block-in-critical-section-inheritance.cpp | 31 +++++++++++++++++++
 2 files changed, 43 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/Analysis/block-in-critical-section-inheritance.cpp

diff --git a/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp
index 40f7e9cede1f1..4cd2f2802f30c 100644
--- a/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp
@@ -147,10 +147,18 @@ using MutexDescriptor =
 class BlockInCriticalSectionChecker : public Checker<check::PostCall> {
 private:
   const std::array<MutexDescriptor, 8> MutexDescriptors{
-      MemberMutexDescriptor({/*MatchAs=*/CDM::CXXMethod,
-                             /*QualifiedName=*/{"std", "mutex", "lock"},
-                             /*RequiredArgs=*/0},
-                            {CDM::CXXMethod, {"std", "mutex", "unlock"}, 0}),
+      // NOTE: There are standard library implementations where some methods
+      // of `std::mutex` are inherited from an implementation detail base
+      // class, and those aren't matched by the name specification {"std",
+      // "mutex", "lock"}.
+      // As a workaround here we omit the class name and only require the
+      // presence of the name parts "std" and "lock"/"unlock".
+      // TODO: Ensure that CallDescription understands inherited methods.
+      MemberMutexDescriptor(
+          {/*MatchAs=*/CDM::CXXMethod,
+           /*QualifiedName=*/{"std", /*"mutex",*/ "lock"},
+           /*RequiredArgs=*/0},
+          {CDM::CXXMethod, {"std", /*"mutex",*/ "unlock"}, 0}),
       FirstArgMutexDescriptor({CDM::CLibrary, {"pthread_mutex_lock"}, 1},
                               {CDM::CLibrary, {"pthread_mutex_unlock"}, 1}),
       FirstArgMutexDescriptor({CDM::CLibrary, {"mtx_lock"}, 1},
diff --git a/clang/test/Analysis/block-in-critical-section-inheritance.cpp b/clang/test/Analysis/block-in-critical-section-inheritance.cpp
new file mode 100644
index 0000000000000..db20df8c60a5c
--- /dev/null
+++ b/clang/test/Analysis/block-in-critical-section-inheritance.cpp
@@ -0,0 +1,31 @@
+// RUN: %clang_analyze_cc1 \
+// RUN:   -analyzer-checker=unix.BlockInCriticalSection \
+// RUN:   -std=c++11 \
+// RUN:   -analyzer-output text \
+// RUN:   -verify %s
+
+unsigned int sleep(unsigned int seconds) {return 0;}
+namespace std {
+// There are some standard library implementations where some mutex methods
+// come from an implementation detail base class. We need to ensure that these
+// are matched correctly.
+class __mutex_base {
+public:
+  void lock();
+};
+class mutex : public __mutex_base{
+public:
+  void unlock();
+  bool try_lock();
+};
+} // namespace std
+
+void gh_99628() {
+  std::mutex m;
+  m.lock();
+  // expected-note@-1 {{Entering critical section here}}
+  sleep(10);
+  // expected-warning@-1 {{Call to blocking function 'sleep' inside of critical section}}
+  // expected-note@-2 {{Call to blocking function 'sleep' inside of critical section}}
+  m.unlock();
+}

From 9ac3941f8f15241934f05d16df77233af510d1b9 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer@nvidia.com>
Date: Fri, 2 Aug 2024 13:25:35 +0100
Subject: [PATCH 095/427] Ofast deprecation clarifications (#101005)

Following up on the RFC discussion, this is clarifying that the main
purpose and effect of the -Ofast deprecation is to discourage its usage
and that everything else is more or less open for discussion, e.g. there
is no timeline yet for removal.

---------

Co-authored-by: Aaron Ballman <aaron@aaronballman.com>
(cherry picked from commit 48d4d4b641702bf6db03a1bac73b7e13dea28349)
---
 clang/docs/CommandGuide/clang.rst | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/clang/docs/CommandGuide/clang.rst b/clang/docs/CommandGuide/clang.rst
index 663aca1f6ddcb..a0c2594d06c61 100644
--- a/clang/docs/CommandGuide/clang.rst
+++ b/clang/docs/CommandGuide/clang.rst
@@ -429,8 +429,12 @@ Code Generation Options
 
     :option:`-Ofast` Enables all the optimizations from :option:`-O3` along
     with other aggressive optimizations that may violate strict compliance with
-    language standards. This is deprecated in favor of :option:`-O3`
-    in combination with :option:`-ffast-math`.
+    language standards. This is deprecated in Clang 19 and a warning is emitted
+    that :option:`-O3` in combination with :option:`-ffast-math` should be used
+    instead if the request for non-standard math behavior is intended. There
+    is no timeline yet for removal; the aim is to discourage use of
+    :option:`-Ofast` due to the surprising behavior of an optimization flag
+    changing the observable behavior of correct code.
 
     :option:`-Os` Like :option:`-O2` with extra optimizations to reduce code
     size.

From 4bf04b2eda0cc011f6b6461812a17b50c9b4231a Mon Sep 17 00:00:00 2001
From: NAKAMURA Takumi <geek4civic@gmail.com>
Date: Sun, 28 Jul 2024 16:48:23 +0900
Subject: [PATCH 096/427] [Bazel] Use PACKAGE_VERSION for version string.

This enables "-rc" suffix in release branches.

(cherry picked from commit 25efb746d907ce0ffdd9195d191ff0f6944ea3ca)
---
 utils/bazel/llvm-project-overlay/clang/BUILD.bazel | 6 +++---
 utils/bazel/llvm-project-overlay/llvm/config.bzl   | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
index 2d7ce8702a5d9..c50dc174a1def 100644
--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
@@ -4,10 +4,10 @@
 
 load(
     "//:vars.bzl",
-    "LLVM_VERSION",
     "LLVM_VERSION_MAJOR",
     "LLVM_VERSION_MINOR",
     "LLVM_VERSION_PATCH",
+    "PACKAGE_VERSION",
 )
 load("//:workspace_root.bzl", "workspace_root")
 load("//llvm:binary_alias.bzl", "binary_alias")
@@ -553,12 +553,12 @@ genrule(
         "echo '#define CLANG_VERSION_MAJOR_STRING \"{major}\"' >> $@\n" +
         "echo '#define CLANG_VERSION_MINOR {minor}' >> $@\n" +
         "echo '#define CLANG_VERSION_PATCHLEVEL {patch}' >> $@\n" +
-        "echo '#define CLANG_VERSION_STRING \"{vers}git\"' >> $@\n"
+        "echo '#define CLANG_VERSION_STRING \"{vers}\"' >> $@\n"
     ).format(
         major = LLVM_VERSION_MAJOR,
         minor = LLVM_VERSION_MINOR,
         patch = LLVM_VERSION_PATCH,
-        vers = LLVM_VERSION,
+        vers = PACKAGE_VERSION,
     ),
 )
 
diff --git a/utils/bazel/llvm-project-overlay/llvm/config.bzl b/utils/bazel/llvm-project-overlay/llvm/config.bzl
index 2e3bff53ead9d..9de966688eda5 100644
--- a/utils/bazel/llvm-project-overlay/llvm/config.bzl
+++ b/utils/bazel/llvm-project-overlay/llvm/config.bzl
@@ -6,10 +6,10 @@
 
 load(
     "//:vars.bzl",
-    "LLVM_VERSION",
     "LLVM_VERSION_MAJOR",
     "LLVM_VERSION_MINOR",
     "LLVM_VERSION_PATCH",
+    "PACKAGE_VERSION",
 )
 
 def native_arch_defines(arch, triple):
@@ -108,7 +108,7 @@ llvm_config_defines = os_defines + builtin_thread_pointer + select({
     "LLVM_VERSION_MAJOR={}".format(LLVM_VERSION_MAJOR),
     "LLVM_VERSION_MINOR={}".format(LLVM_VERSION_MINOR),
     "LLVM_VERSION_PATCH={}".format(LLVM_VERSION_PATCH),
-    r'LLVM_VERSION_STRING=\"{}git\"'.format(LLVM_VERSION),
+    r'LLVM_VERSION_STRING=\"{}\"'.format(PACKAGE_VERSION),
     # These shouldn't be needed by the C++11 standard, but are for some
     # platforms (e.g. glibc < 2.18. See
     # https://sourceware.org/bugzilla/show_bug.cgi?id=15366). These are also

From eaba8d9860d7d9f65e52e8e7a5ce3de0b8e13b3f Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen@sifive.com>
Date: Thu, 25 Jul 2024 15:14:39 +0800
Subject: [PATCH 097/427] [VP] Refactor VectorBuilder to avoid layering
 violation. NFC (#99276)

This patch refactors the handling of reduction to eliminate layering
violations.

* Introduced `getReductionIntrinsicID` in LoopUtils.h for mapping
recurrence kinds to llvm.vector.reduce.* intrinsic IDs.
* Updated `VectorBuilder::createSimpleTargetReduction` to accept
llvm.vector.reduce.* intrinsic directly.
* New function `VPIntrinsic::getForIntrinsic` for mapping intrinsic ID
to the same functional VP intrinsic ID.

(cherry picked from commit 6d12b3f67df429bffff6e1953d9f55867d7e2469)
---
 llvm/include/llvm/IR/IntrinsicInst.h          |  4 ++
 llvm/include/llvm/IR/VectorBuilder.h          |  5 +-
 .../include/llvm/Transforms/Utils/LoopUtils.h |  4 ++
 llvm/lib/IR/IntrinsicInst.cpp                 | 19 +++++++
 llvm/lib/IR/VectorBuilder.cpp                 | 57 ++-----------------
 llvm/lib/Transforms/Utils/LoopUtils.cpp       | 44 +++++++++++++-
 llvm/unittests/IR/VPIntrinsicTest.cpp         | 53 +++++++++++++++++
 7 files changed, 129 insertions(+), 57 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index fe3f92da400f8..94c8fa092f45e 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -569,6 +569,10 @@ class VPIntrinsic : public IntrinsicInst {
   /// The llvm.vp.* intrinsics for this instruction Opcode
   static Intrinsic::ID getForOpcode(unsigned OC);
 
+  /// The llvm.vp.* intrinsics for this intrinsic ID \p Id. Return \p Id if it
+  /// is already a VP intrinsic.
+  static Intrinsic::ID getForIntrinsic(Intrinsic::ID Id);
+
   // Whether \p ID is a VP intrinsic ID.
   static bool isVPIntrinsic(Intrinsic::ID);
 
diff --git a/llvm/include/llvm/IR/VectorBuilder.h b/llvm/include/llvm/IR/VectorBuilder.h
index 6af7f6075551d..dbb9f4c7336d5 100644
--- a/llvm/include/llvm/IR/VectorBuilder.h
+++ b/llvm/include/llvm/IR/VectorBuilder.h
@@ -15,7 +15,6 @@
 #ifndef LLVM_IR_VECTORBUILDER_H
 #define LLVM_IR_VECTORBUILDER_H
 
-#include <llvm/Analysis/IVDescriptors.h>
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/InstrTypes.h>
 #include <llvm/IR/Instruction.h>
@@ -100,11 +99,11 @@ class VectorBuilder {
                                  const Twine &Name = Twine());
 
   /// Emit a VP reduction intrinsic call for recurrence kind.
-  /// \param Kind        The kind of recurrence
+  /// \param RdxID       The intrinsic ID of llvm.vector.reduce.*
   /// \param ValTy       The type of operand which the reduction operation is
   ///                    performed.
   /// \param VecOpArray  The operand list.
-  Value *createSimpleTargetReduction(RecurKind Kind, Type *ValTy,
+  Value *createSimpleTargetReduction(Intrinsic::ID RdxID, Type *ValTy,
                                      ArrayRef<Value *> VecOpArray,
                                      const Twine &Name = Twine());
 };
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index b01a447f3c28b..56880bd4822c7 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -359,6 +359,10 @@ bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                         SinkAndHoistLICMFlags &LICMFlags,
                         OptimizationRemarkEmitter *ORE = nullptr);
 
+/// Returns the llvm.vector.reduce intrinsic that corresponds to the recurrence
+/// kind.
+constexpr Intrinsic::ID getReductionIntrinsicID(RecurKind RK);
+
 /// Returns the arithmetic instruction opcode used when expanding a reduction.
 unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID);
 
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 64a14da55b15e..db3b0196f66fd 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -599,6 +599,25 @@ Intrinsic::ID VPIntrinsic::getForOpcode(unsigned IROPC) {
   return Intrinsic::not_intrinsic;
 }
 
+constexpr static Intrinsic::ID getForIntrinsic(Intrinsic::ID Id) {
+  if (::isVPIntrinsic(Id))
+    return Id;
+
+  switch (Id) {
+  default:
+    break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) break;
+#define VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN) case Intrinsic::INTRIN:
+#define END_REGISTER_VP_INTRINSIC(VPID) return Intrinsic::VPID;
+#include "llvm/IR/VPIntrinsics.def"
+  }
+  return Intrinsic::not_intrinsic;
+}
+
+Intrinsic::ID VPIntrinsic::getForIntrinsic(Intrinsic::ID Id) {
+  return ::getForIntrinsic(Id);
+}
+
 bool VPIntrinsic::canIgnoreVectorLengthParam() const {
   using namespace PatternMatch;
 
diff --git a/llvm/lib/IR/VectorBuilder.cpp b/llvm/lib/IR/VectorBuilder.cpp
index 5ff3082879895..8dbf25277bf5d 100644
--- a/llvm/lib/IR/VectorBuilder.cpp
+++ b/llvm/lib/IR/VectorBuilder.cpp
@@ -60,60 +60,13 @@ Value *VectorBuilder::createVectorInstruction(unsigned Opcode, Type *ReturnTy,
   return createVectorInstructionImpl(VPID, ReturnTy, InstOpArray, Name);
 }
 
-Value *VectorBuilder::createSimpleTargetReduction(RecurKind Kind, Type *ValTy,
+Value *VectorBuilder::createSimpleTargetReduction(Intrinsic::ID RdxID,
+                                                  Type *ValTy,
                                                   ArrayRef<Value *> InstOpArray,
                                                   const Twine &Name) {
-  Intrinsic::ID VPID;
-  switch (Kind) {
-  case RecurKind::Add:
-    VPID = Intrinsic::vp_reduce_add;
-    break;
-  case RecurKind::Mul:
-    VPID = Intrinsic::vp_reduce_mul;
-    break;
-  case RecurKind::And:
-    VPID = Intrinsic::vp_reduce_and;
-    break;
-  case RecurKind::Or:
-    VPID = Intrinsic::vp_reduce_or;
-    break;
-  case RecurKind::Xor:
-    VPID = Intrinsic::vp_reduce_xor;
-    break;
-  case RecurKind::FMulAdd:
-  case RecurKind::FAdd:
-    VPID = Intrinsic::vp_reduce_fadd;
-    break;
-  case RecurKind::FMul:
-    VPID = Intrinsic::vp_reduce_fmul;
-    break;
-  case RecurKind::SMax:
-    VPID = Intrinsic::vp_reduce_smax;
-    break;
-  case RecurKind::SMin:
-    VPID = Intrinsic::vp_reduce_smin;
-    break;
-  case RecurKind::UMax:
-    VPID = Intrinsic::vp_reduce_umax;
-    break;
-  case RecurKind::UMin:
-    VPID = Intrinsic::vp_reduce_umin;
-    break;
-  case RecurKind::FMax:
-    VPID = Intrinsic::vp_reduce_fmax;
-    break;
-  case RecurKind::FMin:
-    VPID = Intrinsic::vp_reduce_fmin;
-    break;
-  case RecurKind::FMaximum:
-    VPID = Intrinsic::vp_reduce_fmaximum;
-    break;
-  case RecurKind::FMinimum:
-    VPID = Intrinsic::vp_reduce_fminimum;
-    break;
-  default:
-    llvm_unreachable("No VPIntrinsic for this reduction");
-  }
+  auto VPID = VPIntrinsic::getForIntrinsic(RdxID);
+  assert(VPReductionIntrinsic::isVPReduction(VPID) &&
+         "No VPIntrinsic for this reduction");
   return createVectorInstructionImpl(VPID, ValTy, InstOpArray, Name);
 }
 
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 4609376a748f9..0abf6d77496dc 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -918,6 +918,44 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
   return true;
 }
 
+constexpr Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) {
+  switch (RK) {
+  default:
+    llvm_unreachable("Unexpected recurrence kind");
+  case RecurKind::Add:
+    return Intrinsic::vector_reduce_add;
+  case RecurKind::Mul:
+    return Intrinsic::vector_reduce_mul;
+  case RecurKind::And:
+    return Intrinsic::vector_reduce_and;
+  case RecurKind::Or:
+    return Intrinsic::vector_reduce_or;
+  case RecurKind::Xor:
+    return Intrinsic::vector_reduce_xor;
+  case RecurKind::FMulAdd:
+  case RecurKind::FAdd:
+    return Intrinsic::vector_reduce_fadd;
+  case RecurKind::FMul:
+    return Intrinsic::vector_reduce_fmul;
+  case RecurKind::SMax:
+    return Intrinsic::vector_reduce_smax;
+  case RecurKind::SMin:
+    return Intrinsic::vector_reduce_smin;
+  case RecurKind::UMax:
+    return Intrinsic::vector_reduce_umax;
+  case RecurKind::UMin:
+    return Intrinsic::vector_reduce_umin;
+  case RecurKind::FMax:
+    return Intrinsic::vector_reduce_fmax;
+  case RecurKind::FMin:
+    return Intrinsic::vector_reduce_fmin;
+  case RecurKind::FMaximum:
+    return Intrinsic::vector_reduce_fmaximum;
+  case RecurKind::FMinimum:
+    return Intrinsic::vector_reduce_fminimum;
+  }
+}
+
 unsigned llvm::getArithmeticReductionInstruction(Intrinsic::ID RdxID) {
   switch (RdxID) {
   case Intrinsic::vector_reduce_fadd:
@@ -1215,12 +1253,13 @@ Value *llvm::createSimpleTargetReduction(VectorBuilder &VBuilder, Value *Src,
   RecurKind Kind = Desc.getRecurrenceKind();
   assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
          "AnyOf reduction is not supported.");
+  Intrinsic::ID Id = getReductionIntrinsicID(Kind);
   auto *SrcTy = cast<VectorType>(Src->getType());
   Type *SrcEltTy = SrcTy->getElementType();
   Value *Iden =
       Desc.getRecurrenceIdentity(Kind, SrcEltTy, Desc.getFastMathFlags());
   Value *Ops[] = {Iden, Src};
-  return VBuilder.createSimpleTargetReduction(Kind, SrcTy, Ops);
+  return VBuilder.createSimpleTargetReduction(Id, SrcTy, Ops);
 }
 
 Value *llvm::createTargetReduction(IRBuilderBase &B,
@@ -1260,9 +1299,10 @@ Value *llvm::createOrderedReduction(VectorBuilder &VBuilder,
   assert(Src->getType()->isVectorTy() && "Expected a vector type");
   assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
 
+  Intrinsic::ID Id = getReductionIntrinsicID(RecurKind::FAdd);
   auto *SrcTy = cast<VectorType>(Src->getType());
   Value *Ops[] = {Start, Src};
-  return VBuilder.createSimpleTargetReduction(RecurKind::FAdd, SrcTy, Ops);
+  return VBuilder.createSimpleTargetReduction(Id, SrcTy, Ops);
 }
 
 void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue,
diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
index eab2850ca4e1e..cf0a10d1f2e95 100644
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -367,6 +367,59 @@ TEST_F(VPIntrinsicTest, IntrinsicIDRoundTrip) {
   ASSERT_NE(FullTripCounts, 0u);
 }
 
+/// Check that going from intrinsic to VP intrinsic and back results in the same
+/// intrinsic.
+TEST_F(VPIntrinsicTest, IntrinsicToVPRoundTrip) {
+  bool IsFullTrip = false;
+  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic + 1;
+  for (; IntrinsicID < Intrinsic::num_intrinsics; IntrinsicID++) {
+    Intrinsic::ID VPID = VPIntrinsic::getForIntrinsic(IntrinsicID);
+    // No equivalent VP intrinsic available.
+    if (VPID == Intrinsic::not_intrinsic)
+      continue;
+
+    // Return itself if passed intrinsic ID is VP intrinsic.
+    if (VPIntrinsic::isVPIntrinsic(IntrinsicID)) {
+      ASSERT_EQ(IntrinsicID, VPID);
+      continue;
+    }
+
+    std::optional<Intrinsic::ID> RoundTripIntrinsicID =
+        VPIntrinsic::getFunctionalIntrinsicIDForVP(VPID);
+    // No equivalent non-predicated intrinsic available.
+    if (!RoundTripIntrinsicID)
+      continue;
+
+    ASSERT_EQ(*RoundTripIntrinsicID, IntrinsicID);
+    IsFullTrip = true;
+  }
+  ASSERT_TRUE(IsFullTrip);
+}
+
+/// Check that going from VP intrinsic to equivalent non-predicated intrinsic
+/// and back results in the same intrinsic.
+TEST_F(VPIntrinsicTest, VPToNonPredIntrinsicRoundTrip) {
+  std::unique_ptr<Module> M = createVPDeclarationModule();
+  assert(M);
+
+  bool IsFullTrip = false;
+  for (const auto &VPDecl : *M) {
+    auto VPID = VPDecl.getIntrinsicID();
+    std::optional<Intrinsic::ID> NonPredID =
+        VPIntrinsic::getFunctionalIntrinsicIDForVP(VPID);
+
+    // No equivalent non-predicated intrinsic available
+    if (!NonPredID)
+      continue;
+
+    Intrinsic::ID RoundTripVPID = VPIntrinsic::getForIntrinsic(*NonPredID);
+
+    ASSERT_EQ(RoundTripVPID, VPID);
+    IsFullTrip = true;
+  }
+  ASSERT_TRUE(IsFullTrip);
+}
+
 /// Check that VPIntrinsic::getDeclarationForParams works.
 TEST_F(VPIntrinsicTest, VPIntrinsicDeclarationForParams) {
   std::unique_ptr<Module> M = createVPDeclarationModule();

From 0ee444283e6fc3b40c17a75ba5c073dbceca6f14 Mon Sep 17 00:00:00 2001
From: NAKAMURA Takumi <geek4civic@gmail.com>
Date: Fri, 26 Jul 2024 10:03:17 +0900
Subject: [PATCH 098/427] Revert "[llvm][Bazel] Adapt to
 4eb30cfb3474e3770b465cdb39db3b7f6404c3ef"

Since #99276 has been landed, the dependency has become redundant.

This reverts commit aa94a43178e1e1fa4dbe7ee802d46623667067ae.
(llvmorg-19-init-17718-gaa94a43178e1)

(cherry picked from commit 5bf085921ec23e5fa1ea4a159c55a618a9299ce6)
---
 utils/bazel/llvm-project-overlay/llvm/BUILD.bazel | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index 64d36c7b7f664..4d443e809d55b 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -944,10 +944,7 @@ cc_library(
     srcs = glob([
         "lib/IR/*.cpp",
         "lib/IR/*.h",
-    ]) + [
-        # To avoid a dependency cycle.
-        "include/llvm/Analysis/IVDescriptors.h",
-    ],
+    ]),
     hdrs = glob(
         [
             "include/llvm/*.h",

From 2213bdeb0077d6e89344950568bb321fdef50a86 Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Tue, 30 Jul 2024 08:59:12 +0200
Subject: [PATCH 099/427] [sanitizer_common] Fix signal_line.cpp on SPARC
 (#100535)

```
  SanitizerCommon-ubsan-sparc-Linux :: Linux/signal_line.cpp
```
currently `FAIL`s on Linux/sparc64 (32 and 64-bit) for `n == 2`. Instead
of the expected `SIGSEGV`, the test dies with `SIGBUS`. `strace` reveals
that this is due to a unaligned access:
```
--- SIGBUS {si_signo=SIGBUS, si_code=BUS_ADRALN, si_addr=0x1} ---
```
which is to be expected on a strict-alignment target like SPARC. Fixed
by changing the invalid pointer to be better aligned.

Tested on `sparc64-unknown-linux-gnu` and `x86_64-pc-linux-gnu`.

(cherry picked from commit 94394ca980f8ecbd845155d2170cfd865e4d62dc)
---
 .../test/sanitizer_common/TestCases/Linux/signal_line.cpp      | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_line.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_line.cpp
index 208ece3e05af4..f1afd859c207a 100644
--- a/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_line.cpp
+++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_line.cpp
@@ -20,7 +20,8 @@ int main(int argc, char **argv) {
   // CHECK1: SUMMARY: [[SAN]]: SEGV {{.*}}signal_line.cpp:[[@LINE-2]]:[[TAB]] in main
 
   if (n == 2)
-    *((volatile int *)0x1) = __LINE__;
+    // Allow for strict-alignment targets that require natural alignment.
+    *((volatile int *)0x8) = __LINE__;
   // CHECK2: #{{[0-9]+ .*}}main {{.*}}signal_line.cpp:[[@LINE-1]]:[[TAB:[0-9]+]]
   // CHECK2: SUMMARY: [[SAN]]: SEGV {{.*}}signal_line.cpp:[[@LINE-2]]:[[TAB]] in main
 }

From 7fc0bae294aa174ca5c1f85bb2955ededdfd6eb5 Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Tue, 30 Jul 2024 09:02:05 +0200
Subject: [PATCH 100/427] =?UTF-8?q?[sanitizer=5Fcommon][test]=20Fix=20Inte?=
 =?UTF-8?q?rnalMmapWithOffset=20on=2032-bit=20Linux/s=E2=80=A6=20(#101011)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

…parc64

```
  SanitizerCommon-Unit :: ./Sanitizer-sparc-Test/SanitizerCommon/InternalMmapWithOffset

```
`FAIL`s on 32-bit Linux/sparc64:
```
projects/compiler-rt/lib/sanitizer_common/tests/./Sanitizer-sparc-Test --gtest_filter=SanitizerCommon.InternalMmapWithOffset
--
compiler-rt/lib/sanitizer_common/tests/sanitizer_libc_test.cpp:335: Failure
Expected equality of these values:
  'A'
    Which is: 'A' (65, 0x41)
  p[0]
    Which is: '\0'
```
It turns out the `pgoffset` arg to `mmap2` is passed incorrectly in this
case, unlike the 64-bit test. The caller, `MapWritableFileToMemory`,
passes an `u64` arg, while `mmap2` expects an `off_t`. This patch casts
the arg accordingly.

Tested on `sparc64-unknown-linux-gnu` and `x86_64-pc-linux-gnu`.

(cherry picked from commit 1c25f2cd470c2882e422b66d0482f5a120960394)
---
 compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 76acf591871ab..1d6a55bdb7f38 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -220,7 +220,7 @@ uptr internal_mmap(void *addr, uptr length, int prot, int flags, int fd,
   // mmap2 specifies file offset in 4096-byte units.
   CHECK(IsAligned(offset, 4096));
   return internal_syscall(SYSCALL(mmap2), addr, length, prot, flags, fd,
-                          offset / 4096);
+                          (OFF_T)(offset / 4096));
 #      endif
 }
 #    endif  // !SANITIZER_S390

From 7bfc4abb54db7ba2fc6ddf53f30bed752af57626 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Mon, 29 Jul 2024 11:23:25 +0100
Subject: [PATCH 101/427] Reland: "[Clang] Demote always_inline error to
 warning for mismatching SME attrs" (#100991) (#100996)

Test `aarch64-sme-inline-streaming-attrs.c` caused some buildbot
failures, because the test was missing a `REQUIRES: aarch64-registered
target`. This was because we've demoted the error to a warning, which
then resulted in a different error message, because Clang can't actually
CodeGen the IR.

(cherry picked from commit 389679d5f9055bffe8bbd25ae41f084a8d08e0f8)
---
 clang/include/clang/Basic/DiagnosticFrontendKinds.td    | 3 +++
 clang/lib/CodeGen/Targets/AArch64.cpp                   | 6 ++++--
 clang/test/CodeGen/aarch64-sme-inline-streaming-attrs.c | 8 +++++---
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
index 12a4617c64d87..8a1462c670d68 100644
--- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td
+++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -288,6 +288,9 @@ def err_function_needs_feature : Error<
 let CategoryName = "Codegen ABI Check" in {
 def err_function_always_inline_attribute_mismatch : Error<
   "always_inline function %1 and its caller %0 have mismatching %2 attributes">;
+def warn_function_always_inline_attribute_mismatch : Warning<
+  "always_inline function %1 and its caller %0 have mismatching %2 attributes, "
+  "inlining may change runtime behaviour">, InGroup<AArch64SMEAttributes>;
 def err_function_always_inline_new_za : Error<
   "always_inline function %0 has new za state">;
 
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index b9df54b0c67c4..1dec3cd40ebd2 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -883,8 +883,10 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
 
   if (!CalleeIsStreamingCompatible &&
       (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible))
-    CGM.getDiags().Report(CallLoc,
-                          diag::err_function_always_inline_attribute_mismatch)
+    CGM.getDiags().Report(
+        CallLoc, CalleeIsStreaming
+                     ? diag::err_function_always_inline_attribute_mismatch
+                     : diag::warn_function_always_inline_attribute_mismatch)
         << Caller->getDeclName() << Callee->getDeclName() << "streaming";
   if (auto *NewAttr = Callee->getAttr<ArmNewAttr>())
     if (NewAttr->isNewZA())
diff --git a/clang/test/CodeGen/aarch64-sme-inline-streaming-attrs.c b/clang/test/CodeGen/aarch64-sme-inline-streaming-attrs.c
index 25aebeced9379..9c3d08a25945a 100644
--- a/clang/test/CodeGen/aarch64-sme-inline-streaming-attrs.c
+++ b/clang/test/CodeGen/aarch64-sme-inline-streaming-attrs.c
@@ -3,6 +3,8 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null -target-feature +sme -verify -DTEST_STREAMING %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -o /dev/null -target-feature +sme -verify -DTEST_LOCALLY %s
 
+// REQUIRES: aarch64-registered-target
+
 #define __ai __attribute__((always_inline))
 __ai void inlined_fn(void) {}
 __ai void inlined_fn_streaming_compatible(void) __arm_streaming_compatible {}
@@ -20,7 +22,7 @@ void caller(void) {
 
 #ifdef TEST_COMPATIBLE
 void caller_compatible(void) __arm_streaming_compatible {
-    inlined_fn(); // expected-error {{always_inline function 'inlined_fn' and its caller 'caller_compatible' have mismatching streaming attributes}}
+    inlined_fn(); // expected-warning {{always_inline function 'inlined_fn' and its caller 'caller_compatible' have mismatching streaming attributes, inlining may change runtime behaviour}}
     inlined_fn_streaming_compatible();
     inlined_fn_streaming(); // expected-error {{always_inline function 'inlined_fn_streaming' and its caller 'caller_compatible' have mismatching streaming attributes}}
     inlined_fn_local(); // expected-error {{always_inline function 'inlined_fn_local' and its caller 'caller_compatible' have mismatching streaming attributes}}
@@ -29,7 +31,7 @@ void caller_compatible(void) __arm_streaming_compatible {
 
 #ifdef TEST_STREAMING
 void caller_streaming(void) __arm_streaming {
-    inlined_fn(); // expected-error {{always_inline function 'inlined_fn' and its caller 'caller_streaming' have mismatching streaming attributes}}
+    inlined_fn(); // expected-warning {{always_inline function 'inlined_fn' and its caller 'caller_streaming' have mismatching streaming attributes, inlining may change runtime behaviour}}
     inlined_fn_streaming_compatible();
     inlined_fn_streaming();
     inlined_fn_local();
@@ -39,7 +41,7 @@ void caller_streaming(void) __arm_streaming {
 #ifdef TEST_LOCALLY
 __arm_locally_streaming
 void caller_local(void) {
-    inlined_fn(); // expected-error {{always_inline function 'inlined_fn' and its caller 'caller_local' have mismatching streaming attributes}}
+    inlined_fn(); // expected-warning {{always_inline function 'inlined_fn' and its caller 'caller_local' have mismatching streaming attributes, inlining may change runtime behaviour}}
     inlined_fn_streaming_compatible();
     inlined_fn_streaming();
     inlined_fn_local();

From 56f4adeee36560b839d0d04d4a5a8d8935006f63 Mon Sep 17 00:00:00 2001
From: Mital Ashok <mital@mitalashok.co.uk>
Date: Thu, 1 Aug 2024 15:05:46 +0100
Subject: [PATCH 102/427] [Clang] Fix definition of layout-compatible to ignore
 empty classes (#92103)

Also changes the behaviour of `__builtin_is_layout_compatible`

None of the historic nor the current definition of layout-compatible
classes mention anything about base classes (other than implicitly
through being standard-layout) and are defined in terms of members, not
direct members.
---
 clang/include/clang/AST/DeclCXX.h  |  7 +++
 clang/lib/AST/DeclCXX.cpp          | 36 +++++++++++++++
 clang/lib/Sema/SemaChecking.cpp    | 74 ++++++++++--------------------
 clang/test/SemaCXX/type-traits.cpp | 11 +++++
 llvm/include/llvm/ADT/STLExtras.h  |  6 +++
 5 files changed, 84 insertions(+), 50 deletions(-)

diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index fb52ac804849d..0923736a95f97 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -1210,6 +1210,13 @@ class CXXRecordDecl : public RecordDecl {
     return D.HasPublicFields || D.HasProtectedFields || D.HasPrivateFields;
   }
 
+  /// If this is a standard-layout class or union, any and all data members will
+  /// be declared in the same type.
+  ///
+  /// This retrieves the type where any fields are declared,
+  /// or the current class if there is no class with fields.
+  const CXXRecordDecl *getStandardLayoutBaseWithFields() const;
+
   /// Whether this class is polymorphic (C++ [class.virtual]),
   /// which means that the class contains or inherits a virtual function.
   bool isPolymorphic() const { return data().Polymorphic; }
diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp
index b573c2713a3aa..9a3ede426e914 100644
--- a/clang/lib/AST/DeclCXX.cpp
+++ b/clang/lib/AST/DeclCXX.cpp
@@ -561,6 +561,42 @@ void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) {
     data().StructuralIfLiteral = false;
 }
 
+const CXXRecordDecl *CXXRecordDecl::getStandardLayoutBaseWithFields() const {
+  assert(
+      isStandardLayout() &&
+      "getStandardLayoutBaseWithFields called on a non-standard-layout type");
+#ifdef EXPENSIVE_CHECKS
+  {
+    unsigned NumberOfBasesWithFields = 0;
+    if (!field_empty())
+      ++NumberOfBasesWithFields;
+    llvm::SmallPtrSet<const CXXRecordDecl *, 8> UniqueBases;
+    forallBases([&](const CXXRecordDecl *Base) -> bool {
+      if (!Base->field_empty())
+        ++NumberOfBasesWithFields;
+      assert(
+          UniqueBases.insert(Base->getCanonicalDecl()).second &&
+          "Standard layout struct has multiple base classes of the same type");
+      return true;
+    });
+    assert(NumberOfBasesWithFields <= 1 &&
+           "Standard layout struct has fields declared in more than one class");
+  }
+#endif
+  if (!field_empty())
+    return this;
+  const CXXRecordDecl *Result = this;
+  forallBases([&](const CXXRecordDecl *Base) -> bool {
+    if (!Base->field_empty()) {
+      // This is the base where the fields are declared; return early
+      Result = Base;
+      return false;
+    }
+    return true;
+  });
+  return Result;
+}
+
 bool CXXRecordDecl::hasConstexprDestructor() const {
   auto *Dtor = getDestructor();
   return Dtor ? Dtor->isConstexpr() : defaultedDestructorIsConstexpr();
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index cf1196ad23c21..9088b5e285bf8 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -13664,10 +13664,11 @@ void Sema::DiagnoseSelfMove(const Expr *LHSExpr, const Expr *RHSExpr,
 
 //===--- Layout compatibility ----------------------------------------------//
 
-static bool isLayoutCompatible(ASTContext &C, QualType T1, QualType T2);
+static bool isLayoutCompatible(const ASTContext &C, QualType T1, QualType T2);
 
 /// Check if two enumeration types are layout-compatible.
-static bool isLayoutCompatible(ASTContext &C, EnumDecl *ED1, EnumDecl *ED2) {
+static bool isLayoutCompatible(const ASTContext &C, const EnumDecl *ED1,
+                               const EnumDecl *ED2) {
   // C++11 [dcl.enum] p8:
   // Two enumeration types are layout-compatible if they have the same
   // underlying type.
@@ -13678,8 +13679,8 @@ static bool isLayoutCompatible(ASTContext &C, EnumDecl *ED1, EnumDecl *ED2) {
 /// Check if two fields are layout-compatible.
 /// Can be used on union members, which are exempt from alignment requirement
 /// of common initial sequence.
-static bool isLayoutCompatible(ASTContext &C, FieldDecl *Field1,
-                               FieldDecl *Field2,
+static bool isLayoutCompatible(const ASTContext &C, const FieldDecl *Field1,
+                               const FieldDecl *Field2,
                                bool AreUnionMembers = false) {
   [[maybe_unused]] const Type *Field1Parent =
       Field1->getParent()->getTypeForDecl();
@@ -13722,60 +13723,33 @@ static bool isLayoutCompatible(ASTContext &C, FieldDecl *Field1,
 
 /// Check if two standard-layout structs are layout-compatible.
 /// (C++11 [class.mem] p17)
-static bool isLayoutCompatibleStruct(ASTContext &C, RecordDecl *RD1,
-                                     RecordDecl *RD2) {
-  // If both records are C++ classes, check that base classes match.
-  if (const CXXRecordDecl *D1CXX = dyn_cast<CXXRecordDecl>(RD1)) {
-    // If one of records is a CXXRecordDecl we are in C++ mode,
-    // thus the other one is a CXXRecordDecl, too.
-    const CXXRecordDecl *D2CXX = cast<CXXRecordDecl>(RD2);
-    // Check number of base classes.
-    if (D1CXX->getNumBases() != D2CXX->getNumBases())
-      return false;
+static bool isLayoutCompatibleStruct(const ASTContext &C, const RecordDecl *RD1,
+                                     const RecordDecl *RD2) {
+  // Get to the class where the fields are declared
+  if (const CXXRecordDecl *D1CXX = dyn_cast<CXXRecordDecl>(RD1))
+    RD1 = D1CXX->getStandardLayoutBaseWithFields();
 
-    // Check the base classes.
-    for (CXXRecordDecl::base_class_const_iterator
-               Base1 = D1CXX->bases_begin(),
-           BaseEnd1 = D1CXX->bases_end(),
-              Base2 = D2CXX->bases_begin();
-         Base1 != BaseEnd1;
-         ++Base1, ++Base2) {
-      if (!isLayoutCompatible(C, Base1->getType(), Base2->getType()))
-        return false;
-    }
-  } else if (const CXXRecordDecl *D2CXX = dyn_cast<CXXRecordDecl>(RD2)) {
-    // If only RD2 is a C++ class, it should have zero base classes.
-    if (D2CXX->getNumBases() > 0)
-      return false;
-  }
+  if (const CXXRecordDecl *D2CXX = dyn_cast<CXXRecordDecl>(RD2))
+    RD2 = D2CXX->getStandardLayoutBaseWithFields();
 
   // Check the fields.
-  RecordDecl::field_iterator Field2 = RD2->field_begin(),
-                             Field2End = RD2->field_end(),
-                             Field1 = RD1->field_begin(),
-                             Field1End = RD1->field_end();
-  for ( ; Field1 != Field1End && Field2 != Field2End; ++Field1, ++Field2) {
-    if (!isLayoutCompatible(C, *Field1, *Field2))
-      return false;
-  }
-  if (Field1 != Field1End || Field2 != Field2End)
-    return false;
-
-  return true;
+  return llvm::equal(RD1->fields(), RD2->fields(),
+                     [&C](const FieldDecl *F1, const FieldDecl *F2) -> bool {
+                       return isLayoutCompatible(C, F1, F2);
+                     });
 }
 
 /// Check if two standard-layout unions are layout-compatible.
 /// (C++11 [class.mem] p18)
-static bool isLayoutCompatibleUnion(ASTContext &C, RecordDecl *RD1,
-                                    RecordDecl *RD2) {
-  llvm::SmallPtrSet<FieldDecl *, 8> UnmatchedFields;
+static bool isLayoutCompatibleUnion(const ASTContext &C, const RecordDecl *RD1,
+                                    const RecordDecl *RD2) {
+  llvm::SmallPtrSet<const FieldDecl *, 8> UnmatchedFields;
   for (auto *Field2 : RD2->fields())
     UnmatchedFields.insert(Field2);
 
   for (auto *Field1 : RD1->fields()) {
-    llvm::SmallPtrSet<FieldDecl *, 8>::iterator
-        I = UnmatchedFields.begin(),
-        E = UnmatchedFields.end();
+    auto I = UnmatchedFields.begin();
+    auto E = UnmatchedFields.end();
 
     for ( ; I != E; ++I) {
       if (isLayoutCompatible(C, Field1, *I, /*IsUnionMember=*/true)) {
@@ -13792,8 +13766,8 @@ static bool isLayoutCompatibleUnion(ASTContext &C, RecordDecl *RD1,
   return UnmatchedFields.empty();
 }
 
-static bool isLayoutCompatible(ASTContext &C, RecordDecl *RD1,
-                               RecordDecl *RD2) {
+static bool isLayoutCompatible(const ASTContext &C, const RecordDecl *RD1,
+                               const RecordDecl *RD2) {
   if (RD1->isUnion() != RD2->isUnion())
     return false;
 
@@ -13804,7 +13778,7 @@ static bool isLayoutCompatible(ASTContext &C, RecordDecl *RD1,
 }
 
 /// Check if two types are layout-compatible in C++11 sense.
-static bool isLayoutCompatible(ASTContext &C, QualType T1, QualType T2) {
+static bool isLayoutCompatible(const ASTContext &C, QualType T1, QualType T2) {
   if (T1.isNull() || T2.isNull())
     return false;
 
diff --git a/clang/test/SemaCXX/type-traits.cpp b/clang/test/SemaCXX/type-traits.cpp
index 23b07cac13eaf..7c5be2ab374a7 100644
--- a/clang/test/SemaCXX/type-traits.cpp
+++ b/clang/test/SemaCXX/type-traits.cpp
@@ -1738,6 +1738,11 @@ struct CStructWithFMA2 {
   int f[];
 };
 
+template<int N>
+struct UniqueEmpty {};
+template<typename... Bases>
+struct D : Bases... {};
+
 void is_layout_compatible(int n)
 {
   static_assert(__is_layout_compatible(void, void));
@@ -1841,6 +1846,12 @@ void is_layout_compatible(int n)
   static_assert(!__is_layout_compatible(EnumClassLayout, int));
   static_assert(!__is_layout_compatible(EnumForward, int));
   static_assert(!__is_layout_compatible(EnumClassForward, int));
+  static_assert(__is_layout_compatible(CStruct, D<CStruct>));
+  static_assert(__is_layout_compatible(CStruct, D<UniqueEmpty<0>, CStruct>));
+  static_assert(__is_layout_compatible(CStruct, D<UniqueEmpty<0>, D<UniqueEmpty<1>, CStruct>, D<UniqueEmpty<2>>>));
+  static_assert(__is_layout_compatible(CStruct, D<CStructWithQualifiers>));
+  static_assert(__is_layout_compatible(CStruct, D<UniqueEmpty<0>, CStructWithQualifiers>));
+  static_assert(__is_layout_compatible(CStructWithQualifiers, D<UniqueEmpty<0>, D<UniqueEmpty<1>, CStruct>, D<UniqueEmpty<2>>>));
 }
 
 namespace IPIBO {
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index e34068592de81..8f988d01cb2a6 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -2027,6 +2027,12 @@ template <typename L, typename R> bool equal(L &&LRange, R &&RRange) {
                     adl_end(RRange));
 }
 
+template <typename L, typename R, typename BinaryPredicate>
+bool equal(L &&LRange, R &&RRange, BinaryPredicate P) {
+  return std::equal(adl_begin(LRange), adl_end(LRange), adl_begin(RRange),
+                    adl_end(RRange), P);
+}
+
 /// Returns true if all elements in Range are equal or when the Range is empty.
 template <typename R> bool all_equal(R &&Range) {
   auto Begin = adl_begin(Range);

From a444324b6a5f687a235c402e05e7a79e3b17bd7b Mon Sep 17 00:00:00 2001
From: Stefan Pintilie <stefanp@ca.ibm.com>
Date: Mon, 29 Jul 2024 11:17:04 -0400
Subject: [PATCH 103/427] [PowerPC] Add phony subregisters to cover the high
 half of the VSX registers. (#94628)

On PowerPC there are 128 bit VSX registers. These registers are half
overlapped with 64 bit floating point registers (FPR). The 64 bit half
of the VXS register that does not overlap with the FPR does not overlap
with any other register class. The FPR are the only subregisters of the
VSX registers but they do not fully cover the 128 bit super register.
This leads to incorrect lane masks being created.

This patch adds phony registers for the other half of the VSX registers
in order to fully cover them and to make sure that the lane masks are
not the same for the VSX and the floating point register.

(cherry picked from commit 53c37f300dd1b450671f2aee4cc649c380adb5ad)
---
 llvm/lib/Target/PowerPC/PPCRegisterInfo.td    |  35 +-
 .../CodeGen/PowerPC/aix-vec_insert_elt.ll     |   4 -
 .../aix32-p8-scalar_vector_conversions.ll     |   2 -
 .../CodeGen/PowerPC/build-vector-tests.ll     |  96 -----
 .../builtins-ppc-xlcompat-pwr9-64bit.ll       |   1 -
 .../PowerPC/canonical-merge-shuffles.ll       |   6 -
 llvm/test/CodeGen/PowerPC/combine-fneg.ll     |   1 -
 llvm/test/CodeGen/PowerPC/constant-pool.ll    |   8 -
 llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll   |   5 +-
 llvm/test/CodeGen/PowerPC/fma-combine.ll      |   2 -
 llvm/test/CodeGen/PowerPC/fp-strict-round.ll  |   8 -
 llvm/test/CodeGen/PowerPC/frem.ll             |   5 -
 .../PowerPC/handle-f16-storage-type.ll        |  13 +-
 llvm/test/CodeGen/PowerPC/ldexp.ll            |   4 -
 .../PowerPC/p10-splatImm-CPload-pcrel.ll      |  12 -
 .../PowerPC/p8-scalar_vector_conversions.ll   |   6 -
 .../PowerPC/pcrel-call-linkage-leaf.ll        |   1 -
 llvm/test/CodeGen/PowerPC/select_const.ll     |   2 -
 .../test/CodeGen/PowerPC/subreg-coalescer.mir |   1 -
 .../test/CodeGen/PowerPC/subreg-lanemasks.mir |  23 +-
 llvm/test/CodeGen/PowerPC/toc-float.ll        |   2 -
 .../PowerPC/variable_elem_vec_extracts.ll     |   3 -
 llvm/test/CodeGen/PowerPC/vec_insert_elt.ll   |   4 -
 .../vector-constrained-fp-intrinsics.ll       | 332 +-----------------
 llvm/test/CodeGen/PowerPC/vector-llrint.ll    |  28 +-
 llvm/test/CodeGen/PowerPC/vector-lrint.ll     |  28 +-
 .../CodeGen/PowerPC/vector-reduce-fadd.ll     |  40 ---
 .../CodeGen/PowerPC/vector-reduce-fmax.ll     |  20 --
 .../CodeGen/PowerPC/vector-reduce-fmin.ll     |  20 --
 .../CodeGen/PowerPC/vector-reduce-fmul.ll     |  16 -
 llvm/test/CodeGen/PowerPC/vsx.ll              |   3 -
 31 files changed, 61 insertions(+), 670 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index fdbdc14736c86..3cb7cd9d8f229 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -17,6 +17,7 @@ def sub_un : SubRegIndex<1, 3>;
 def sub_32 : SubRegIndex<32>;
 def sub_32_hi_phony : SubRegIndex<32,32>;
 def sub_64 : SubRegIndex<64>;
+def sub_64_hi_phony : SubRegIndex<64,64>;
 def sub_vsx0 : SubRegIndex<128>;
 def sub_vsx1 : SubRegIndex<128, 128>;
 def sub_gp8_x0 : SubRegIndex<64>;
@@ -77,19 +78,19 @@ class VF<bits<5> num, string n> : PPCReg<n> {
 }
 
 // VR - One of the 32 128-bit vector registers
-class VR<VF SubReg, string n> : PPCReg<n> {
+class VR<VF SubReg, VF SubRegH, string n> : PPCReg<n> {
   let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
   let HWEncoding{5} = 0;
-  let SubRegs = [SubReg];
-  let SubRegIndices = [sub_64];
+  let SubRegs = [SubReg, SubRegH];
+  let SubRegIndices = [sub_64, sub_64_hi_phony];
 }
 
 // VSRL - One of the 32 128-bit VSX registers that overlap with the scalar
 // floating-point registers.
-class VSRL<FPR SubReg, string n> : PPCReg<n> {
+class VSRL<FPR SubReg, FPR SubRegH, string n> : PPCReg<n> {
   let HWEncoding = SubReg.HWEncoding;
-  let SubRegs = [SubReg];
-  let SubRegIndices = [sub_64];
+  let SubRegs = [SubReg, SubRegH];
+  let SubRegIndices = [sub_64, sub_64_hi_phony];
 }
 
 // VSXReg - One of the VSX registers in the range vs32-vs63 with numbering
@@ -155,6 +156,22 @@ foreach Index = 0-31 in {
                 DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
 }
 
+// The FH and VFH registers have been marked as Artifical because there are no
+// instructions on PowerPC that use those register classes. They only exist
+// in order to ensure that the super registers (V and VSL) are covered by their
+// subregisters and have correct subregister lane masks.
+let isArtificial = 1 in {
+  foreach Index = 0-31 in {
+    def FH#Index : FPR<-1, "">;
+    def VFH#Index : VF<-1, "">;
+  }
+}
+
+let isAllocatable = 0, CopyCost = -1 in {
+  def VFHRC : RegisterClass<"PPC", [f64], 64, (sequence "VFH%u", 0, 31)>;
+  def FHRC : RegisterClass<"PPC", [f64], 64, (sequence "FH%u", 0, 31)>;
+}
+
 // Floating-point pair registers
 foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in {
   def Fpair#Index : FPPair<"fp"#Index, Index>;
@@ -168,17 +185,19 @@ foreach Index = 0-31 in {
                  DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
 }
 
+let CoveredBySubRegs = 1 in {
 // Vector registers
 foreach Index = 0-31 in {
-  def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
+  def V#Index : VR<!cast<VF>("VF"#Index), !cast<VF>("VFH"#Index), "v"#Index>,
                 DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
 }
 
 // VSX registers
 foreach Index = 0-31 in {
-  def VSL#Index : VSRL<!cast<FPR>("F"#Index), "vs"#Index>,
+  def VSL#Index : VSRL<!cast<FPR>("F"#Index), !cast<FPR>("FH"#Index), "vs"#Index>,
                   DwarfRegAlias<!cast<FPR>("F"#Index)>;
 }
+}
 
 // Dummy VSX registers, this defines string: "vs32"-"vs63", and is only used for
 // asm printing.
diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
index aae23265710ce..afc7a39e18dc8 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
@@ -750,25 +750,21 @@ entry:
 define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) {
 ; CHECK-64-LABEL: testDoubleImm1:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-64-NEXT:    xxpermdi 34, 1, 34, 1
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: testDoubleImm1:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-32-NEXT:    xxpermdi 34, 1, 34, 1
 ; CHECK-32-NEXT:    blr
 ;
 ; CHECK-64-P10-LABEL: testDoubleImm1:
 ; CHECK-64-P10:       # %bb.0: # %entry
-; CHECK-64-P10-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-64-P10-NEXT:    xxpermdi 34, 1, 34, 1
 ; CHECK-64-P10-NEXT:    blr
 ;
 ; CHECK-32-P10-LABEL: testDoubleImm1:
 ; CHECK-32-P10:       # %bb.0: # %entry
-; CHECK-32-P10-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-32-P10-NEXT:    xxpermdi 34, 1, 34, 1
 ; CHECK-32-P10-NEXT:    blr
 entry:
diff --git a/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll
index 19e298a633e0b..2f543da9b29a0 100644
--- a/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll
+++ b/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll
@@ -1099,7 +1099,6 @@ define double @getd1(<2 x double> %vd) {
 ; CHECK-LABEL: getd1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxswapd 1, 34
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 entry:
   %vecext = extractelement <2 x double> %vd, i32 1
@@ -1115,7 +1114,6 @@ define double @getveld(<2 x double> %vd, i32 signext %i) {
 ; CHECK-NEXT:    lvsl 3, 0, 3
 ; CHECK-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-NEXT:    xxlor 1, 34, 34
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 entry:
   %vecext = extractelement <2 x double> %vd, i32 %i
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index f729018dd4106..91431ed15f6a7 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -1319,11 +1319,7 @@ entry:
 define <4 x i32> @fromRegsConvftoi(float %a, float %b, float %c, float %d) {
 ; P9BE-LABEL: fromRegsConvftoi:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
 ; P9BE-NEXT:    xxmrghd vs0, vs2, vs4
-; P9BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
-; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9BE-NEXT:    xvcvdpsxws v2, vs0
 ; P9BE-NEXT:    xxmrghd vs0, vs1, vs3
 ; P9BE-NEXT:    xvcvdpsxws v3, vs0
@@ -1332,11 +1328,7 @@ define <4 x i32> @fromRegsConvftoi(float %a, float %b, float %c, float %d) {
 ;
 ; P9LE-LABEL: fromRegsConvftoi:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
-; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9LE-NEXT:    xxmrghd vs0, vs3, vs1
-; P9LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
 ; P9LE-NEXT:    xvcvdpsxws v2, vs0
 ; P9LE-NEXT:    xxmrghd vs0, vs4, vs2
 ; P9LE-NEXT:    xvcvdpsxws v3, vs0
@@ -1345,10 +1337,6 @@ define <4 x i32> @fromRegsConvftoi(float %a, float %b, float %c, float %d) {
 ;
 ; P8BE-LABEL: fromRegsConvftoi:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; P8BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
-; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    xxmrghd vs0, vs2, vs4
 ; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
 ; P8BE-NEXT:    xvcvdpsxws v2, vs0
@@ -1358,10 +1346,6 @@ define <4 x i32> @fromRegsConvftoi(float %a, float %b, float %c, float %d) {
 ;
 ; P8LE-LABEL: fromRegsConvftoi:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; P8LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
-; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    xxmrghd vs0, vs3, vs1
 ; P8LE-NEXT:    xxmrghd vs1, vs4, vs2
 ; P8LE-NEXT:    xvcvdpsxws v2, vs0
@@ -1773,11 +1757,7 @@ entry:
 define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
 ; P9BE-LABEL: fromRegsConvdtoi:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
 ; P9BE-NEXT:    xxmrghd vs0, vs2, vs4
-; P9BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
-; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9BE-NEXT:    xvcvdpsxws v2, vs0
 ; P9BE-NEXT:    xxmrghd vs0, vs1, vs3
 ; P9BE-NEXT:    xvcvdpsxws v3, vs0
@@ -1786,11 +1766,7 @@ define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
 ;
 ; P9LE-LABEL: fromRegsConvdtoi:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
-; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9LE-NEXT:    xxmrghd vs0, vs3, vs1
-; P9LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
 ; P9LE-NEXT:    xvcvdpsxws v2, vs0
 ; P9LE-NEXT:    xxmrghd vs0, vs4, vs2
 ; P9LE-NEXT:    xvcvdpsxws v3, vs0
@@ -1799,10 +1775,6 @@ define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
 ;
 ; P8BE-LABEL: fromRegsConvdtoi:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; P8BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
-; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    xxmrghd vs0, vs2, vs4
 ; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
 ; P8BE-NEXT:    xvcvdpsxws v2, vs0
@@ -1812,10 +1784,6 @@ define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
 ;
 ; P8LE-LABEL: fromRegsConvdtoi:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; P8LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
-; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    xxmrghd vs0, vs3, vs1
 ; P8LE-NEXT:    xxmrghd vs1, vs4, vs2
 ; P8LE-NEXT:    xvcvdpsxws v2, vs0
@@ -2839,11 +2807,7 @@ entry:
 define <4 x i32> @fromRegsConvftoui(float %a, float %b, float %c, float %d) {
 ; P9BE-LABEL: fromRegsConvftoui:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
 ; P9BE-NEXT:    xxmrghd vs0, vs2, vs4
-; P9BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
-; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9BE-NEXT:    xvcvdpuxws v2, vs0
 ; P9BE-NEXT:    xxmrghd vs0, vs1, vs3
 ; P9BE-NEXT:    xvcvdpuxws v3, vs0
@@ -2852,11 +2816,7 @@ define <4 x i32> @fromRegsConvftoui(float %a, float %b, float %c, float %d) {
 ;
 ; P9LE-LABEL: fromRegsConvftoui:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
-; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9LE-NEXT:    xxmrghd vs0, vs3, vs1
-; P9LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
 ; P9LE-NEXT:    xvcvdpuxws v2, vs0
 ; P9LE-NEXT:    xxmrghd vs0, vs4, vs2
 ; P9LE-NEXT:    xvcvdpuxws v3, vs0
@@ -2865,10 +2825,6 @@ define <4 x i32> @fromRegsConvftoui(float %a, float %b, float %c, float %d) {
 ;
 ; P8BE-LABEL: fromRegsConvftoui:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; P8BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
-; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    xxmrghd vs0, vs2, vs4
 ; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
 ; P8BE-NEXT:    xvcvdpuxws v2, vs0
@@ -2878,10 +2834,6 @@ define <4 x i32> @fromRegsConvftoui(float %a, float %b, float %c, float %d) {
 ;
 ; P8LE-LABEL: fromRegsConvftoui:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; P8LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
-; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    xxmrghd vs0, vs3, vs1
 ; P8LE-NEXT:    xxmrghd vs1, vs4, vs2
 ; P8LE-NEXT:    xvcvdpuxws v2, vs0
@@ -3294,11 +3246,7 @@ entry:
 define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d) {
 ; P9BE-LABEL: fromRegsConvdtoui:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
 ; P9BE-NEXT:    xxmrghd vs0, vs2, vs4
-; P9BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
-; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9BE-NEXT:    xvcvdpuxws v2, vs0
 ; P9BE-NEXT:    xxmrghd vs0, vs1, vs3
 ; P9BE-NEXT:    xvcvdpuxws v3, vs0
@@ -3307,11 +3255,7 @@ define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d)
 ;
 ; P9LE-LABEL: fromRegsConvdtoui:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
-; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9LE-NEXT:    xxmrghd vs0, vs3, vs1
-; P9LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
 ; P9LE-NEXT:    xvcvdpuxws v2, vs0
 ; P9LE-NEXT:    xxmrghd vs0, vs4, vs2
 ; P9LE-NEXT:    xvcvdpuxws v3, vs0
@@ -3320,10 +3264,6 @@ define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d)
 ;
 ; P8BE-LABEL: fromRegsConvdtoui:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; P8BE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
-; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    xxmrghd vs0, vs2, vs4
 ; P8BE-NEXT:    xxmrghd vs1, vs1, vs3
 ; P8BE-NEXT:    xvcvdpuxws v2, vs0
@@ -3333,10 +3273,6 @@ define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d)
 ;
 ; P8LE-LABEL: fromRegsConvdtoui:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; P8LE-NEXT:    # kill: def $f3 killed $f3 def $vsl3
-; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    xxmrghd vs0, vs3, vs1
 ; P8LE-NEXT:    xxmrghd vs1, vs4, vs2
 ; P8LE-NEXT:    xvcvdpuxws v2, vs0
@@ -4269,32 +4205,24 @@ entry:
 define <2 x i64> @fromRegsConvftoll(float %a, float %b) {
 ; P9BE-LABEL: fromRegsConvftoll:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9BE-NEXT:    xxmrghd vs0, vs1, vs2
 ; P9BE-NEXT:    xvcvdpsxds v2, vs0
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: fromRegsConvftoll:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9LE-NEXT:    xxmrghd vs0, vs2, vs1
 ; P9LE-NEXT:    xvcvdpsxds v2, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fromRegsConvftoll:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    xxmrghd vs0, vs1, vs2
 ; P8BE-NEXT:    xvcvdpsxds v2, vs0
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fromRegsConvftoll:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    xxmrghd vs0, vs2, vs1
 ; P8LE-NEXT:    xvcvdpsxds v2, vs0
 ; P8LE-NEXT:    blr
@@ -4630,32 +4558,24 @@ entry:
 define <2 x i64> @fromRegsConvdtoll(double %a, double %b) {
 ; P9BE-LABEL: fromRegsConvdtoll:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9BE-NEXT:    xxmrghd vs0, vs1, vs2
 ; P9BE-NEXT:    xvcvdpsxds v2, vs0
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: fromRegsConvdtoll:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9LE-NEXT:    xxmrghd vs0, vs2, vs1
 ; P9LE-NEXT:    xvcvdpsxds v2, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fromRegsConvdtoll:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    xxmrghd vs0, vs1, vs2
 ; P8BE-NEXT:    xvcvdpsxds v2, vs0
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fromRegsConvdtoll:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    xxmrghd vs0, vs2, vs1
 ; P8LE-NEXT:    xvcvdpsxds v2, vs0
 ; P8LE-NEXT:    blr
@@ -5451,32 +5371,24 @@ entry:
 define <2 x i64> @fromRegsConvftoull(float %a, float %b) {
 ; P9BE-LABEL: fromRegsConvftoull:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9BE-NEXT:    xxmrghd vs0, vs1, vs2
 ; P9BE-NEXT:    xvcvdpuxds v2, vs0
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: fromRegsConvftoull:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9LE-NEXT:    xxmrghd vs0, vs2, vs1
 ; P9LE-NEXT:    xvcvdpuxds v2, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fromRegsConvftoull:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    xxmrghd vs0, vs1, vs2
 ; P8BE-NEXT:    xvcvdpuxds v2, vs0
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fromRegsConvftoull:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    xxmrghd vs0, vs2, vs1
 ; P8LE-NEXT:    xvcvdpuxds v2, vs0
 ; P8LE-NEXT:    blr
@@ -5812,32 +5724,24 @@ entry:
 define <2 x i64> @fromRegsConvdtoull(double %a, double %b) {
 ; P9BE-LABEL: fromRegsConvdtoull:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9BE-NEXT:    xxmrghd vs0, vs1, vs2
 ; P9BE-NEXT:    xvcvdpuxds v2, vs0
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: fromRegsConvdtoull:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9LE-NEXT:    xxmrghd vs0, vs2, vs1
 ; P9LE-NEXT:    xvcvdpuxds v2, vs0
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: fromRegsConvdtoull:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P8BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8BE-NEXT:    xxmrghd vs0, vs1, vs2
 ; P8BE-NEXT:    xvcvdpuxds v2, vs0
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: fromRegsConvdtoull:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; P8LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8LE-NEXT:    xxmrghd vs0, vs2, vs1
 ; P8LE-NEXT:    xvcvdpuxds v2, vs0
 ; P8LE-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-pwr9-64bit.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-pwr9-64bit.ll
index 7aa8b0e7e8327..798c23cd6b961 100644
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-pwr9-64bit.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-pwr9-64bit.ll
@@ -22,7 +22,6 @@ define dso_local double @insert_exp(double %d, i64 %ull) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mffprd 3, 1
 ; CHECK-NEXT:    xsiexpdp 1, 3, 4
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 entry:
   %0 = tail call double @llvm.ppc.insert.exp(double %d, i64 %ull)
diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index f2bd4c7f40a46..c26f98c5b0495 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -565,7 +565,6 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; CHECK-P8-NEXT:    bl dummy
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxlxor f0, f0, f0
-; CHECK-P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-P8-NEXT:    xxmrghd vs0, vs1, vs0
 ; CHECK-P8-NEXT:    xxswapd vs0, vs0
 ; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
@@ -580,7 +579,6 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; CHECK-P9-NEXT:    bl dummy
 ; CHECK-P9-NEXT:    nop
 ; CHECK-P9-NEXT:    xxlxor f0, f0, f0
-; CHECK-P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-P9-NEXT:    xxmrghd vs0, vs1, vs0
 ; CHECK-P9-NEXT:    stxv vs0, 0(r30)
 ;
@@ -594,7 +592,6 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; CHECK-P9-BE-NEXT:    bl dummy
 ; CHECK-P9-BE-NEXT:    nop
 ; CHECK-P9-BE-NEXT:    xxlxor f0, f0, f0
-; CHECK-P9-BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-P9-BE-NEXT:    xxmrghd vs0, vs0, vs1
 ; CHECK-P9-BE-NEXT:    stxv vs0, 0(r30)
 ;
@@ -621,7 +618,6 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; CHECK-P7-NEXT:    bl dummy
 ; CHECK-P7-NEXT:    nop
 ; CHECK-P7-NEXT:    xxlxor f0, f0, f0
-; CHECK-P7-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-P7-NEXT:    xxmrghd vs0, vs1, vs0
 ; CHECK-P7-NEXT:    xxswapd vs0, vs0
 ; CHECK-P7-NEXT:    stxvd2x vs0, 0, r30
@@ -636,7 +632,6 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; P8-AIX-64-NEXT:    bl .dummy[PR]
 ; P8-AIX-64-NEXT:    nop
 ; P8-AIX-64-NEXT:    xxlxor f0, f0, f0
-; P8-AIX-64-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8-AIX-64-NEXT:    xxmrghd vs0, vs0, vs1
 ; P8-AIX-64-NEXT:    stxvd2x vs0, 0, r31
 ;
@@ -650,7 +645,6 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; P8-AIX-32-NEXT:    bl .dummy[PR]
 ; P8-AIX-32-NEXT:    nop
 ; P8-AIX-32-NEXT:    xxlxor f0, f0, f0
-; P8-AIX-32-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8-AIX-32-NEXT:    xxmrghd vs0, vs0, vs1
 ; P8-AIX-32-NEXT:    stxvd2x vs0, 0, r31
 test_entry:
diff --git a/llvm/test/CodeGen/PowerPC/combine-fneg.ll b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
index a72abf7007e8d..04af0947c7a33 100644
--- a/llvm/test/CodeGen/PowerPC/combine-fneg.ll
+++ b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
@@ -6,7 +6,6 @@ define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {
 ; CHECK-LABEL: fneg_fdiv_splat:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxspltd 0, 1, 0
 ; CHECK-NEXT:    addi 3, 3, .LCPI0_0@toc@l
 ; CHECK-NEXT:    xvredp 1, 0
diff --git a/llvm/test/CodeGen/PowerPC/constant-pool.ll b/llvm/test/CodeGen/PowerPC/constant-pool.ll
index a9feb93627b06..2ded7215d8fd6 100644
--- a/llvm/test/CodeGen/PowerPC/constant-pool.ll
+++ b/llvm/test/CodeGen/PowerPC/constant-pool.ll
@@ -11,7 +11,6 @@
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxsplti32dx vs1, 0, 940572664
 ; CHECK-NEXT:    xxsplti32dx vs1, 1, 1073741824
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: FloatConstantPool:
@@ -28,7 +27,6 @@ entry:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxsplti32dx vs1, 0, 1048574
 ; CHECK-NEXT:    xxsplti32dx vs1, 1, 780229072
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: DoubleConstantPool:
@@ -47,8 +45,6 @@ entry:
 ; CHECK-NEXT:    xxsplti32dx vs2, 0, -2146625897
 ; CHECK-NEXT:    xxsplti32dx vs1, 1, -609716532
 ; CHECK-NEXT:    xxsplti32dx vs2, 1, 1339675259
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; CHECK-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: LongDoubleConstantPool:
@@ -224,13 +220,11 @@ define double @two_constants_two_bb(i32 %m, double %a) {
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    xxsplti32dx vs1, 0, 1074935889
 ; CHECK-NEXT:    xxsplti32dx vs1, 1, -343597384
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ; CHECK-NEXT:  .LBB12_2: # %if.end
 ; CHECK-NEXT:    xxsplti32dx vs0, 0, 1076085391
 ; CHECK-NEXT:    xxsplti32dx vs0, 1, 1546188227
 ; CHECK-NEXT:    xsadddp f1, f1, f0
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: two_constants_two_bb:
@@ -369,12 +363,10 @@ define ppc_fp128 @three_constants_ppcf128(ppc_fp128 %a, ppc_fp128 %c) {
 ; CHECK-NEXT:    stxv vs63, 32(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:    xxsplti32dx vs63, 0, 1074935889
 ; CHECK-NEXT:    xxsplti32dx vs3, 1, -343597384
-; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
 ; CHECK-NEXT:    bl __gcc_qadd@notoc
 ; CHECK-NEXT:    xxsplti32dx vs3, 0, 1074935889
 ; CHECK-NEXT:    xxlxor f4, f4, f4
 ; CHECK-NEXT:    xxsplti32dx vs3, 1, -1719329096
-; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
 ; CHECK-NEXT:    bl __gcc_qadd@notoc
 ; CHECK-NEXT:    xxsplti32dx vs63, 1, 8724152
 ; CHECK-NEXT:    xxlxor f4, f4, f4
diff --git a/llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll b/llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll
index fc0bfef11d7a6..9d537d89c009d 100644
--- a/llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll
+++ b/llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll
@@ -403,11 +403,10 @@ define void @call_test_byval_mem32_2() #0 {
 ; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    addis 3, 2, .LC5@toc@ha
 ; CHECK-NEXT:    vspltisw 2, 1
+; CHECK-NEXT:    addis 3, 2, .LC5@toc@ha
 ; CHECK-NEXT:    ld 3, .LC5@toc@l(3)
 ; CHECK-NEXT:    xvcvsxwdp 1, 34
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    ld 7, 24(3)
 ; CHECK-NEXT:    ld 6, 16(3)
 ; CHECK-NEXT:    ld 5, 8(3)
@@ -453,9 +452,7 @@ define void @call_test_byval_mem32_3() #0 {
 ; CHECK-NEXT:    li 7, 2
 ; CHECK-NEXT:    ld 3, .LC5@toc@l(3)
 ; CHECK-NEXT:    xvcvsxwdp 1, 34
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    xvcvsxwdp 2, 35
-; CHECK-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; CHECK-NEXT:    lxvd2x 0, 3, 4
 ; CHECK-NEXT:    li 4, 88
 ; CHECK-NEXT:    stxvd2x 0, 1, 4
diff --git a/llvm/test/CodeGen/PowerPC/fma-combine.ll b/llvm/test/CodeGen/PowerPC/fma-combine.ll
index b39c9327bc8ab..3d45e9a3a509c 100644
--- a/llvm/test/CodeGen/PowerPC/fma-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-combine.ll
@@ -202,7 +202,6 @@ define dso_local double @getNegatedExpression_crash(double %x, double %y) {
 ; CHECK-FAST-NEXT:    xvcvsxwdp 4, 34
 ; CHECK-FAST-NEXT:    lfs 3, .LCPI5_0@toc@l(3)
 ; CHECK-FAST-NEXT:    xssubdp 0, 1, 4
-; CHECK-FAST-NEXT:    # kill: def $f4 killed $f4 killed $vsl4
 ; CHECK-FAST-NEXT:    xsmaddadp 4, 1, 3
 ; CHECK-FAST-NEXT:    xsmaddadp 0, 4, 2
 ; CHECK-FAST-NEXT:    fmr 1, 0
@@ -226,7 +225,6 @@ define dso_local double @getNegatedExpression_crash(double %x, double %y) {
 ; CHECK-NEXT:    xvcvsxwdp 4, 34
 ; CHECK-NEXT:    lfs 3, .LCPI5_0@toc@l(3)
 ; CHECK-NEXT:    xssubdp 0, 1, 4
-; CHECK-NEXT:    # kill: def $f4 killed $f4 killed $vsl4
 ; CHECK-NEXT:    xsmaddadp 4, 1, 3
 ; CHECK-NEXT:    xsmaddadp 0, 4, 2
 ; CHECK-NEXT:    fmr 1, 0
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
index 4c8729b9f43a5..eac4fb6f98bf7 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
@@ -229,7 +229,6 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp
 ; P8-NEXT:    xscvspdpn f1, vs0
 ; P8-NEXT:    bl nearbyintf
 ; P8-NEXT:    nop
-; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8-NEXT:    xxmrghd vs0, vs1, v30
 ; P8-NEXT:    xscvspdpn f1, v31
 ; P8-NEXT:    xvcvdpsp v29, vs0
@@ -240,7 +239,6 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp
 ; P8-NEXT:    xscvspdpn f1, vs0
 ; P8-NEXT:    bl nearbyintf
 ; P8-NEXT:    nop
-; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8-NEXT:    xxmrghd vs0, v30, vs1
 ; P8-NEXT:    li r3, 160
 ; P8-NEXT:    xvcvdpsp v2, vs0
@@ -278,7 +276,6 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp
 ; P9-NEXT:    xscvspdpn f1, vs0
 ; P9-NEXT:    bl nearbyintf
 ; P9-NEXT:    nop
-; P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9-NEXT:    xxmrghd vs0, vs1, v30
 ; P9-NEXT:    xscvspdpn f1, v31
 ; P9-NEXT:    xvcvdpsp v29, vs0
@@ -289,7 +286,6 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp
 ; P9-NEXT:    xscvspdpn f1, vs0
 ; P9-NEXT:    bl nearbyintf
 ; P9-NEXT:    nop
-; P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9-NEXT:    xxmrghd vs0, v30, vs1
 ; P9-NEXT:    lxv v31, 64(r1) # 16-byte Folded Reload
 ; P9-NEXT:    lxv v30, 48(r1) # 16-byte Folded Reload
@@ -327,11 +323,9 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) stric
 ; P8-NEXT:    nop
 ; P8-NEXT:    xxlor v30, f1, f1
 ; P8-NEXT:    xxswapd vs1, v31
-; P8-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; P8-NEXT:    bl nearbyint
 ; P8-NEXT:    nop
 ; P8-NEXT:    li r3, 144
-; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8-NEXT:    xxmrghd v2, v30, vs1
 ; P8-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; P8-NEXT:    li r3, 128
@@ -358,10 +352,8 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) stric
 ; P9-NEXT:    nop
 ; P9-NEXT:    xscpsgndp v30, f1, f1
 ; P9-NEXT:    xxswapd vs1, v31
-; P9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; P9-NEXT:    bl nearbyint
 ; P9-NEXT:    nop
-; P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P9-NEXT:    xxmrghd v2, v30, vs1
 ; P9-NEXT:    lxv v31, 48(r1) # 16-byte Folded Reload
 ; P9-NEXT:    lxv v30, 32(r1) # 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/frem.ll b/llvm/test/CodeGen/PowerPC/frem.ll
index 8cb68e60f7f9b..19b4b1c9cdf95 100644
--- a/llvm/test/CodeGen/PowerPC/frem.ll
+++ b/llvm/test/CodeGen/PowerPC/frem.ll
@@ -70,7 +70,6 @@ define <4 x float> @frem4x32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT:    xscvspdpn 2, 0
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxmrghd 0, 1, 61
 ; CHECK-NEXT:    xscvspdpn 1, 62
 ; CHECK-NEXT:    xscvspdpn 2, 63
@@ -84,7 +83,6 @@ define <4 x float> @frem4x32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT:    xscvspdpn 2, 0
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxmrghd 0, 61, 1
 ; CHECK-NEXT:    lxv 63, 80(1) # 16-byte Folded Reload
 ; CHECK-NEXT:    lxv 62, 64(1) # 16-byte Folded Reload
@@ -124,11 +122,8 @@ define <2 x double> @frem2x64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-NEXT:    xscpsgndp 61, 1, 1
 ; CHECK-NEXT:    xxswapd 1, 62
 ; CHECK-NEXT:    xxswapd 2, 63
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; CHECK-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; CHECK-NEXT:    bl fmod
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxmrghd 34, 61, 1
 ; CHECK-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
 ; CHECK-NEXT:    lxv 62, 48(1) # 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
index 13f70f420400b..4256933300243 100644
--- a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
+++ b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
@@ -666,7 +666,6 @@ define <4 x float> @test_extend32_vec4(ptr %p) #0 {
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
 ; P8-NEXT:    li r3, 80
-; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8-NEXT:    xxmrghd vs0, vs61, vs1
 ; P8-NEXT:    xxmrghd vs1, vs63, vs62
 ; P8-NEXT:    ld r30, 96(r1) # 8-byte Folded Reload
@@ -776,7 +775,6 @@ define <4 x double> @test_extend64_vec4(ptr %p) #0 {
 ; P8-NEXT:    nop
 ; P8-NEXT:    li r3, 80
 ; P8-NEXT:    xxmrghd vs35, vs63, vs62
-; P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; P8-NEXT:    xxmrghd vs34, vs61, vs1
 ; P8-NEXT:    ld r30, 96(r1) # 8-byte Folded Reload
 ; P8-NEXT:    lxvd2x vs63, r1, r3 # 16-byte Folded Reload
@@ -1005,11 +1003,10 @@ define void @test_trunc64_vec4(<4 x double> %a, ptr %p) #0 {
 ; P8-NEXT:    stdu r1, -128(r1)
 ; P8-NEXT:    li r3, 48
 ; P8-NEXT:    std r0, 144(r1)
-; P8-NEXT:    std r27, 88(r1) # 8-byte Folded Spill
 ; P8-NEXT:    xxswapd vs1, vs34
+; P8-NEXT:    std r27, 88(r1) # 8-byte Folded Spill
 ; P8-NEXT:    std r28, 96(r1) # 8-byte Folded Spill
 ; P8-NEXT:    std r29, 104(r1) # 8-byte Folded Spill
-; P8-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; P8-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
 ; P8-NEXT:    mr r30, r7
 ; P8-NEXT:    stxvd2x vs62, r1, r3 # 16-byte Folded Spill
@@ -1019,9 +1016,8 @@ define void @test_trunc64_vec4(<4 x double> %a, ptr %p) #0 {
 ; P8-NEXT:    vmr v31, v3
 ; P8-NEXT:    bl __truncdfhf2
 ; P8-NEXT:    nop
-; P8-NEXT:    mr r29, r3
 ; P8-NEXT:    xxswapd vs1, vs63
-; P8-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; P8-NEXT:    mr r29, r3
 ; P8-NEXT:    bl __truncdfhf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    xxlor f1, vs62, vs62
@@ -1238,7 +1234,6 @@ define half @PR40273(half) #0 {
 ; P8-NEXT:    vspltisw v2, 1
 ; P8-NEXT:    xvcvsxwdp vs1, vs34
 ; P8-NEXT:  .LBB20_2:
-; P8-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; P8-NEXT:    addi r1, r1, 32
 ; P8-NEXT:    ld r0, 16(r1)
 ; P8-NEXT:    mtlr r0
@@ -1253,12 +1248,10 @@ define half @PR40273(half) #0 {
 ; CHECK-NEXT:    mtfprwz f0, r3
 ; CHECK-NEXT:    xscvhpdp f0, f0
 ; CHECK-NEXT:    fcmpu cr0, f0, f1
-; CHECK-NEXT:    beq cr0, .LBB20_2
+; CHECK-NEXT:    beqlr cr0
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    vspltisw v2, 1
 ; CHECK-NEXT:    xvcvsxwdp vs1, vs34
-; CHECK-NEXT:  .LBB20_2:
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
 ; SOFT-LABEL: PR40273:
diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll b/llvm/test/CodeGen/PowerPC/ldexp.ll
index ed8089b4b303e..151df6096b30b 100644
--- a/llvm/test/CodeGen/PowerPC/ldexp.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp.ll
@@ -13,7 +13,6 @@ define float @ldexp_f32(i8 zeroext %x) {
 ; CHECK-NEXT:    vspltisw v2, 1
 ; CHECK-NEXT:    mr r4, r3
 ; CHECK-NEXT:    xvcvsxwdp vs1, v2
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    bl ldexpf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    addi r1, r1, 32
@@ -36,7 +35,6 @@ define double @ldexp_f64(i8 zeroext %x) {
 ; CHECK-NEXT:    vspltisw v2, 1
 ; CHECK-NEXT:    mr r4, r3
 ; CHECK-NEXT:    xvcvsxwdp vs1, v2
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    bl ldexp
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    addi r1, r1, 32
@@ -120,7 +118,6 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) {
 ; CHECK-NEXT:    vextuwrx r4, r3, v31
 ; CHECK-NEXT:    bl ldexpf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxmrghd vs0, v29, vs1
 ; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    vextuwrx r4, r3, v31
@@ -135,7 +132,6 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) {
 ; CHECK-NEXT:    xscvspdpn f1, vs0
 ; CHECK-NEXT:    bl ldexpf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxmrghd vs0, vs1, v29
 ; CHECK-NEXT:    lxv v31, 80(r1) # 16-byte Folded Reload
 ; CHECK-NEXT:    lxv v30, 64(r1) # 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
index 7373a328a4f05..842cb929541cf 100644
--- a/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
@@ -124,21 +124,18 @@ define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr {
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xxsplti32dx vs1, 0, 1081435463
 ; CHECK-LE-NEXT:    xxsplti32dx vs1, 1, -1374389535
-; CHECK-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-BE-LABEL: testDoubleNonRepresentableScalar:
 ; CHECK-NOPCREL-BE:       # %bb.0: # %entry
 ; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 0, 1081435463
 ; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 1, -1374389535
-; CHECK-NOPCREL-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-BE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-LE-LABEL: testDoubleNonRepresentableScalar:
 ; CHECK-NOPCREL-LE:       # %bb.0: # %entry
 ; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 0, 1081435463
 ; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 1, -1374389535
-; CHECK-NOPCREL-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-LE-NEXT:    blr
 ;
 ; CHECK-NOPREFIX-LABEL: testDoubleNonRepresentableScalar:
@@ -151,7 +148,6 @@ define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxsplti32dx vs1, 0, 1081435463
 ; CHECK-BE-NEXT:    xxsplti32dx vs1, 1, -1374389535
-; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-BE-NEXT:    blr
 entry:
   ret double 3.423300e+02
@@ -162,21 +158,18 @@ define dso_local float @testFloatDenormScalar() local_unnamed_addr {
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xxsplti32dx vs1, 0, 940259579
 ; CHECK-LE-NEXT:    xxsplti32dx vs1, 1, -2147483648
-; CHECK-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-BE-LABEL: testFloatDenormScalar:
 ; CHECK-NOPCREL-BE:       # %bb.0: # %entry
 ; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 0, 940259579
 ; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 1, -2147483648
-; CHECK-NOPCREL-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-BE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-LE-LABEL: testFloatDenormScalar:
 ; CHECK-NOPCREL-LE:       # %bb.0: # %entry
 ; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 0, 940259579
 ; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 1, -2147483648
-; CHECK-NOPCREL-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-LE-NEXT:    blr
 ;
 ; CHECK-NOPREFIX-LABEL: testFloatDenormScalar:
@@ -189,7 +182,6 @@ define dso_local float @testFloatDenormScalar() local_unnamed_addr {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxsplti32dx vs1, 0, 940259579
 ; CHECK-BE-NEXT:    xxsplti32dx vs1, 1, -2147483648
-; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-BE-NEXT:    blr
 entry:
   ret float 0x380B38FB80000000
@@ -200,21 +192,18 @@ define dso_local double @testFloatDenormToDoubleScalar() local_unnamed_addr {
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xxsplti32dx vs1, 0, 940259579
 ; CHECK-LE-NEXT:    xxsplti32dx vs1, 1, -2147483648
-; CHECK-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-BE-LABEL: testFloatDenormToDoubleScalar:
 ; CHECK-NOPCREL-BE:       # %bb.0: # %entry
 ; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 0, 940259579
 ; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 1, -2147483648
-; CHECK-NOPCREL-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-BE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-LE-LABEL: testFloatDenormToDoubleScalar:
 ; CHECK-NOPCREL-LE:       # %bb.0: # %entry
 ; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 0, 940259579
 ; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 1, -2147483648
-; CHECK-NOPCREL-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-LE-NEXT:    blr
 ;
 ; CHECK-NOPREFIX-LABEL: testFloatDenormToDoubleScalar:
@@ -227,7 +216,6 @@ define dso_local double @testFloatDenormToDoubleScalar() local_unnamed_addr {
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxsplti32dx vs1, 0, 940259579
 ; CHECK-BE-NEXT:    xxsplti32dx vs1, 1, -2147483648
-; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-BE-NEXT:    blr
 entry:
   ret double 0x380B38FB80000000
diff --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
index 87b8a64cc67bd..8f12b182283f5 100644
--- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -2416,7 +2416,6 @@ define double @getd0(<2 x double> %vd) {
 ; CHECK-LE-LABEL: getd0:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    xxswapd vs1, v2
-; CHECK-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-AIX-LABEL: getd0:
@@ -2435,7 +2434,6 @@ define double @getd1(<2 x double> %vd) {
 ; CHECK-LABEL: getd1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxswapd vs1, v2
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: getd1:
@@ -2446,7 +2444,6 @@ define double @getd1(<2 x double> %vd) {
 ; CHECK-AIX-LABEL: getd1:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    xxswapd 1, 34
-; CHECK-AIX-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <2 x double> %vd, i32 1
@@ -2462,7 +2459,6 @@ define double @getveld(<2 x double> %vd, i32 signext %i) {
 ; CHECK-NEXT:    lvsl v3, 0, r3
 ; CHECK-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-NEXT:    xxlor vs1, v2, v2
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: getveld:
@@ -2474,7 +2470,6 @@ define double @getveld(<2 x double> %vd, i32 signext %i) {
 ; CHECK-LE-NEXT:    lvsl v3, 0, r3
 ; CHECK-LE-NEXT:    vperm v2, v2, v2, v3
 ; CHECK-LE-NEXT:    xxlor vs1, v2, v2
-; CHECK-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-AIX-LABEL: getveld:
@@ -2484,7 +2479,6 @@ define double @getveld(<2 x double> %vd, i32 signext %i) {
 ; CHECK-AIX-NEXT:    lvsl 3, 0, 3
 ; CHECK-AIX-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-AIX-NEXT:    xxlor 1, 34, 34
-; CHECK-AIX-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-AIX-NEXT:    blr
 entry:
   %vecext = extractelement <2 x double> %vd, i32 %i
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
index 541b2c46dd395..0b1047bb6cfbe 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
@@ -175,7 +175,6 @@ define dso_local double @UsesX2AsConstPoolTOC() local_unnamed_addr {
 ; CHECK-ALL:       # %bb.0: # %entry
 ; CHECK-S-NEXT:    xxsplti32dx vs1, 0, 1078011044
 ; CHECK-S-NEXT:    xxsplti32dx vs1, 1, -337824948
-; CHECK-S-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-S-NEXT:    blr
 entry:
   ret double 0x404124A4EBDD334C
diff --git a/llvm/test/CodeGen/PowerPC/select_const.ll b/llvm/test/CodeGen/PowerPC/select_const.ll
index ca4be83cc16ac..a48d6968aafbf 100644
--- a/llvm/test/CodeGen/PowerPC/select_const.ll
+++ b/llvm/test/CodeGen/PowerPC/select_const.ll
@@ -845,12 +845,10 @@ define double @sel_constants_frem_constant(i1 %cond) {
 ; ALL-NEXT:  # %bb.1:
 ; ALL-NEXT:    addis 3, 2, .LCPI48_0@toc@ha
 ; ALL-NEXT:    lfd 1, .LCPI48_0@toc@l(3)
-; ALL-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; ALL-NEXT:    blr
 ; ALL-NEXT:  .LBB48_2:
 ; ALL-NEXT:    vspltisw 2, -4
 ; ALL-NEXT:    xvcvsxwdp 1, 34
-; ALL-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; ALL-NEXT:    blr
   %sel = select i1 %cond, double -4.0, double 23.3
   %bo = frem double %sel, 5.1
diff --git a/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir b/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
index 39eab1f562e71..31407e0d44cfb 100644
--- a/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
+++ b/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
@@ -31,4 +31,3 @@ body:             |
     $v2 = COPY %5
     BLR8 implicit $lr8, implicit $rm, implicit $v2
 ...
-
diff --git a/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir b/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir
index e1fd6180b2662..cf69d3ad09878 100644
--- a/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir
+++ b/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir
@@ -5,21 +5,18 @@
 
 # Keep track of all of the lanemasks for various subregsiters.
 #
-# TODO: The mask for %6.sub_vsx1:accrc is the same as the mask for %10.sub_vsx1_then_sub_64:accrc.
-#       Ideally on PowerPC these masks should be different. To be addressed in a later patch.
-#
-# CHECK: %3 [80r,80d:0) 0@80r  L0000000000000004 [80r,80d:0) 0@80r  weight:0.000000e+00
-# CHECK: %4 [96r,96d:0) 0@96r  L0000000000000800 [96r,96d:0) 0@96r  weight:0.000000e+00
-# CHECK: %5 [112r,112d:0) 0@112r  L0000000000000004 [112r,112d:0) 0@112r  weight:0.000000e+00
-# CHECK: %6 [128r,128d:0) 0@128r  L0000000000000800 [128r,128d:0) 0@128r  weight:0.000000e+00
+# CHECK: %3 [80r,80d:0) 0@80r  L000000000000000C [80r,80d:0) 0@80r  weight:0.000000e+00
+# CHECK: %4 [96r,96d:0) 0@96r  L0000000000003000 [96r,96d:0) 0@96r  weight:0.000000e+00
+# CHECK: %5 [112r,112d:0) 0@112r  L000000000000000C [112r,112d:0) 0@112r  weight:0.000000e+00
+# CHECK: %6 [128r,128d:0) 0@128r  L0000000000003000 [128r,128d:0) 0@128r  weight:0.000000e+00
 # CHECK: %7 [144r,144d:0) 0@144r  L0000000000000004 [144r,144d:0) 0@144r  weight:0.000000e+00
-# CHECK: %8 [160r,160d:0) 0@160r  L0000000000000800 [160r,160d:0) 0@160r  weight:0.000000e+00
+# CHECK: %8 [160r,160d:0) 0@160r  L0000000000001000 [160r,160d:0) 0@160r  weight:0.000000e+00
 # CHECK: %9 [176r,176d:0) 0@176r  L0000000000000004 [176r,176d:0) 0@176r  weight:0.000000e+00
-# CHECK: %10 [192r,192d:0) 0@192r  L0000000000000800 [192r,192d:0) 0@192r  weight:0.000000e+00
-# CHECK: %11 [208r,208d:0) 0@208r  L0000000000001000 [208r,208d:0) 0@208r  weight:0.000000e+00
-# CHECK: %12 [224r,224d:0) 0@224r  L0000000000002000 [224r,224d:0) 0@224r  weight:0.000000e+00
-# CHECK: %13 [240r,240d:0) 0@240r  L0000000000000804 [240r,240d:0) 0@240r  weight:0.000000e+00
-# CHECK: %14 [256r,256d:0) 0@256r  L0000000000003000 [256r,256d:0) 0@256r  weight:0.000000e+00
+# CHECK: %10 [192r,192d:0) 0@192r  L0000000000001000 [192r,192d:0) 0@192r  weight:0.000000e+00
+# CHECK: %11 [208r,208d:0) 0@208r  L0000000000004000 [208r,208d:0) 0@208r  weight:0.000000e+00
+# CHECK: %12 [224r,224d:0) 0@224r  L0000000000010000 [224r,224d:0) 0@224r  weight:0.000000e+00
+# CHECK: %13 [240r,240d:0) 0@240r  L000000000000300C [240r,240d:0) 0@240r  weight:0.000000e+00
+# CHECK: %14 [256r,256d:0) 0@256r  L000000000003C000 [256r,256d:0) 0@256r  weight:0.000000e+00
 
 
 # CHECK:       0B bb.0
diff --git a/llvm/test/CodeGen/PowerPC/toc-float.ll b/llvm/test/CodeGen/PowerPC/toc-float.ll
index 1d6f1f71a2383..943edd5948429 100644
--- a/llvm/test/CodeGen/PowerPC/toc-float.ll
+++ b/llvm/test/CodeGen/PowerPC/toc-float.ll
@@ -9,14 +9,12 @@ define double @doubleConstant1() {
 ; CHECK-P9:       # %bb.0:
 ; CHECK-P9-NEXT:    vspltisw 2, 14
 ; CHECK-P9-NEXT:    xvcvsxwdp 1, 34
-; CHECK-P9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-P8-LABEL: doubleConstant1:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    vspltisw 2, 14
 ; CHECK-P8-NEXT:    xvcvsxwdp 1, 34
-; CHECK-P8-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-P8-NEXT:    blr
   ret double 1.400000e+01
 }
diff --git a/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll b/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll
index 49c80a91f8f99..d0dda1a071754 100644
--- a/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll
+++ b/llvm/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll
@@ -122,7 +122,6 @@ define double @getd(<2 x double> %a, i32 zeroext %b) {
 ; CHECK-NEXT:    lvsl 3, 0, 3
 ; CHECK-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-NEXT:    xxlor 1, 34, 34
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: getd:
@@ -132,7 +131,6 @@ define double @getd(<2 x double> %a, i32 zeroext %b) {
 ; CHECK-BE-NEXT:    lvsl 3, 0, 3
 ; CHECK-BE-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-BE-NEXT:    xxlor 1, 34, 34
-; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: getd:
@@ -142,7 +140,6 @@ define double @getd(<2 x double> %a, i32 zeroext %b) {
 ; CHECK-P7-NEXT:    lvsl 3, 0, 3
 ; CHECK-P7-NEXT:    vperm 2, 2, 2, 3
 ; CHECK-P7-NEXT:    xxlor 1, 34, 34
-; CHECK-P7-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-P7-NEXT:    blr
 entry:
   %vecext = extractelement <2 x double> %a, i32 %b
diff --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
index b98aed8616509..291a9c1f978da 100644
--- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
@@ -940,25 +940,21 @@ entry:
 define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) {
 ; CHECK-LABEL: testDoubleImm1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-NEXT:    xxmrghd v2, v2, vs1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testDoubleImm1:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-BE-NEXT:    xxpermdi v2, vs1, v2, 1
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testDoubleImm1:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; CHECK-P9-NEXT:    xxpermdi v2, vs1, v2, 1
 ; CHECK-P9-NEXT:    blr
 ;
 ; AIX-P8-LABEL: testDoubleImm1:
 ; AIX-P8:       # %bb.0: # %entry
-; AIX-P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; AIX-P8-NEXT:    xxpermdi v2, vs1, v2, 1
 ; AIX-P8-NEXT:    blr
 entry:
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index f217162782bfd..aedb1a9c65cf8 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -107,32 +107,20 @@ entry:
 define <3 x double> @constrained_vector_fdiv_v3f64(<3 x double> %x, <3 x double> %y) #0 {
 ; PC64LE-LABEL: constrained_vector_fdiv_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
-; PC64LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE-NEXT:    xsdivdp 3, 3, 6
 ; PC64LE-NEXT:    xvdivdp 2, 1, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fdiv_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    # kill: def $f5 killed $f5 def $vsl5
-; PC64LE9-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE9-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE9-NEXT:    xsdivdp 3, 3, 6
 ; PC64LE9-NEXT:    xvdivdp 2, 1, 0
 ; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE9-NEXT:    blr
 entry:
   %div = call <3 x double> @llvm.experimental.constrained.fdiv.v3f64(
@@ -217,13 +205,10 @@ define <2 x double> @constrained_vector_frem_v2f64(<2 x double> %x, <2 x double>
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 62
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    xxswapd 2, 63
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -252,11 +237,8 @@ define <2 x double> @constrained_vector_frem_v2f64(<2 x double> %x, <2 x double>
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 62
 ; PC64LE9-NEXT:    xxswapd 2, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE9-NEXT:    bl fmod
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 61, 1
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 48(1) # 16-byte Folded Reload
@@ -408,7 +390,6 @@ define <3 x double> @constrained_vector_frem_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-NEXT:    fmr 2, 30
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 29
 ; PC64LE-NEXT:    fmr 2, 31
@@ -423,7 +404,6 @@ define <3 x double> @constrained_vector_frem_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-NEXT:    lfd 29, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lfd 28, 64(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    addi 1, 1, 96
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -451,7 +431,6 @@ define <3 x double> @constrained_vector_frem_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE9-NEXT:    fmr 2, 30
 ; PC64LE9-NEXT:    bl fmod
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 29
 ; PC64LE9-NEXT:    fmr 2, 31
@@ -462,7 +441,6 @@ define <3 x double> @constrained_vector_frem_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 29, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 28, 48(1) # 8-byte Folded Reload
@@ -505,12 +483,9 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 59, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 60
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    xxswapd 2, 62
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 59, 1
 ; PC64LE-NEXT:    xxlor 1, 61, 61
 ; PC64LE-NEXT:    xxlor 2, 63, 63
@@ -518,14 +493,11 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 60, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 61
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    xxswapd 2, 63
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 112
 ; PC64LE-NEXT:    vmr 2, 30
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 60, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 96
@@ -562,11 +534,8 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE9-NEXT:    xscpsgndp 59, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 60
 ; PC64LE9-NEXT:    xxswapd 2, 62
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE9-NEXT:    bl fmod
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 59, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 61, 61
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
@@ -575,11 +544,8 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE9-NEXT:    xscpsgndp 60, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 61
 ; PC64LE9-NEXT:    xxswapd 2, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE9-NEXT:    bl fmod
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 60, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 96(1) # 16-byte Folded Reload
@@ -704,32 +670,20 @@ entry:
 define <3 x double> @constrained_vector_fmul_v3f64(<3 x double> %x, <3 x double> %y) #0 {
 ; PC64LE-LABEL: constrained_vector_fmul_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
-; PC64LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE-NEXT:    xsmuldp 3, 3, 6
 ; PC64LE-NEXT:    xvmuldp 2, 1, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fmul_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    # kill: def $f5 killed $f5 def $vsl5
-; PC64LE9-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE9-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE9-NEXT:    xsmuldp 3, 3, 6
 ; PC64LE9-NEXT:    xvmuldp 2, 1, 0
 ; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE9-NEXT:    blr
 entry:
   %mul = call <3 x double> @llvm.experimental.constrained.fmul.v3f64(
@@ -866,32 +820,20 @@ entry:
 define <3 x double> @constrained_vector_fadd_v3f64(<3 x double> %x, <3 x double> %y) #0 {
 ; PC64LE-LABEL: constrained_vector_fadd_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
-; PC64LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE-NEXT:    xsadddp 3, 3, 6
 ; PC64LE-NEXT:    xvadddp 2, 1, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fadd_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    # kill: def $f5 killed $f5 def $vsl5
-; PC64LE9-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE9-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE9-NEXT:    xsadddp 3, 3, 6
 ; PC64LE9-NEXT:    xvadddp 2, 1, 0
 ; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE9-NEXT:    blr
 entry:
   %add = call <3 x double> @llvm.experimental.constrained.fadd.v3f64(
@@ -1028,32 +970,20 @@ entry:
 define <3 x double> @constrained_vector_fsub_v3f64(<3 x double> %x, <3 x double> %y) #0 {
 ; PC64LE-LABEL: constrained_vector_fsub_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
-; PC64LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE-NEXT:    xssubdp 3, 3, 6
 ; PC64LE-NEXT:    xvsubdp 2, 1, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_fsub_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    # kill: def $f5 killed $f5 def $vsl5
-; PC64LE9-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE9-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE9-NEXT:    xssubdp 3, 3, 6
 ; PC64LE9-NEXT:    xvsubdp 2, 1, 0
 ; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE9-NEXT:    blr
 entry:
   %sub = call <3 x double> @llvm.experimental.constrained.fsub.v3f64(
@@ -1175,26 +1105,18 @@ entry:
 define <3 x double> @constrained_vector_sqrt_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sqrt_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE-NEXT:    xssqrtdp 3, 3
 ; PC64LE-NEXT:    xvsqrtdp 2, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_sqrt_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE9-NEXT:    xssqrtdp 3, 3
 ; PC64LE9-NEXT:    xvsqrtdp 2, 0
 ; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE9-NEXT:    blr
 entry:
   %sqrt = call <3 x double> @llvm.experimental.constrained.sqrt.v3f64(
@@ -1277,13 +1199,10 @@ define <2 x double> @constrained_vector_pow_v2f64(<2 x double> %x, <2 x double>
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 62
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    xxswapd 2, 63
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -1312,11 +1231,8 @@ define <2 x double> @constrained_vector_pow_v2f64(<2 x double> %x, <2 x double>
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 62
 ; PC64LE9-NEXT:    xxswapd 2, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE9-NEXT:    bl pow
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 61, 1
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 48(1) # 16-byte Folded Reload
@@ -1468,7 +1384,6 @@ define <3 x double> @constrained_vector_pow_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-NEXT:    fmr 2, 30
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 29
 ; PC64LE-NEXT:    fmr 2, 31
@@ -1483,7 +1398,6 @@ define <3 x double> @constrained_vector_pow_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-NEXT:    lfd 29, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lfd 28, 64(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    addi 1, 1, 96
 ; PC64LE-NEXT:    ld 0, 16(1)
 ; PC64LE-NEXT:    mtlr 0
@@ -1511,7 +1425,6 @@ define <3 x double> @constrained_vector_pow_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE9-NEXT:    fmr 2, 30
 ; PC64LE9-NEXT:    bl pow
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 29
 ; PC64LE9-NEXT:    fmr 2, 31
@@ -1522,7 +1435,6 @@ define <3 x double> @constrained_vector_pow_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 29, 56(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 28, 48(1) # 8-byte Folded Reload
@@ -1565,12 +1477,9 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 59, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 60
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    xxswapd 2, 62
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 59, 1
 ; PC64LE-NEXT:    xxlor 1, 61, 61
 ; PC64LE-NEXT:    xxlor 2, 63, 63
@@ -1578,14 +1487,11 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 60, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 61
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    xxswapd 2, 63
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 112
 ; PC64LE-NEXT:    vmr 2, 30
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 60, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 96
@@ -1622,11 +1528,8 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE9-NEXT:    xscpsgndp 59, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 60
 ; PC64LE9-NEXT:    xxswapd 2, 62
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE9-NEXT:    bl pow
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 59, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 61, 61
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
@@ -1635,11 +1538,8 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE9-NEXT:    xscpsgndp 60, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 61
 ; PC64LE9-NEXT:    xxswapd 2, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE9-NEXT:    bl pow
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 60, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 96(1) # 16-byte Folded Reload
@@ -1712,14 +1612,12 @@ define <2 x double> @constrained_vector_powi_v2f64(<2 x double> %x, i32 %y) #0 {
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    ld 30, 80(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
@@ -1744,13 +1642,11 @@ define <2 x double> @constrained_vector_powi_v2f64(<2 x double> %x, i32 %y) #0 {
 ; PC64LE9-NEXT:    mr 4, 30
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    mr 4, 30
 ; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    mr 4, 30
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -1894,7 +1790,6 @@ define <3 x double> @constrained_vector_powi_v3f64(<3 x double> %x, i32 %y) #0 {
 ; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    mr 4, 30
@@ -1907,7 +1802,6 @@ define <3 x double> @constrained_vector_powi_v3f64(<3 x double> %x, i32 %y) #0 {
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 80(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    ld 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 96
 ; PC64LE-NEXT:    ld 0, 16(1)
@@ -1934,7 +1828,6 @@ define <3 x double> @constrained_vector_powi_v3f64(<3 x double> %x, i32 %y) #0 {
 ; PC64LE9-NEXT:    mr 4, 30
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    mr 4, 30
@@ -1945,7 +1838,6 @@ define <3 x double> @constrained_vector_powi_v3f64(<3 x double> %x, i32 %y) #0 {
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    ld 30, 48(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 80
@@ -1981,27 +1873,23 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 {
 ; PC64LE-NEXT:    vmr 31, 3
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 62
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    ld 30, 96(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
@@ -2030,25 +1918,21 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 {
 ; PC64LE9-NEXT:    vmr 31, 3
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    mr 4, 30
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 62
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    mr 4, 30
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    mr 4, 30
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    mr 4, 30
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    mr 4, 30
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -2116,11 +2000,9 @@ define <2 x double> @constrained_vector_sin_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
@@ -2143,10 +2025,8 @@ define <2 x double> @constrained_vector_sin_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl sin
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -2269,7 +2149,6 @@ define <3 x double> @constrained_vector_sin_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 30
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl sin
@@ -2280,7 +2159,6 @@ define <3 x double> @constrained_vector_sin_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
@@ -2303,7 +2181,6 @@ define <3 x double> @constrained_vector_sin_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    fmr 1, 30
 ; PC64LE9-NEXT:    bl sin
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    bl sin
@@ -2313,7 +2190,6 @@ define <3 x double> @constrained_vector_sin_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -2346,22 +2222,18 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 62
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -2388,20 +2260,16 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 62
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl sin
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    bl sin
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl sin
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -2467,11 +2335,9 @@ define <2 x double> @constrained_vector_cos_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
@@ -2494,10 +2360,8 @@ define <2 x double> @constrained_vector_cos_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl cos
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -2620,7 +2484,6 @@ define <3 x double> @constrained_vector_cos_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 30
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl cos
@@ -2631,7 +2494,6 @@ define <3 x double> @constrained_vector_cos_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
@@ -2654,7 +2516,6 @@ define <3 x double> @constrained_vector_cos_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    fmr 1, 30
 ; PC64LE9-NEXT:    bl cos
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    bl cos
@@ -2664,7 +2525,6 @@ define <3 x double> @constrained_vector_cos_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -2697,22 +2557,18 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 62
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -2739,20 +2595,16 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 62
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl cos
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    bl cos
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl cos
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -2818,11 +2670,9 @@ define <2 x double> @constrained_vector_exp_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
@@ -2845,10 +2695,8 @@ define <2 x double> @constrained_vector_exp_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl exp
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -2971,7 +2819,6 @@ define <3 x double> @constrained_vector_exp_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 30
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl exp
@@ -2982,7 +2829,6 @@ define <3 x double> @constrained_vector_exp_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
@@ -3005,7 +2851,6 @@ define <3 x double> @constrained_vector_exp_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    fmr 1, 30
 ; PC64LE9-NEXT:    bl exp
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    bl exp
@@ -3015,7 +2860,6 @@ define <3 x double> @constrained_vector_exp_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -3048,22 +2892,18 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 62
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -3090,20 +2930,16 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 62
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl exp
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    bl exp
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl exp
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -3169,11 +3005,9 @@ define <2 x double> @constrained_vector_exp2_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
@@ -3196,10 +3030,8 @@ define <2 x double> @constrained_vector_exp2_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl exp2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -3322,7 +3154,6 @@ define <3 x double> @constrained_vector_exp2_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 30
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl exp2
@@ -3333,7 +3164,6 @@ define <3 x double> @constrained_vector_exp2_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
@@ -3356,7 +3186,6 @@ define <3 x double> @constrained_vector_exp2_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    fmr 1, 30
 ; PC64LE9-NEXT:    bl exp2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    bl exp2
@@ -3366,7 +3195,6 @@ define <3 x double> @constrained_vector_exp2_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -3399,22 +3227,18 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 62
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -3441,20 +3265,16 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 62
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl exp2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    bl exp2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl exp2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -3520,11 +3340,9 @@ define <2 x double> @constrained_vector_log_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
@@ -3547,10 +3365,8 @@ define <2 x double> @constrained_vector_log_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl log
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -3673,7 +3489,6 @@ define <3 x double> @constrained_vector_log_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 30
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl log
@@ -3684,7 +3499,6 @@ define <3 x double> @constrained_vector_log_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
@@ -3707,7 +3521,6 @@ define <3 x double> @constrained_vector_log_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    fmr 1, 30
 ; PC64LE9-NEXT:    bl log
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    bl log
@@ -3717,7 +3530,6 @@ define <3 x double> @constrained_vector_log_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -3750,22 +3562,18 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 62
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -3792,20 +3600,16 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 62
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl log
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    bl log
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl log
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -3871,11 +3675,9 @@ define <2 x double> @constrained_vector_log10_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
@@ -3898,10 +3700,8 @@ define <2 x double> @constrained_vector_log10_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl log10
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -4024,7 +3824,6 @@ define <3 x double> @constrained_vector_log10_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 30
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl log10
@@ -4035,7 +3834,6 @@ define <3 x double> @constrained_vector_log10_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
@@ -4058,7 +3856,6 @@ define <3 x double> @constrained_vector_log10_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    fmr 1, 30
 ; PC64LE9-NEXT:    bl log10
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    bl log10
@@ -4068,7 +3865,6 @@ define <3 x double> @constrained_vector_log10_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -4101,22 +3897,18 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 62
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -4143,20 +3935,16 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 62
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl log10
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    bl log10
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl log10
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -4222,11 +4010,9 @@ define <2 x double> @constrained_vector_log2_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
@@ -4249,10 +4035,8 @@ define <2 x double> @constrained_vector_log2_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl log2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -4375,7 +4159,6 @@ define <3 x double> @constrained_vector_log2_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 30
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl log2
@@ -4386,7 +4169,6 @@ define <3 x double> @constrained_vector_log2_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
@@ -4409,7 +4191,6 @@ define <3 x double> @constrained_vector_log2_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    fmr 1, 30
 ; PC64LE9-NEXT:    bl log2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    bl log2
@@ -4419,7 +4200,6 @@ define <3 x double> @constrained_vector_log2_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -4452,22 +4232,18 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 62
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -4494,20 +4270,16 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 62
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl log2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    bl log2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl log2
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -4615,26 +4387,18 @@ define <3 x float> @constrained_vector_rint_v3f32(<3 x float> %x) #0 {
 define <3 x double> @constrained_vector_rint_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_rint_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE-NEXT:    xsrdpic 3, 3
 ; PC64LE-NEXT:    xvrdpic 2, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_rint_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE9-NEXT:    xsrdpic 3, 3
 ; PC64LE9-NEXT:    xvrdpic 2, 0
 ; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE9-NEXT:    blr
 entry:
   %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
@@ -4712,11 +4476,9 @@ define <2 x double> @constrained_vector_nearbyint_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
@@ -4739,10 +4501,8 @@ define <2 x double> @constrained_vector_nearbyint_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -4865,7 +4625,6 @@ define <3 x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 30
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl nearbyint
@@ -4876,7 +4635,6 @@ define <3 x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
@@ -4899,7 +4657,6 @@ define <3 x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    fmr 1, 30
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    bl nearbyint
@@ -4909,7 +4666,6 @@ define <3 x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -4942,22 +4698,18 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 62
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -4984,20 +4736,16 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 62
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl nearbyint
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
@@ -5179,10 +4927,6 @@ define <3 x double> @constrained_vector_max_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    stdu 1, -64(1)
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
-; PC64LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE-NEXT:    std 0, 80(1)
@@ -5195,7 +4939,6 @@ define <3 x double> @constrained_vector_max_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    fmr 3, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 64
@@ -5207,10 +4950,6 @@ define <3 x double> @constrained_vector_max_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    # kill: def $f5 killed $f5 def $vsl5
-; PC64LE9-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE9-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE9-NEXT:    std 0, 64(1)
@@ -5224,7 +4963,6 @@ define <3 x double> @constrained_vector_max_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -5421,10 +5159,6 @@ define <3 x double> @constrained_vector_min_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    stdu 1, -64(1)
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
-; PC64LE-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE-NEXT:    std 0, 80(1)
@@ -5437,7 +5171,6 @@ define <3 x double> @constrained_vector_min_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    fmr 3, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 64
@@ -5449,10 +5182,6 @@ define <3 x double> @constrained_vector_min_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    # kill: def $f5 killed $f5 def $vsl5
-; PC64LE9-NEXT:    # kill: def $f4 killed $f4 def $vsl4
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE9-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE9-NEXT:    std 0, 64(1)
@@ -5466,7 +5195,6 @@ define <3 x double> @constrained_vector_min_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE9-NEXT:    xxswapd 1, 63
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    addi 1, 1, 48
 ; PC64LE9-NEXT:    ld 0, 16(1)
 ; PC64LE9-NEXT:    mtlr 0
@@ -6792,26 +6520,18 @@ entry:
 define <3 x double> @constrained_vector_ceil_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_ceil_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE-NEXT:    xsrdpip 3, 3
 ; PC64LE-NEXT:    xvrdpip 2, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_ceil_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE9-NEXT:    xsrdpip 3, 3
 ; PC64LE9-NEXT:    xvrdpip 2, 0
 ; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE9-NEXT:    blr
 entry:
   %ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
@@ -6908,26 +6628,18 @@ entry:
 define <3 x double> @constrained_vector_floor_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_floor_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE-NEXT:    xsrdpim 3, 3
 ; PC64LE-NEXT:    xvrdpim 2, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_floor_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE9-NEXT:    xsrdpim 3, 3
 ; PC64LE9-NEXT:    xvrdpim 2, 0
 ; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE9-NEXT:    blr
 entry:
   %floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
@@ -7024,26 +6736,18 @@ entry:
 define <3 x double> @constrained_vector_round_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_round_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE-NEXT:    xsrdpi 3, 3
 ; PC64LE-NEXT:    xvrdpi 2, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_round_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE9-NEXT:    xsrdpi 3, 3
 ; PC64LE9-NEXT:    xvrdpi 2, 0
 ; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE9-NEXT:    blr
 entry:
   %round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
@@ -7139,26 +6843,18 @@ entry:
 define <3 x double> @constrained_vector_trunc_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_trunc_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE-NEXT:    xsrdpiz 3, 3
 ; PC64LE-NEXT:    xvrdpiz 2, 0
 ; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_trunc_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 def $vsl2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 2, 1
 ; PC64LE9-NEXT:    xsrdpiz 3, 3
 ; PC64LE9-NEXT:    xvrdpiz 2, 0
 ; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PC64LE9-NEXT:    blr
 entry:
   %trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
@@ -8350,11 +8046,9 @@ define <2 x double> @constrained_vector_tan_v2f64(<2 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 62, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl tan
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 48
@@ -8377,10 +8071,8 @@ define <2 x double> @constrained_vector_tan_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 62, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl tan
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 34, 62, 1
 ; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lxv 62, 32(1) # 16-byte Folded Reload
@@ -8503,7 +8195,6 @@ define <3 x double> @constrained_vector_tan_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    fmr 1, 30
 ; PC64LE-NEXT:    bl tan
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE-NEXT:    fmr 1, 31
 ; PC64LE-NEXT:    bl tan
@@ -8514,7 +8205,6 @@ define <3 x double> @constrained_vector_tan_v3f64(<3 x double> %x) #0 {
 ; PC64LE-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
 ; PC64LE-NEXT:    xxlor 2, 63, 63
 ; PC64LE-NEXT:    lfd 30, 64(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    addi 1, 1, 80
 ; PC64LE-NEXT:    ld 0, 16(1)
@@ -8537,7 +8227,6 @@ define <3 x double> @constrained_vector_tan_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    fmr 1, 30
 ; PC64LE9-NEXT:    bl tan
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 63, 1, 63
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    bl tan
@@ -8547,7 +8236,6 @@ define <3 x double> @constrained_vector_tan_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
 ; PC64LE9-NEXT:    lxv 63, 32(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    lfd 31, 56(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    lfd 30, 48(1) # 8-byte Folded Reload
 ; PC64LE9-NEXT:    addi 1, 1, 64
 ; PC64LE9-NEXT:    ld 0, 16(1)
@@ -8580,22 +8268,18 @@ define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 {
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 62
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl tan
 ; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE-NEXT:    xxlor 1, 63, 63
 ; PC64LE-NEXT:    bl tan
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 61, 1, 1
 ; PC64LE-NEXT:    xxswapd 1, 63
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE-NEXT:    bl tan
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    li 3, 80
 ; PC64LE-NEXT:    vmr 2, 30
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
 ; PC64LE-NEXT:    li 3, 64
@@ -8622,20 +8306,16 @@ define <4 x double> @constrained_vector_tan_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 62
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl tan
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 62, 61, 1
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    bl tan
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xscpsgndp 61, 1, 1
 ; PC64LE9-NEXT:    xxswapd 1, 63
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PC64LE9-NEXT:    bl tan
 ; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 35, 61, 1
 ; PC64LE9-NEXT:    vmr 2, 30
 ; PC64LE9-NEXT:    lxv 63, 64(1) # 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/vector-llrint.ll b/llvm/test/CodeGen/PowerPC/vector-llrint.ll
index 4321b213b631c..190cf6fe1eaad 100644
--- a/llvm/test/CodeGen/PowerPC/vector-llrint.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-llrint.ll
@@ -4465,9 +4465,8 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
 ; BE-NEXT:    xxlor f1, v31, v31
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    nop
-; BE-NEXT:    std r3, 128(r1)
 ; BE-NEXT:    xxswapd vs1, v31
-; BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; BE-NEXT:    std r3, 128(r1)
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    nop
 ; BE-NEXT:    std r3, 136(r1)
@@ -4496,7 +4495,6 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    xxswapd vs1, v31
 ; CHECK-NEXT:    mtvsrd v31, r3
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    bl llrint
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
@@ -4544,18 +4542,16 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
 ; BE-NEXT:    vmr v31, v3
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    nop
-; BE-NEXT:    std r3, 128(r1)
 ; BE-NEXT:    xxswapd vs1, v30
-; BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; BE-NEXT:    std r3, 128(r1)
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    nop
 ; BE-NEXT:    xxlor f1, v31, v31
 ; BE-NEXT:    std r3, 136(r1)
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    nop
-; BE-NEXT:    std r3, 144(r1)
 ; BE-NEXT:    xxswapd vs1, v31
-; BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; BE-NEXT:    std r3, 144(r1)
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    nop
 ; BE-NEXT:    std r3, 152(r1)
@@ -4592,7 +4588,6 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    xxswapd vs1, v30
 ; CHECK-NEXT:    mtvsrd v30, r3
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    bl llrint
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
@@ -4602,7 +4597,6 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    xxswapd vs1, v31
 ; CHECK-NEXT:    mtvsrd v31, r3
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    bl llrint
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
@@ -4670,36 +4664,32 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
 ; BE-NEXT:    vmr v31, v5
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    nop
-; BE-NEXT:    std r3, 128(r1)
 ; BE-NEXT:    xxswapd vs1, v28
-; BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; BE-NEXT:    std r3, 128(r1)
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    nop
 ; BE-NEXT:    xxlor f1, v29, v29
 ; BE-NEXT:    std r3, 136(r1)
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    nop
-; BE-NEXT:    std r3, 144(r1)
 ; BE-NEXT:    xxswapd vs1, v29
-; BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; BE-NEXT:    std r3, 144(r1)
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    nop
 ; BE-NEXT:    xxlor f1, v30, v30
 ; BE-NEXT:    std r3, 152(r1)
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    nop
-; BE-NEXT:    std r3, 160(r1)
 ; BE-NEXT:    xxswapd vs1, v30
-; BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; BE-NEXT:    std r3, 160(r1)
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    nop
 ; BE-NEXT:    xxlor f1, v31, v31
 ; BE-NEXT:    std r3, 168(r1)
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    nop
-; BE-NEXT:    std r3, 176(r1)
 ; BE-NEXT:    xxswapd vs1, v31
-; BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; BE-NEXT:    std r3, 176(r1)
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    nop
 ; BE-NEXT:    std r3, 184(r1)
@@ -4752,7 +4742,6 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    xxswapd vs1, v28
 ; CHECK-NEXT:    mtvsrd v28, r3
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    bl llrint
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
@@ -4762,7 +4751,6 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    xxswapd vs1, v29
 ; CHECK-NEXT:    mtvsrd v29, r3
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    bl llrint
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
@@ -4772,7 +4760,6 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    xxswapd vs1, v30
 ; CHECK-NEXT:    mtvsrd v30, r3
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    bl llrint
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
@@ -4782,7 +4769,6 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    xxswapd vs1, v31
 ; CHECK-NEXT:    mtvsrd v31, r3
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    bl llrint
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
diff --git a/llvm/test/CodeGen/PowerPC/vector-lrint.ll b/llvm/test/CodeGen/PowerPC/vector-lrint.ll
index 9667a26120149..b6d0bd5c05894 100644
--- a/llvm/test/CodeGen/PowerPC/vector-lrint.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-lrint.ll
@@ -4476,9 +4476,8 @@ define <2 x i64> @lrint_v2f64(<2 x double> %x) {
 ; BE-NEXT:    xxlor f1, v31, v31
 ; BE-NEXT:    bl lrint
 ; BE-NEXT:    nop
-; BE-NEXT:    std r3, 128(r1)
 ; BE-NEXT:    xxswapd vs1, v31
-; BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; BE-NEXT:    std r3, 128(r1)
 ; BE-NEXT:    bl lrint
 ; BE-NEXT:    nop
 ; BE-NEXT:    std r3, 136(r1)
@@ -4507,7 +4506,6 @@ define <2 x i64> @lrint_v2f64(<2 x double> %x) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    xxswapd vs1, v31
 ; CHECK-NEXT:    mtvsrd v31, r3
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    bl lrint
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
@@ -4555,18 +4553,16 @@ define <4 x i64> @lrint_v4f64(<4 x double> %x) {
 ; BE-NEXT:    vmr v31, v3
 ; BE-NEXT:    bl lrint
 ; BE-NEXT:    nop
-; BE-NEXT:    std r3, 128(r1)
 ; BE-NEXT:    xxswapd vs1, v30
-; BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; BE-NEXT:    std r3, 128(r1)
 ; BE-NEXT:    bl lrint
 ; BE-NEXT:    nop
 ; BE-NEXT:    xxlor f1, v31, v31
 ; BE-NEXT:    std r3, 136(r1)
 ; BE-NEXT:    bl lrint
 ; BE-NEXT:    nop
-; BE-NEXT:    std r3, 144(r1)
 ; BE-NEXT:    xxswapd vs1, v31
-; BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; BE-NEXT:    std r3, 144(r1)
 ; BE-NEXT:    bl lrint
 ; BE-NEXT:    nop
 ; BE-NEXT:    std r3, 152(r1)
@@ -4603,7 +4599,6 @@ define <4 x i64> @lrint_v4f64(<4 x double> %x) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    xxswapd vs1, v30
 ; CHECK-NEXT:    mtvsrd v30, r3
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    bl lrint
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
@@ -4613,7 +4608,6 @@ define <4 x i64> @lrint_v4f64(<4 x double> %x) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    xxswapd vs1, v31
 ; CHECK-NEXT:    mtvsrd v31, r3
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    bl lrint
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
@@ -4681,36 +4675,32 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) {
 ; BE-NEXT:    vmr v31, v5
 ; BE-NEXT:    bl lrint
 ; BE-NEXT:    nop
-; BE-NEXT:    std r3, 128(r1)
 ; BE-NEXT:    xxswapd vs1, v28
-; BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; BE-NEXT:    std r3, 128(r1)
 ; BE-NEXT:    bl lrint
 ; BE-NEXT:    nop
 ; BE-NEXT:    xxlor f1, v29, v29
 ; BE-NEXT:    std r3, 136(r1)
 ; BE-NEXT:    bl lrint
 ; BE-NEXT:    nop
-; BE-NEXT:    std r3, 144(r1)
 ; BE-NEXT:    xxswapd vs1, v29
-; BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; BE-NEXT:    std r3, 144(r1)
 ; BE-NEXT:    bl lrint
 ; BE-NEXT:    nop
 ; BE-NEXT:    xxlor f1, v30, v30
 ; BE-NEXT:    std r3, 152(r1)
 ; BE-NEXT:    bl lrint
 ; BE-NEXT:    nop
-; BE-NEXT:    std r3, 160(r1)
 ; BE-NEXT:    xxswapd vs1, v30
-; BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; BE-NEXT:    std r3, 160(r1)
 ; BE-NEXT:    bl lrint
 ; BE-NEXT:    nop
 ; BE-NEXT:    xxlor f1, v31, v31
 ; BE-NEXT:    std r3, 168(r1)
 ; BE-NEXT:    bl lrint
 ; BE-NEXT:    nop
-; BE-NEXT:    std r3, 176(r1)
 ; BE-NEXT:    xxswapd vs1, v31
-; BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; BE-NEXT:    std r3, 176(r1)
 ; BE-NEXT:    bl lrint
 ; BE-NEXT:    nop
 ; BE-NEXT:    std r3, 184(r1)
@@ -4763,7 +4753,6 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    xxswapd vs1, v28
 ; CHECK-NEXT:    mtvsrd v28, r3
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    bl lrint
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
@@ -4773,7 +4762,6 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    xxswapd vs1, v29
 ; CHECK-NEXT:    mtvsrd v29, r3
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    bl lrint
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
@@ -4783,7 +4771,6 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    xxswapd vs1, v30
 ; CHECK-NEXT:    mtvsrd v30, r3
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    bl lrint
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
@@ -4793,7 +4780,6 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    xxswapd vs1, v31
 ; CHECK-NEXT:    mtvsrd v31, r3
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    bl lrint
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-fadd.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-fadd.ll
index edd3fb7b1754b..4a036a7868c1a 100644
--- a/llvm/test/CodeGen/PowerPC/vector-reduce-fadd.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-reduce-fadd.ll
@@ -1081,14 +1081,12 @@ define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs0, v2
 ; PWR9LE-NEXT:    xvadddp vs0, v2, vs0
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v2f64_fast:
 ; PWR9BE:       # %bb.0: # %entry
 ; PWR9BE-NEXT:    xxswapd vs0, v2
 ; PWR9BE-NEXT:    xvadddp vs1, v2, vs0
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v2f64_fast:
@@ -1096,14 +1094,12 @@ define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs0, v2
 ; PWR10LE-NEXT:    xvadddp vs0, v2, vs0
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v2f64_fast:
 ; PWR10BE:       # %bb.0: # %entry
 ; PWR10BE-NEXT:    xxswapd vs0, v2
 ; PWR10BE-NEXT:    xvadddp vs1, v2, vs0
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> %a)
@@ -1203,7 +1199,6 @@ define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
 ; PWR9LE-NEXT:    xvadddp vs0, vs0, vs1
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v4f64_fast:
@@ -1211,7 +1206,6 @@ define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
 ; PWR9BE-NEXT:    xvadddp vs0, v2, v3
 ; PWR9BE-NEXT:    xxswapd vs1, vs0
 ; PWR9BE-NEXT:    xvadddp vs1, vs0, vs1
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v4f64_fast:
@@ -1220,7 +1214,6 @@ define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
 ; PWR10LE-NEXT:    xvadddp vs0, vs0, vs1
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v4f64_fast:
@@ -1228,7 +1221,6 @@ define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    xvadddp vs0, v2, v3
 ; PWR10BE-NEXT:    xxswapd vs1, vs0
 ; PWR10BE-NEXT:    xvadddp vs1, vs0, vs1
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> %a)
@@ -1378,7 +1370,6 @@ define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
 ; PWR9LE-NEXT:    xvadddp vs0, vs0, vs1
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v8f64_fast:
@@ -1388,7 +1379,6 @@ define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
 ; PWR9BE-NEXT:    xvadddp vs0, vs1, vs0
 ; PWR9BE-NEXT:    xxswapd vs1, vs0
 ; PWR9BE-NEXT:    xvadddp vs1, vs0, vs1
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v8f64_fast:
@@ -1399,7 +1389,6 @@ define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
 ; PWR10LE-NEXT:    xvadddp vs0, vs0, vs1
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v8f64_fast:
@@ -1409,7 +1398,6 @@ define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    xvadddp vs0, vs1, vs0
 ; PWR10BE-NEXT:    xxswapd vs1, vs0
 ; PWR10BE-NEXT:    xvadddp vs1, vs0, vs1
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %a)
@@ -1659,7 +1647,6 @@ define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
 ; PWR9LE-NEXT:    xvadddp vs0, vs0, vs1
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v16f64_fast:
@@ -1673,7 +1660,6 @@ define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
 ; PWR9BE-NEXT:    xvadddp vs0, vs0, vs2
 ; PWR9BE-NEXT:    xxswapd vs1, vs0
 ; PWR9BE-NEXT:    xvadddp vs1, vs0, vs1
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v16f64_fast:
@@ -1688,7 +1674,6 @@ define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
 ; PWR10LE-NEXT:    xvadddp vs0, vs0, vs1
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v16f64_fast:
@@ -1702,7 +1687,6 @@ define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    xvadddp vs0, vs0, vs2
 ; PWR10BE-NEXT:    xxswapd vs1, vs0
 ; PWR10BE-NEXT:    xvadddp vs1, vs0, vs1
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fadd.v16f64(double -0.000000e+00, <16 x double> %a)
@@ -2188,7 +2172,6 @@ define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
 ; PWR9LE-NEXT:    xvadddp vs0, vs0, vs1
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v32f64_fast:
@@ -2214,7 +2197,6 @@ define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
 ; PWR9BE-NEXT:    xvadddp vs0, vs0, vs2
 ; PWR9BE-NEXT:    xxswapd vs1, vs0
 ; PWR9BE-NEXT:    xvadddp vs1, vs0, vs1
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v32f64_fast:
@@ -2241,7 +2223,6 @@ define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
 ; PWR10LE-NEXT:    xvadddp vs0, vs0, vs1
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v32f64_fast:
@@ -2267,7 +2248,6 @@ define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    xvadddp vs0, vs0, vs2
 ; PWR10BE-NEXT:    xxswapd vs1, vs0
 ; PWR10BE-NEXT:    xvadddp vs1, vs0, vs1
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fadd.v32f64(double -0.000000e+00, <32 x double> %a)
@@ -3297,7 +3277,6 @@ define dso_local double @v64f64_fast(<64 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
 ; PWR9LE-NEXT:    xvadddp vs0, vs0, vs1
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v64f64_fast:
@@ -3355,7 +3334,6 @@ define dso_local double @v64f64_fast(<64 x double> %a) local_unnamed_addr #0 {
 ; PWR9BE-NEXT:    xvadddp vs0, vs1, vs0
 ; PWR9BE-NEXT:    xxswapd vs1, vs0
 ; PWR9BE-NEXT:    xvadddp vs1, vs0, vs1
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v64f64_fast:
@@ -3414,7 +3392,6 @@ define dso_local double @v64f64_fast(<64 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
 ; PWR10LE-NEXT:    xvadddp vs0, vs0, vs1
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v64f64_fast:
@@ -3472,7 +3449,6 @@ define dso_local double @v64f64_fast(<64 x double> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    xvadddp vs0, vs1, vs0
 ; PWR10BE-NEXT:    xxswapd vs1, vs0
 ; PWR10BE-NEXT:    xvadddp vs1, vs0, vs1
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fadd.v64f64(double -0.000000e+00, <64 x double> %a)
@@ -3660,8 +3636,6 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
 ; PWR9LE-NEXT:    stfd f1, 32(r1)
 ; PWR9LE-NEXT:    lxv vs1, 32(r1)
 ; PWR9LE-NEXT:    xxswapd vs2, vs1
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PWR9LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PWR9LE-NEXT:    addi r1, r1, 64
 ; PWR9LE-NEXT:    ld r0, 16(r1)
 ; PWR9LE-NEXT:    mtlr r0
@@ -3678,8 +3652,6 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
 ; PWR9BE-NEXT:    stfd f1, 112(r1)
 ; PWR9BE-NEXT:    lxv vs1, 112(r1)
 ; PWR9BE-NEXT:    xxswapd vs2, vs1
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PWR9BE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PWR9BE-NEXT:    addi r1, r1, 144
 ; PWR9BE-NEXT:    ld r0, 16(r1)
 ; PWR9BE-NEXT:    mtlr r0
@@ -3695,8 +3667,6 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
 ; PWR10LE-NEXT:    stfd f1, 32(r1)
 ; PWR10LE-NEXT:    lxv vs1, 32(r1)
 ; PWR10LE-NEXT:    xxswapd vs2, vs1
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PWR10LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PWR10LE-NEXT:    addi r1, r1, 64
 ; PWR10LE-NEXT:    ld r0, 16(r1)
 ; PWR10LE-NEXT:    mtlr r0
@@ -3713,8 +3683,6 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
 ; PWR10BE-NEXT:    stfd f1, 112(r1)
 ; PWR10BE-NEXT:    lxv vs1, 112(r1)
 ; PWR10BE-NEXT:    xxswapd vs2, vs1
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PWR10BE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PWR10BE-NEXT:    addi r1, r1, 144
 ; PWR10BE-NEXT:    ld r0, 16(r1)
 ; PWR10BE-NEXT:    mtlr r0
@@ -4077,8 +4045,6 @@ define dso_local ppc_fp128 @v4ppcf128_fast(<4 x ppc_fp128> %a) local_unnamed_add
 ; PWR9LE-NEXT:    stfd f1, 32(r1)
 ; PWR9LE-NEXT:    lxv vs1, 32(r1)
 ; PWR9LE-NEXT:    xxswapd vs2, vs1
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PWR9LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PWR9LE-NEXT:    addi r1, r1, 96
 ; PWR9LE-NEXT:    ld r0, 16(r1)
 ; PWR9LE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
@@ -4133,8 +4099,6 @@ define dso_local ppc_fp128 @v4ppcf128_fast(<4 x ppc_fp128> %a) local_unnamed_add
 ; PWR9BE-NEXT:    lfd f28, 144(r1) # 8-byte Folded Reload
 ; PWR9BE-NEXT:    lfd f27, 136(r1) # 8-byte Folded Reload
 ; PWR9BE-NEXT:    lfd f26, 128(r1) # 8-byte Folded Reload
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PWR9BE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PWR9BE-NEXT:    addi r1, r1, 176
 ; PWR9BE-NEXT:    ld r0, 16(r1)
 ; PWR9BE-NEXT:    mtlr r0
@@ -4174,8 +4138,6 @@ define dso_local ppc_fp128 @v4ppcf128_fast(<4 x ppc_fp128> %a) local_unnamed_add
 ; PWR10LE-NEXT:    stfd f1, 32(r1)
 ; PWR10LE-NEXT:    lxv vs1, 32(r1)
 ; PWR10LE-NEXT:    xxswapd vs2, vs1
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PWR10LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PWR10LE-NEXT:    addi r1, r1, 96
 ; PWR10LE-NEXT:    ld r0, 16(r1)
 ; PWR10LE-NEXT:    lfd f31, -8(r1) # 8-byte Folded Reload
@@ -4230,8 +4192,6 @@ define dso_local ppc_fp128 @v4ppcf128_fast(<4 x ppc_fp128> %a) local_unnamed_add
 ; PWR10BE-NEXT:    lfd f26, 128(r1) # 8-byte Folded Reload
 ; PWR10BE-NEXT:    lxv vs1, 112(r1)
 ; PWR10BE-NEXT:    xxswapd vs2, vs1
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PWR10BE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; PWR10BE-NEXT:    addi r1, r1, 176
 ; PWR10BE-NEXT:    ld r0, 16(r1)
 ; PWR10BE-NEXT:    mtlr r0
diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-fmax.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-fmax.ll
index b1f72f694aea5..7d024144b7c32 100644
--- a/llvm/test/CodeGen/PowerPC/vector-reduce-fmax.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-reduce-fmax.ll
@@ -635,14 +635,12 @@ define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs0, v2
 ; PWR9LE-NEXT:    xvmaxdp vs0, v2, vs0
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v2f64_fast:
 ; PWR9BE:       # %bb.0: # %entry
 ; PWR9BE-NEXT:    xxswapd vs0, v2
 ; PWR9BE-NEXT:    xvmaxdp vs1, v2, vs0
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v2f64_fast:
@@ -650,14 +648,12 @@ define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs0, v2
 ; PWR10LE-NEXT:    xvmaxdp vs0, v2, vs0
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v2f64_fast:
 ; PWR10BE:       # %bb.0: # %entry
 ; PWR10BE-NEXT:    xxswapd vs0, v2
 ; PWR10BE-NEXT:    xvmaxdp vs1, v2, vs0
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a)
@@ -704,7 +700,6 @@ define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
 ; PWR9LE-NEXT:    xvmaxdp vs0, vs0, vs1
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v4f64_fast:
@@ -712,7 +707,6 @@ define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
 ; PWR9BE-NEXT:    xvmaxdp vs0, v2, v3
 ; PWR9BE-NEXT:    xxswapd vs1, vs0
 ; PWR9BE-NEXT:    xvmaxdp vs1, vs0, vs1
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v4f64_fast:
@@ -721,7 +715,6 @@ define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
 ; PWR10LE-NEXT:    xvmaxdp vs0, vs0, vs1
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v4f64_fast:
@@ -729,7 +722,6 @@ define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    xvmaxdp vs0, v2, v3
 ; PWR10BE-NEXT:    xxswapd vs1, vs0
 ; PWR10BE-NEXT:    xvmaxdp vs1, vs0, vs1
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %a)
@@ -786,7 +778,6 @@ define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
 ; PWR9LE-NEXT:    xvmaxdp vs0, vs0, vs1
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v8f64_fast:
@@ -796,7 +787,6 @@ define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
 ; PWR9BE-NEXT:    xvmaxdp vs0, vs1, vs0
 ; PWR9BE-NEXT:    xxswapd vs1, vs0
 ; PWR9BE-NEXT:    xvmaxdp vs1, vs0, vs1
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v8f64_fast:
@@ -807,7 +797,6 @@ define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
 ; PWR10LE-NEXT:    xvmaxdp vs0, vs0, vs1
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v8f64_fast:
@@ -817,7 +806,6 @@ define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    xvmaxdp vs0, vs1, vs0
 ; PWR10BE-NEXT:    xxswapd vs1, vs0
 ; PWR10BE-NEXT:    xvmaxdp vs1, vs0, vs1
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fmax.v8f64(<8 x double> %a)
@@ -894,7 +882,6 @@ define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
 ; PWR9LE-NEXT:    xvmaxdp vs0, vs0, vs1
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v16f64_fast:
@@ -908,7 +895,6 @@ define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
 ; PWR9BE-NEXT:    xvmaxdp vs0, vs0, vs2
 ; PWR9BE-NEXT:    xxswapd vs1, vs0
 ; PWR9BE-NEXT:    xvmaxdp vs1, vs0, vs1
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v16f64_fast:
@@ -923,7 +909,6 @@ define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
 ; PWR10LE-NEXT:    xvmaxdp vs0, vs0, vs1
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v16f64_fast:
@@ -937,7 +922,6 @@ define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    xvmaxdp vs0, vs0, vs2
 ; PWR10BE-NEXT:    xxswapd vs1, vs0
 ; PWR10BE-NEXT:    xvmaxdp vs1, vs0, vs1
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fmax.v16f64(<16 x double> %a)
@@ -1074,7 +1058,6 @@ define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
 ; PWR9LE-NEXT:    xvmaxdp vs0, vs0, vs1
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v32f64_fast:
@@ -1100,7 +1083,6 @@ define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
 ; PWR9BE-NEXT:    xvmaxdp vs0, vs0, vs2
 ; PWR9BE-NEXT:    xxswapd vs1, vs0
 ; PWR9BE-NEXT:    xvmaxdp vs1, vs0, vs1
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v32f64_fast:
@@ -1127,7 +1109,6 @@ define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
 ; PWR10LE-NEXT:    xvmaxdp vs0, vs0, vs1
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v32f64_fast:
@@ -1153,7 +1134,6 @@ define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    xvmaxdp vs0, vs0, vs2
 ; PWR10BE-NEXT:    xxswapd vs1, vs0
 ; PWR10BE-NEXT:    xvmaxdp vs1, vs0, vs1
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fmax.v32f64(<32 x double> %a)
diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-fmin.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-fmin.ll
index e806a702cd62b..9b01889b91f6e 100644
--- a/llvm/test/CodeGen/PowerPC/vector-reduce-fmin.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-reduce-fmin.ll
@@ -635,14 +635,12 @@ define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs0, v2
 ; PWR9LE-NEXT:    xvmindp vs0, v2, vs0
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v2f64_fast:
 ; PWR9BE:       # %bb.0: # %entry
 ; PWR9BE-NEXT:    xxswapd vs0, v2
 ; PWR9BE-NEXT:    xvmindp vs1, v2, vs0
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v2f64_fast:
@@ -650,14 +648,12 @@ define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs0, v2
 ; PWR10LE-NEXT:    xvmindp vs0, v2, vs0
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v2f64_fast:
 ; PWR10BE:       # %bb.0: # %entry
 ; PWR10BE-NEXT:    xxswapd vs0, v2
 ; PWR10BE-NEXT:    xvmindp vs1, v2, vs0
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a)
@@ -704,7 +700,6 @@ define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
 ; PWR9LE-NEXT:    xvmindp vs0, vs0, vs1
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v4f64_fast:
@@ -712,7 +707,6 @@ define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
 ; PWR9BE-NEXT:    xvmindp vs0, v2, v3
 ; PWR9BE-NEXT:    xxswapd vs1, vs0
 ; PWR9BE-NEXT:    xvmindp vs1, vs0, vs1
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v4f64_fast:
@@ -721,7 +715,6 @@ define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
 ; PWR10LE-NEXT:    xvmindp vs0, vs0, vs1
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v4f64_fast:
@@ -729,7 +722,6 @@ define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    xvmindp vs0, v2, v3
 ; PWR10BE-NEXT:    xxswapd vs1, vs0
 ; PWR10BE-NEXT:    xvmindp vs1, vs0, vs1
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %a)
@@ -786,7 +778,6 @@ define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
 ; PWR9LE-NEXT:    xvmindp vs0, vs0, vs1
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v8f64_fast:
@@ -796,7 +787,6 @@ define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
 ; PWR9BE-NEXT:    xvmindp vs0, vs1, vs0
 ; PWR9BE-NEXT:    xxswapd vs1, vs0
 ; PWR9BE-NEXT:    xvmindp vs1, vs0, vs1
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v8f64_fast:
@@ -807,7 +797,6 @@ define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
 ; PWR10LE-NEXT:    xvmindp vs0, vs0, vs1
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v8f64_fast:
@@ -817,7 +806,6 @@ define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    xvmindp vs0, vs1, vs0
 ; PWR10BE-NEXT:    xxswapd vs1, vs0
 ; PWR10BE-NEXT:    xvmindp vs1, vs0, vs1
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a)
@@ -894,7 +882,6 @@ define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
 ; PWR9LE-NEXT:    xvmindp vs0, vs0, vs1
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v16f64_fast:
@@ -908,7 +895,6 @@ define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
 ; PWR9BE-NEXT:    xvmindp vs0, vs0, vs2
 ; PWR9BE-NEXT:    xxswapd vs1, vs0
 ; PWR9BE-NEXT:    xvmindp vs1, vs0, vs1
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v16f64_fast:
@@ -923,7 +909,6 @@ define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
 ; PWR10LE-NEXT:    xvmindp vs0, vs0, vs1
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v16f64_fast:
@@ -937,7 +922,6 @@ define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    xvmindp vs0, vs0, vs2
 ; PWR10BE-NEXT:    xxswapd vs1, vs0
 ; PWR10BE-NEXT:    xvmindp vs1, vs0, vs1
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fmin.v16f64(<16 x double> %a)
@@ -1074,7 +1058,6 @@ define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
 ; PWR9LE-NEXT:    xvmindp vs0, vs0, vs1
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v32f64_fast:
@@ -1100,7 +1083,6 @@ define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
 ; PWR9BE-NEXT:    xvmindp vs0, vs0, vs2
 ; PWR9BE-NEXT:    xxswapd vs1, vs0
 ; PWR9BE-NEXT:    xvmindp vs1, vs0, vs1
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v32f64_fast:
@@ -1127,7 +1109,6 @@ define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
 ; PWR10LE-NEXT:    xvmindp vs0, vs0, vs1
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v32f64_fast:
@@ -1153,7 +1134,6 @@ define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    xvmindp vs0, vs0, vs2
 ; PWR10BE-NEXT:    xxswapd vs1, vs0
 ; PWR10BE-NEXT:    xvmindp vs1, vs0, vs1
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fmin.v32f64(<32 x double> %a)
diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-fmul.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-fmul.ll
index e123f5c2056d6..b566bb9d2d911 100644
--- a/llvm/test/CodeGen/PowerPC/vector-reduce-fmul.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-reduce-fmul.ll
@@ -1081,14 +1081,12 @@ define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs0, v2
 ; PWR9LE-NEXT:    xvmuldp vs0, v2, vs0
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v2f64_fast:
 ; PWR9BE:       # %bb.0: # %entry
 ; PWR9BE-NEXT:    xxswapd vs0, v2
 ; PWR9BE-NEXT:    xvmuldp vs1, v2, vs0
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v2f64_fast:
@@ -1096,14 +1094,12 @@ define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs0, v2
 ; PWR10LE-NEXT:    xvmuldp vs0, v2, vs0
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v2f64_fast:
 ; PWR10BE:       # %bb.0: # %entry
 ; PWR10BE-NEXT:    xxswapd vs0, v2
 ; PWR10BE-NEXT:    xvmuldp vs1, v2, vs0
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fmul.v2f64(double 1.000000e+00, <2 x double> %a)
@@ -1203,7 +1199,6 @@ define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
 ; PWR9LE-NEXT:    xvmuldp vs0, vs0, vs1
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v4f64_fast:
@@ -1211,7 +1206,6 @@ define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
 ; PWR9BE-NEXT:    xvmuldp vs0, v2, v3
 ; PWR9BE-NEXT:    xxswapd vs1, vs0
 ; PWR9BE-NEXT:    xvmuldp vs1, vs0, vs1
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v4f64_fast:
@@ -1220,7 +1214,6 @@ define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
 ; PWR10LE-NEXT:    xvmuldp vs0, vs0, vs1
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v4f64_fast:
@@ -1228,7 +1221,6 @@ define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    xvmuldp vs0, v2, v3
 ; PWR10BE-NEXT:    xxswapd vs1, vs0
 ; PWR10BE-NEXT:    xvmuldp vs1, vs0, vs1
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fmul.v4f64(double 1.000000e+00, <4 x double> %a)
@@ -1378,7 +1370,6 @@ define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
 ; PWR9LE-NEXT:    xvmuldp vs0, vs0, vs1
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v8f64_fast:
@@ -1388,7 +1379,6 @@ define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
 ; PWR9BE-NEXT:    xvmuldp vs0, vs1, vs0
 ; PWR9BE-NEXT:    xxswapd vs1, vs0
 ; PWR9BE-NEXT:    xvmuldp vs1, vs0, vs1
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v8f64_fast:
@@ -1399,7 +1389,6 @@ define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
 ; PWR10LE-NEXT:    xvmuldp vs0, vs0, vs1
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v8f64_fast:
@@ -1409,7 +1398,6 @@ define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    xvmuldp vs0, vs1, vs0
 ; PWR10BE-NEXT:    xxswapd vs1, vs0
 ; PWR10BE-NEXT:    xvmuldp vs1, vs0, vs1
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %a)
@@ -1659,7 +1647,6 @@ define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
 ; PWR9LE-NEXT:    xvmuldp vs0, vs0, vs1
 ; PWR9LE-NEXT:    xxswapd vs1, vs0
-; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v16f64_fast:
@@ -1673,7 +1660,6 @@ define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
 ; PWR9BE-NEXT:    xvmuldp vs0, vs0, vs2
 ; PWR9BE-NEXT:    xxswapd vs1, vs0
 ; PWR9BE-NEXT:    xvmuldp vs1, vs0, vs1
-; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR9BE-NEXT:    blr
 ;
 ; PWR10LE-LABEL: v16f64_fast:
@@ -1688,7 +1674,6 @@ define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
 ; PWR10LE-NEXT:    xvmuldp vs0, vs0, vs1
 ; PWR10LE-NEXT:    xxswapd vs1, vs0
-; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v16f64_fast:
@@ -1702,7 +1687,6 @@ define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    xvmuldp vs0, vs0, vs2
 ; PWR10BE-NEXT:    xxswapd vs1, vs0
 ; PWR10BE-NEXT:    xvmuldp vs1, vs0, vs1
-; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; PWR10BE-NEXT:    blr
 entry:
   %0 = call fast double @llvm.vector.reduce.fmul.v16f64(double 1.000000e+00, <16 x double> %a)
diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index 32cbfd6d810ac..d1d29a0f884c6 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -1993,7 +1993,6 @@ define double @test63(<2 x double> %a) {
 ; CHECK-LE-LABEL: test63:
 ; CHECK-LE:       # %bb.0:
 ; CHECK-LE-NEXT:    xxswapd vs1, v2
-; CHECK-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-LE-NEXT:    blr
   %v = extractelement <2 x double> %a, i32 0
   ret double %v
@@ -2006,13 +2005,11 @@ define double @test64(<2 x double> %a) {
 ; CHECK-LABEL: test64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xxswapd vs1, v2
-; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-REG-LABEL: test64:
 ; CHECK-REG:       # %bb.0:
 ; CHECK-REG-NEXT:    xxswapd vs1, v2
-; CHECK-REG-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-REG-NEXT:    blr
 ;
 ; CHECK-FISL-LABEL: test64:

From 56fa0195ed952dfa53f36890759f4b0ac3556e01 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Fri, 2 Aug 2024 18:04:57 +0300
Subject: [PATCH 104/427] [analyzer] Fix crash on using `bitcast(<type>,
 <array>)` as array subscript (#101647)

Current CSA logic does not expect `LazyCompoundValKind` as array index.
This may happen if array is used as subscript to another, in case of
bitcast to integer type.

Catch such cases and return `UnknownVal`, since CSA cannot model
array -> int casts.

Closes #94496

(cherry picked from commit d96569ecc2807a13dab6495d8cc4e82775b00af1)
---
 clang/docs/ReleaseNotes.rst             |  3 +++
 clang/lib/StaticAnalyzer/Core/Store.cpp | 12 +++++++++++-
 clang/test/Analysis/exercise-ps.c       | 20 ++++++++++++++++++--
 3 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index c42cb9932f3f7..5cd398c22c946 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1372,6 +1372,9 @@ Crash and bug fixes
 - Fixed a crash when storing through an address that refers to the address of
   a label. (#GH89185)
 
+- Fixed a crash when using ``__builtin_bitcast(type, array)`` as an array
+  subscript. (#GH94496)
+
 - Z3 crosschecking (aka. Z3 refutation) is now bounded, and can't consume
   more total time than the eymbolic execution itself. (#GH97298)
 
diff --git a/clang/lib/StaticAnalyzer/Core/Store.cpp b/clang/lib/StaticAnalyzer/Core/Store.cpp
index 67ca61bb56ba2..b436dd746d21f 100644
--- a/clang/lib/StaticAnalyzer/Core/Store.cpp
+++ b/clang/lib/StaticAnalyzer/Core/Store.cpp
@@ -472,7 +472,17 @@ SVal StoreManager::getLValueElement(QualType elementType, NonLoc Offset,
   const auto *ElemR = dyn_cast<ElementRegion>(BaseRegion);
 
   // Convert the offset to the appropriate size and signedness.
-  Offset = svalBuilder.convertToArrayIndex(Offset).castAs<NonLoc>();
+  auto Off = svalBuilder.convertToArrayIndex(Offset).getAs<NonLoc>();
+  if (!Off) {
+    // Handle cases when LazyCompoundVal is used for an array index.
+    // Such case is possible if code does:
+    //   char b[4];
+    //   a[__builtin_bitcast(int, b)];
+    // Return UnknownVal, since we cannot model it.
+    return UnknownVal();
+  }
+
+  Offset = Off.value();
 
   if (!ElemR) {
     // If the base region is not an ElementRegion, create one.
diff --git a/clang/test/Analysis/exercise-ps.c b/clang/test/Analysis/exercise-ps.c
index d1e1771afddb5..50643d5b04687 100644
--- a/clang/test/Analysis/exercise-ps.c
+++ b/clang/test/Analysis/exercise-ps.c
@@ -1,10 +1,13 @@
-// RUN: %clang_analyze_cc1 %s -verify -Wno-error=implicit-function-declaration \
-// RUN:   -analyzer-checker=core,unix.Malloc \
+// RUN: %clang_analyze_cc1 %s -triple=x86_64-unknown-linux \
+// RUN:   -verify -Wno-error=implicit-function-declaration \
+// RUN:   -analyzer-checker=core,unix.Malloc,debug.ExprInspection \
 // RUN:   -analyzer-config core.CallAndMessage:ArgPointeeInitializedness=true
 //
 // Just exercise the analyzer on code that has at one point caused issues
 // (i.e., no assertions or crashes).
 
+void clang_analyzer_dump_int(int);
+
 static void f1(const char *x, char *y) {
   while (*x != 0) {
     *y++ = *x++;
@@ -30,3 +33,16 @@ void f3(void *dest) {
   void *src = __builtin_alloca(5);
   memcpy(dest, src, 1); // expected-warning{{2nd function call argument is a pointer to uninitialized value}}
 }
+
+// Reproduce crash from GH#94496. When array is used as subcript to another array, CSA cannot model it
+// and should just assume it's unknown and do not crash.
+void f4(char *array) {
+  char b[4] = {0};
+
+  _Static_assert(sizeof(int) == 4, "Wrong triple for the test");
+
+  clang_analyzer_dump_int(__builtin_bit_cast(int, b)); // expected-warning {{lazyCompoundVal}}
+  clang_analyzer_dump_int(array[__builtin_bit_cast(int, b)]); // expected-warning {{Unknown}}
+
+  array[__builtin_bit_cast(int, b)] = 0x10; // no crash
+}

From ba80cdd27874284077c2c79e54e867d9af1b956e Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Fri, 2 Aug 2024 10:10:15 -0700
Subject: [PATCH 105/427] [asan,test] Disable _FORTIFY_SOURCE test incompatible
 with glibc 2.40

In terms of bug catching capability, `_FORTIFY_SOURCE` does not perform
as well as some dynamic instrumentation tools. When a sanitizer is used,
generally `_FORTIFY_SOURCE` should be disabled since sanitizer runtime
does not implement most `*_chk` functions. Using `_FORTIFY_SOURCE`
will regress error checking (asan/hwasan/tsan) or cause false positives
(msan).

`*printf_chk` are the most pronounced `_chk` interceptors for
uninstrumented DSOes (https://reviews.llvm.org/D40951).

glibc 2.40 introduced `pass_object_info` style fortified source for some
functions ([1]). `fprintf` will be mangled as
`_ZL7fprintfP8_IO_FILEU17pass_object_size1PKcz`, which has no associated
interceptor, leading to printf-fortify-5.c failure.

Just disable the test. Fix #100877

[1]: https://sourceware.org/pipermail/libc-alpha/2024-February/154531.html

Pull Request: https://github.com/llvm/llvm-project/pull/101566

(cherry picked from commit bbdccf4c94ff18a0761b03a0e2c8b05805385132)
---
 .../test/asan/TestCases/Linux/printf-fortify-5.c      |  3 ++-
 compiler-rt/test/lit.common.cfg.py                    | 11 ++++++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/test/asan/TestCases/Linux/printf-fortify-5.c b/compiler-rt/test/asan/TestCases/Linux/printf-fortify-5.c
index c7522e4029ea1..86cf4ab0c9a22 100644
--- a/compiler-rt/test/asan/TestCases/Linux/printf-fortify-5.c
+++ b/compiler-rt/test/asan/TestCases/Linux/printf-fortify-5.c
@@ -1,7 +1,8 @@
 // RUN: %clang -fPIC -shared -O2 -D_FORTIFY_SOURCE=2 -D_DSO %s -o %t.so
 // RUN: %clang_asan -o %t %t.so %s
 // RUN: not %run %t 2>&1 | FileCheck %s
-// REQUIRES: glibc-2.27
+/// Incompatible with pass_object_info style fortified source since glibc 2.40.
+// REQUIRES: glibc-2.27 && !glibc-2.40
 #ifdef _DSO
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py
index 70bf43e2fac59..0690c3a18efdb 100644
--- a/compiler-rt/test/lit.common.cfg.py
+++ b/compiler-rt/test/lit.common.cfg.py
@@ -674,7 +674,16 @@ def add_glibc_versions(ver_string):
 
         ver = LooseVersion(ver_string)
         any_glibc = False
-        for required in ["2.19", "2.27", "2.30", "2.33", "2.34", "2.37", "2.38"]:
+        for required in [
+            "2.19",
+            "2.27",
+            "2.30",
+            "2.33",
+            "2.34",
+            "2.37",
+            "2.38",
+            "2.40",
+        ]:
             if ver >= LooseVersion(required):
                 config.available_features.add("glibc-" + required)
                 any_glibc = True

From 9be2c0e00bae94a9e8ddc475e762d8297a409faf Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <qwu@ibm.com>
Date: Fri, 2 Aug 2024 15:01:15 -0400
Subject: [PATCH 106/427] Revert "[AIX] Turn on `#pragma mc_func` check by
 default (#101336)"

This reverts commit b9335176db718bf64c72d48107eb9dff28ed979e.

(cherry picked from commit dd7a4c3e5ee3300588b7c12631f3305553d8ea6c)
---
 clang/include/clang/Driver/Options.td         | 4 ++--
 clang/include/clang/Lex/PreprocessorOptions.h | 4 ++--
 clang/lib/Driver/ToolChains/AIX.cpp           | 6 ++++--
 clang/test/Preprocessor/pragma_mc_func.c      | 6 ++----
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index bed6e7af9dce9..359a698ea87dd 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -8087,8 +8087,8 @@ def source_date_epoch : Separate<["-"], "source-date-epoch">,
 } // let Visibility = [CC1Option]
 
 defm err_pragma_mc_func_aix : BoolFOption<"err-pragma-mc-func-aix",
-  PreprocessorOpts<"ErrorOnPragmaMcfuncOnAIX">, DefaultTrue,
-  PosFlag<SetTrue, [], [ClangOption],
+  PreprocessorOpts<"ErrorOnPragmaMcfuncOnAIX">, DefaultFalse,
+  PosFlag<SetTrue, [], [ClangOption, CC1Option],
           "Treat uses of #pragma mc_func as errors">,
   NegFlag<SetFalse,[], [ClangOption, CC1Option],
           "Ignore uses of #pragma mc_func">>;
diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h
index f48b7ecb90e1e..3f7dd9db18ba7 100644
--- a/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/clang/include/clang/Lex/PreprocessorOptions.h
@@ -213,7 +213,7 @@ class PreprocessorOptions {
 
   /// If set, the preprocessor reports an error when processing #pragma mc_func
   /// on AIX.
-  bool ErrorOnPragmaMcfuncOnAIX = true;
+  bool ErrorOnPragmaMcfuncOnAIX = false;
 
 public:
   PreprocessorOptions() : PrecompiledPreambleBytes(0, false) {}
@@ -252,7 +252,7 @@ class PreprocessorOptions {
     PrecompiledPreambleBytes.first = 0;
     PrecompiledPreambleBytes.second = false;
     RetainExcludedConditionalBlocks = false;
-    ErrorOnPragmaMcfuncOnAIX = true;
+    ErrorOnPragmaMcfuncOnAIX = false;
   }
 };
 
diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp
index 0615a8a9e8d17..fb780fb75651d 100644
--- a/clang/lib/Driver/ToolChains/AIX.cpp
+++ b/clang/lib/Driver/ToolChains/AIX.cpp
@@ -558,8 +558,10 @@ void AIX::addClangTargetOptions(
                               options::OPT_fno_sized_deallocation))
     CC1Args.push_back("-fno-sized-deallocation");
 
-  if (!Args.hasFlag(options::OPT_ferr_pragma_mc_func_aix,
-                    options::OPT_fno_err_pragma_mc_func_aix, true))
+  if (Args.hasFlag(options::OPT_ferr_pragma_mc_func_aix,
+                   options::OPT_fno_err_pragma_mc_func_aix, false))
+    CC1Args.push_back("-ferr-pragma-mc-func-aix");
+  else
     CC1Args.push_back("-fno-err-pragma-mc-func-aix");
 }
 
diff --git a/clang/test/Preprocessor/pragma_mc_func.c b/clang/test/Preprocessor/pragma_mc_func.c
index bf12f7107ff5c..f0d3e49e5dddc 100644
--- a/clang/test/Preprocessor/pragma_mc_func.c
+++ b/clang/test/Preprocessor/pragma_mc_func.c
@@ -1,8 +1,5 @@
-// RUN: not %clang --target=powerpc64-ibm-aix -fsyntax-only %s 2>&1 | FileCheck %s
 // RUN: not %clang --target=powerpc64-ibm-aix -ferr-pragma-mc-func-aix -fsyntax-only \
 // RUN:   %s 2>&1 | FileCheck %s
-// RUN: not %clang --target=powerpc64-ibm-aix -fno-err-pragma-mc-func-aix \
-// RUN:   -ferr-pragma-mc-func-aix -fsyntax-only %s 2>&1 | FileCheck %s
 #pragma mc_func asm_barrier {"60000000"}
 
 // CHECK:  error: #pragma mc_func is not supported
@@ -11,10 +8,11 @@
 // RUN: %clang --target=powerpc64-ibm-aix -fno-err-pragma-mc-func-aix -fsyntax-only %s
 // RUN: %clang --target=powerpc64-ibm-aix -ferr-pragma-mc-func-aix -fsyntax-only \
 // RUN:    -fno-err-pragma-mc-func-aix %s
+// RUN: %clang --target=powerpc64-ibm-aix -fsyntax-only %s
 // RUN: %clang --target=powerpc64-ibm-aix -Werror=unknown-pragmas \
 // RUN:   -fno-err-pragma-mc-func-aix -fsyntax-only %s
 
-// Cases on a non-AIX target.
+// Cases where we have errors or warnings.
 // RUN: not %clang --target=powerpc64le-unknown-linux-gnu \
 // RUN:   -Werror=unknown-pragmas -fno-err-pragma-mc-func-aix -fsyntax-only %s 2>&1 | \
 // RUN:   FileCheck --check-prefix=UNUSED %s

From 51b4c6643ab38c64683f8150f82f468d4b679225 Mon Sep 17 00:00:00 2001
From: Sam James <sam@gentoo.org>
Date: Sat, 3 Aug 2024 06:36:43 +0100
Subject: [PATCH 107/427] [AMDGPU] Include `<cstdint>` in AMDGPUMCTargetDesc
 (#101766)

(cherry picked from commit 8f39502b85d34998752193e85f36c408d3c99248)
---
 llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index 3ef00f75735b0..879dbe1b279b1 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCTARGETDESC_H
 #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCTARGETDESC_H
 
+#include <cstdint>
 #include <memory>
 
 namespace llvm {

From 4a211acad1e37f2b7c2bd3dc6d09575d630b55a3 Mon Sep 17 00:00:00 2001
From: Jannick Kremer <51118500+DeinAlptraum@users.noreply.github.com>
Date: Sat, 3 Aug 2024 14:56:54 +0100
Subject: [PATCH 108/427] [libclang] Fix symbol version of `getBinaryOpcode`
 functions (#101820)

#98489 resurrected an [old patch](https://reviews.llvm.org/D10833) that
was adding new libclang functions. That PR got merged with old `LLVM_13`
symbol versions for new functions. This patch fixes this oversight.

(cherry picked from commit 2bae7aeab42062e61d6f9d6458660d4a5646f7af)
---
 clang/tools/libclang/libclang.map | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/clang/tools/libclang/libclang.map b/clang/tools/libclang/libclang.map
index 91c329b5765d4..371fe512ce71c 100644
--- a/clang/tools/libclang/libclang.map
+++ b/clang/tools/libclang/libclang.map
@@ -54,8 +54,6 @@ LLVM_13 {
     clang_Cursor_Evaluate;
     clang_Cursor_getArgument;
     clang_Cursor_getBriefCommentText;
-    clang_Cursor_getBinaryOpcode;
-    clang_Cursor_getBinaryOpcodeStr;
     clang_Cursor_getCXXManglings;
     clang_Cursor_getCommentRange;
     clang_Cursor_getMangling;
@@ -430,6 +428,12 @@ LLVM_17 {
     clang_getCursorUnaryOperatorKind;
 };
 
+LLVM_19 {
+  global:
+    clang_Cursor_getBinaryOpcode;
+    clang_Cursor_getBinaryOpcodeStr;
+};
+
 # Example of how to add a new symbol version entry.  If you do add a new symbol
 # version, please update the example to depend on the version you added.
 # LLVM_X {

From 69555e03241e9b1d20e6272f569a9a4a113cadd4 Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Sat, 3 Aug 2024 22:19:44 +0200
Subject: [PATCH 109/427] [sanitizer_common] Fix UnwindFast on SPARC (#101634)

```
  UBSan-Standalone-sparc :: TestCases/Misc/Linux/diag-stacktrace.cpp
```
`FAIL`s on 32 and 64-bit Linux/sparc64 (and on Solaris/sparcv9, too: the
test isn't Linux-specific at all). With
`UBSAN_OPTIONS=fast_unwind_on_fatal=1`, the stack trace shows a
duplicate innermost frame:
```
compiler-rt/test/ubsan/TestCases/Misc/Linux/diag-stacktrace.cpp:14:31: runtime error: execution reached the end of a value-returning function without returning a value
    #0 0x7003a708 in f() compiler-rt/test/ubsan/TestCases/Misc/Linux/diag-stacktrace.cpp:14:35
    #1 0x7003a708 in f() compiler-rt/test/ubsan/TestCases/Misc/Linux/diag-stacktrace.cpp:14:35
    #2 0x7003a714 in g() compiler-rt/test/ubsan/TestCases/Misc/Linux/diag-stacktrace.cpp:17:38
```
which isn't seen with `fast_unwind_on_fatal=0`.

This turns out to be another fallout from fixing
`__builtin_return_address`/`__builtin_extract_return_addr` on SPARC. In
`sanitizer_stacktrace_sparc.cpp` (`BufferedStackTrace::UnwindFast`) the
`pc` arg is the return address, while `pc1` from the stack frame
(`fr_savpc`) is the address of the `call` insn, leading to a double
entry for the innermost frame in `trace_buffer[]`.

This patch fixes this by moving the adjustment before all uses.

Tested on `sparc64-unknown-linux-gnu` and `sparcv9-sun-solaris2.11`
(with the `ubsan/TestCases/Misc/Linux` tests enabled).

(cherry picked from commit 3368a3245ce5049b090d7c1081c2d52a6b6fda68)
---
 .../sanitizer_common/sanitizer_stacktrace_sparc.cpp   | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_sparc.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_sparc.cpp
index a2000798a3907..74f435287af3c 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_sparc.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_sparc.cpp
@@ -58,17 +58,16 @@ void BufferedStackTrace::UnwindFast(uptr pc, uptr bp, uptr stack_top,
   // Avoid infinite loop when frame == frame[0] by using frame > prev_frame.
   while (IsValidFrame(bp, stack_top, bottom) && IsAligned(bp, sizeof(uhwptr)) &&
          size < max_depth) {
-    uhwptr pc1 = ((uhwptr *)bp)[15];
+    // %o7 contains the address of the call instruction and not the
+    // return address, so we need to compensate.
+    uhwptr pc1 = GetNextInstructionPc(((uhwptr *)bp)[15]);
     // Let's assume that any pointer in the 0th page is invalid and
     // stop unwinding here.  If we're adding support for a platform
     // where this isn't true, we need to reconsider this check.
     if (pc1 < kPageSize)
       break;
-    if (pc1 != pc) {
-      // %o7 contains the address of the call instruction and not the
-      // return address, so we need to compensate.
-      trace_buffer[size++] = GetNextInstructionPc((uptr)pc1);
-    }
+    if (pc1 != pc)
+      trace_buffer[size++] = pc1;
     bottom = bp;
     bp = (uptr)((uhwptr *)bp)[14] + STACK_BIAS;
   }

From 8d2474975f56e85f5a25610fb6291dc0f3976a3e Mon Sep 17 00:00:00 2001
From: Nathan James <n.james93@hotmail.co.uk>
Date: Thu, 25 Jul 2024 16:25:37 +0100
Subject: [PATCH 110/427] [clang-tidy] Fix crash in modernize-use-ranges
 (#100427)

Crash seems to be caused by the check function not handling inline
namespaces correctly for some instances. Changed how the Replacer is got
from the MatchResult now which should alleviate any potential issues

Fixes #100406

(cherry picked from commit 0762db6533eda3453158c7b9b0631542c47093a8)
---
 .../clang-tidy/utils/UseRangesCheck.cpp       | 64 +++++++++----------
 .../clang-tidy/utils/UseRangesCheck.h         |  2 +-
 .../modernize/Inputs/use-ranges/fake_std.h    | 17 +++--
 3 files changed, 43 insertions(+), 40 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp b/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp
index e2daa5010e2ae..aba4d17ccd035 100644
--- a/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp
+++ b/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp
@@ -39,12 +39,6 @@ static constexpr const char ArgName[] = "ArgName";
 
 namespace clang::tidy::utils {
 
-static bool operator==(const UseRangesCheck::Indexes &L,
-                       const UseRangesCheck::Indexes &R) {
-  return std::tie(L.BeginArg, L.EndArg, L.ReplaceArg) ==
-         std::tie(R.BeginArg, R.EndArg, R.ReplaceArg);
-}
-
 static std::string getFullPrefix(ArrayRef<UseRangesCheck::Indexes> Signature) {
   std::string Output;
   llvm::raw_string_ostream OS(Output);
@@ -54,15 +48,6 @@ static std::string getFullPrefix(ArrayRef<UseRangesCheck::Indexes> Signature) {
   return Output;
 }
 
-static llvm::hash_code hash_value(const UseRangesCheck::Indexes &Indexes) {
-  return llvm::hash_combine(Indexes.BeginArg, Indexes.EndArg,
-                            Indexes.ReplaceArg);
-}
-
-static llvm::hash_code hash_value(const UseRangesCheck::Signature &Sig) {
-  return llvm::hash_combine_range(Sig.begin(), Sig.end());
-}
-
 namespace {
 
 AST_MATCHER(Expr, hasSideEffects) {
@@ -123,24 +108,26 @@ makeMatcherPair(StringRef State, const UseRangesCheck::Indexes &Indexes,
 }
 
 void UseRangesCheck::registerMatchers(MatchFinder *Finder) {
-  Replaces = getReplacerMap();
+  auto Replaces = getReplacerMap();
   ReverseDescriptor = getReverseDescriptor();
   auto BeginEndNames = getFreeBeginEndMethods();
   llvm::SmallVector<StringRef, 4> BeginNames{
       llvm::make_first_range(BeginEndNames)};
   llvm::SmallVector<StringRef, 4> EndNames{
       llvm::make_second_range(BeginEndNames)};
-  llvm::DenseSet<ArrayRef<Signature>> Seen;
+  Replacers.clear();
+  llvm::DenseSet<Replacer *> SeenRepl;
   for (auto I = Replaces.begin(), E = Replaces.end(); I != E; ++I) {
-    const ArrayRef<Signature> &Signatures =
-        I->getValue()->getReplacementSignatures();
-    if (!Seen.insert(Signatures).second)
+    auto Replacer = I->getValue();
+    if (!SeenRepl.insert(Replacer.get()).second)
       continue;
-    assert(!Signatures.empty() &&
-           llvm::all_of(Signatures, [](auto Index) { return !Index.empty(); }));
+    Replacers.push_back(Replacer);
+    assert(!Replacer->getReplacementSignatures().empty() &&
+           llvm::all_of(Replacer->getReplacementSignatures(),
+                        [](auto Index) { return !Index.empty(); }));
     std::vector<StringRef> Names(1, I->getKey());
     for (auto J = std::next(I); J != E; ++J)
-      if (J->getValue()->getReplacementSignatures() == Signatures)
+      if (J->getValue() == Replacer)
         Names.push_back(J->getKey());
 
     std::vector<ast_matchers::internal::DynTypedMatcher> TotalMatchers;
@@ -148,7 +135,7 @@ void UseRangesCheck::registerMatchers(MatchFinder *Finder) {
     // signatures in order of length(longest to shortest). This way any
     // signature that is a subset of another signature will be matched after the
     // other.
-    SmallVector<Signature> SigVec(Signatures);
+    SmallVector<Signature> SigVec(Replacer->getReplacementSignatures());
     llvm::sort(SigVec, [](auto &L, auto &R) { return R.size() < L.size(); });
     for (const auto &Signature : SigVec) {
       std::vector<ast_matchers::internal::DynTypedMatcher> Matchers;
@@ -163,7 +150,8 @@ void UseRangesCheck::registerMatchers(MatchFinder *Finder) {
     }
     Finder->addMatcher(
         callExpr(
-            callee(functionDecl(hasAnyName(std::move(Names))).bind(FuncDecl)),
+            callee(functionDecl(hasAnyName(std::move(Names)))
+                       .bind((FuncDecl + Twine(Replacers.size() - 1).str()))),
             ast_matchers::internal::DynTypedMatcher::constructVariadic(
                 ast_matchers::internal::DynTypedMatcher::VO_AnyOf,
                 ASTNodeKind::getFromNodeKind<CallExpr>(),
@@ -205,21 +193,33 @@ static void removeFunctionArgs(DiagnosticBuilder &Diag, const CallExpr &Call,
 }
 
 void UseRangesCheck::check(const MatchFinder::MatchResult &Result) {
-  const auto *Function = Result.Nodes.getNodeAs<FunctionDecl>(FuncDecl);
-  std::string Qualified = "::" + Function->getQualifiedNameAsString();
-  auto Iter = Replaces.find(Qualified);
-  assert(Iter != Replaces.end());
+  Replacer *Replacer = nullptr;
+  const FunctionDecl *Function = nullptr;
+  for (auto [Node, Value] : Result.Nodes.getMap()) {
+    StringRef NodeStr(Node);
+    if (!NodeStr.consume_front(FuncDecl))
+      continue;
+    Function = Value.get<FunctionDecl>();
+    size_t Index;
+    if (NodeStr.getAsInteger(10, Index)) {
+      llvm_unreachable("Unable to extract replacer index");
+    }
+    assert(Index < Replacers.size());
+    Replacer = Replacers[Index].get();
+    break;
+  }
+  assert(Replacer && Function);
   SmallString<64> Buffer;
-  for (const Signature &Sig : Iter->getValue()->getReplacementSignatures()) {
+  for (const Signature &Sig : Replacer->getReplacementSignatures()) {
     Buffer.assign({BoundCall, getFullPrefix(Sig)});
     const auto *Call = Result.Nodes.getNodeAs<CallExpr>(Buffer);
     if (!Call)
       continue;
     auto Diag = createDiag(*Call);
-    if (auto ReplaceName = Iter->getValue()->getReplaceName(*Function))
+    if (auto ReplaceName = Replacer->getReplaceName(*Function))
       Diag << FixItHint::CreateReplacement(Call->getCallee()->getSourceRange(),
                                            *ReplaceName);
-    if (auto Include = Iter->getValue()->getHeaderInclusion(*Function))
+    if (auto Include = Replacer->getHeaderInclusion(*Function))
       Diag << Inserter.createIncludeInsertion(
           Result.SourceManager->getFileID(Call->getBeginLoc()), *Include);
     llvm::SmallVector<unsigned, 3> ToRemove;
diff --git a/clang-tools-extra/clang-tidy/utils/UseRangesCheck.h b/clang-tools-extra/clang-tidy/utils/UseRangesCheck.h
index 927e9694b0ec7..3a454bcf0cf07 100644
--- a/clang-tools-extra/clang-tidy/utils/UseRangesCheck.h
+++ b/clang-tools-extra/clang-tidy/utils/UseRangesCheck.h
@@ -85,7 +85,7 @@ class UseRangesCheck : public ClangTidyCheck {
   std::optional<TraversalKind> getCheckTraversalKind() const override;
 
 private:
-  ReplacerMap Replaces;
+  std::vector<llvm::IntrusiveRefCntPtr<Replacer>> Replacers;
   std::optional<ReverseIteratorDescriptor> ReverseDescriptor;
   IncludeInserter Inserter;
 };
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/Inputs/use-ranges/fake_std.h b/clang-tools-extra/test/clang-tidy/checkers/modernize/Inputs/use-ranges/fake_std.h
index 6596511c7a38b..69ac9954f4afa 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/Inputs/use-ranges/fake_std.h
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/Inputs/use-ranges/fake_std.h
@@ -7,8 +7,8 @@ template <typename T> class vector {
 public:
   using iterator = T *;
   using const_iterator = const T *;
-  using reverse_iterator = T*;
-  using reverse_const_iterator = const T*;
+  using reverse_iterator = T *;
+  using reverse_const_iterator = const T *;
 
   constexpr const_iterator begin() const;
   constexpr const_iterator end() const;
@@ -72,8 +72,8 @@ template <typename Container> constexpr auto crend(const Container &Cont) {
   return Cont.crend();
 }
 // Find
-template< class InputIt, class T >
-InputIt find( InputIt first, InputIt last, const T& value );
+template <class InputIt, class T>
+InputIt find(InputIt first, InputIt last, const T &value);
 
 // Reverse
 template <typename Iter> void reverse(Iter begin, Iter end);
@@ -82,6 +82,7 @@ template <typename Iter> void reverse(Iter begin, Iter end);
 template <class InputIt1, class InputIt2>
 bool includes(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2);
 
+inline namespace _V1 {
 // IsPermutation
 template <class ForwardIt1, class ForwardIt2>
 bool is_permutation(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2);
@@ -97,9 +98,10 @@ template <class InputIt1, class InputIt2>
 bool equal(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2);
 
 template <class InputIt1, class InputIt2, class BinaryPred>
-bool equal(InputIt1 first1, InputIt1 last1,
-           InputIt2 first2, InputIt2 last2, BinaryPred p) {
-  // Need a definition to suppress undefined_internal_type when invoked with lambda
+bool equal(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
+           BinaryPred p) {
+  // Need a definition to suppress undefined_internal_type when invoked with
+  // lambda
   return true;
 }
 
@@ -108,6 +110,7 @@ void iota(ForwardIt first, ForwardIt last, T value);
 
 template <class ForwardIt>
 ForwardIt rotate(ForwardIt first, ForwardIt middle, ForwardIt last);
+} // namespace _V1
 
 } // namespace std
 

From 5c8dcabc529b4dc293dfc3fd29369ff1fe6517ea Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Fri, 26 Jul 2024 11:26:34 -0700
Subject: [PATCH 111/427] Build release binaries for multiple targets (#98431)

This adds release binary builds for the 4 platforms currently supported
by the free GitHub Action runners:

* Linux x86_64
* Windows x86_64
* Mac x86_64
* Mac AArch64

The test stages for these are known to fail, but the creating and
upoading of the release binaries should pass.

(cherry picked from commit 247251aee0d4314385a3fea86e31484d3d792ffb)
---
 .github/workflows/release-binaries-all.yml    |  94 ++++
 .../release-binaries-save-stage/action.yml    |  38 ++
 .../release-binaries-setup-stage/action.yml   |  59 +++
 .github/workflows/release-binaries.yml        | 474 ++++++++++++------
 .github/workflows/release-tasks.yml           |  10 +
 clang/cmake/caches/Release.cmake              |   6 +-
 6 files changed, 520 insertions(+), 161 deletions(-)
 create mode 100644 .github/workflows/release-binaries-all.yml
 create mode 100644 .github/workflows/release-binaries-save-stage/action.yml
 create mode 100644 .github/workflows/release-binaries-setup-stage/action.yml

diff --git a/.github/workflows/release-binaries-all.yml b/.github/workflows/release-binaries-all.yml
new file mode 100644
index 0000000000000..73c9d96946e33
--- /dev/null
+++ b/.github/workflows/release-binaries-all.yml
@@ -0,0 +1,94 @@
+name: Release Binaries All
+
+permissions:
+  contents: read # Default everything to read-only
+
+on:
+  workflow_dispatch:
+    inputs:
+      release-version:
+        description: 'Release Version'
+        required: true
+        type: string
+      upload:
+        description: 'Upload binaries to the release page'
+        required: true
+        default: false
+        type: boolean
+
+  workflow_call:
+    inputs:
+      release-version:
+        description: 'Release Version'
+        required: true
+        type: string
+      upload:
+        description: 'Upload binaries to the release page'
+        required: true
+        default: false
+        type: boolean
+
+  pull_request:
+    types:
+      - opened
+      - synchronize
+      - reopened
+      # When a PR is closed, we still start this workflow, but then skip
+      # all the jobs, which makes it effectively a no-op.  The reason to
+      # do this is that it allows us to take advantage of concurrency groups
+      # to cancel in progress CI jobs whenever the PR is closed.
+      - closed
+    paths:
+      - '.github/workflows/release-binaries-all.yml'
+      - '.github/workflows/release-binaries.yml'
+      - '.github/workflows/release-binaries-setup-stage/*'
+      - '.github/workflows/release-binaries-save-stage/*'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || 'dispatch' }}
+  cancel-in-progress: True
+
+jobs:
+  setup-variables:
+    if: >-
+      (github.event_name != 'pull_request' || github.event.action != 'closed')
+    runs-on: ubuntu-22.04
+    outputs:
+      release-version: ${{ steps.vars.outputs.release-version }}
+      upload: ${{ steps.vars.outputs.upload }}
+    steps:
+      - shell: bash
+        id: vars
+        run: |
+          upload="${{ inputs.upload }}"
+          release_version="${{ inputs.release-version }}"
+          if [ "${{ github.event_name }}" = "pull_request" ]; then
+            upload="false"
+            release_version=""
+          fi
+          echo "release-version=$release_version" >> "$GITHUB_OUTPUT"
+          echo "upload=$upload" >> "$GITHUB_OUTPUT"
+
+  release-binaries-all:
+    name: Build Release Binaries
+    needs:
+      - setup-variables
+    permissions:
+      contents: write # For release uploads
+      id-token: write     # For artifact attestations
+      attestations: write # For artifact attestations
+    strategy:
+      fail-fast: false
+      matrix:
+        runs-on:
+          - ubuntu-22.04
+          - windows-2022
+          - macos-13
+          - macos-14
+
+    uses: ./.github/workflows/release-binaries.yml
+    with:
+      release-version: "${{ needs.setup-variables.outputs.release-version }}"
+      upload: ${{ needs.setup-variables.outputs.upload == 'true'}}
+      runs-on: "${{ matrix.runs-on }}"
+
diff --git a/.github/workflows/release-binaries-save-stage/action.yml b/.github/workflows/release-binaries-save-stage/action.yml
new file mode 100644
index 0000000000000..e2f3eeadd15be
--- /dev/null
+++ b/.github/workflows/release-binaries-save-stage/action.yml
@@ -0,0 +1,38 @@
+name: Save Stage
+description: >-
+  Upload the source and binary directories from a build stage so that they
+  can be re-used in the next stage.  This action is used to the release
+  binaries workflow into multiple stages to avoid the 6 hour timeout on
+  the GitHub hosted runners.
+inputs:
+  build-prefix:
+    description: "Directory containing the build directory."
+    required: true
+    type: 'string'
+
+runs:
+  using: "composite"
+  steps:
+    # We need to create an archive of the build directory, because it has too
+    # many files to upload.
+    - name: Package Build and Source Directories
+      shell: bash
+      run: |
+        # Windows does not support symlinks, so we need to dereference them.
+        tar --exclude build/ ${{ (runner.os == 'Windows' && '-h') || '' }} -c . | zstd -T0 -c > ../llvm-project.tar.zst
+        mv ../llvm-project.tar.zst .
+        tar -C ${{ inputs.build-prefix }} -c build/ | zstd -T0 -c > build.tar.zst
+
+    - name: Upload Stage 1 Source
+      uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 #v4.3.0
+      with:
+        name: ${{ runner.os }}-${{ runner.arch }}-${{ github.job }}-source
+        path: llvm-project.tar.zst
+        retention-days: 2
+
+    - name: Upload Stage 1 Build Dir
+      uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 #v4.3.0
+      with:
+        name: ${{ runner.os}}-${{ runner.arch }}-${{ github.job }}-build
+        path: build.tar.zst
+        retention-days: 2
diff --git a/.github/workflows/release-binaries-setup-stage/action.yml b/.github/workflows/release-binaries-setup-stage/action.yml
new file mode 100644
index 0000000000000..f5e5db27e6595
--- /dev/null
+++ b/.github/workflows/release-binaries-setup-stage/action.yml
@@ -0,0 +1,59 @@
+name: Setup Stage
+description: >-
+  Setup the next stage of the release binaries workflow.  This sets up the
+  environment correctly for a new stage of the release binaries workflow
+  and also restores the source and build directory from the previous stage.
+
+inputs:
+  previous-artifact:
+    description: >-
+      A unique descriptor for the artifact from the previous stage.  This will
+      be used to construct the final artifact pattern, which is:
+      $RUNNER_OS-$RUNNER_ARCH-$PREVIOUS_ARTIFACT-*
+    required: false
+    type: 'string'
+
+outputs:
+  build-prefix:
+    description: "Directory containing the build directory."
+    value: ${{ steps.build-prefix.outputs.build-prefix }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Install Ninja
+      uses: llvm/actions/install-ninja@22e9f909d35b50bd1181709564bfe816eaeaae81 # main
+   
+    - name: Setup Windows
+      if: startsWith(runner.os, 'Windows')
+      uses: llvm/actions/setup-windows@main
+      with:
+        arch: amd64
+
+    - name: Set Build Prefix
+      id: build-prefix
+      shell: bash
+      run: |
+        build_prefix=`pwd`
+        if [ "${{ runner.os }}" = "Linux" ]; then
+          sudo chown $USER:$USER /mnt/
+          build_prefix=/mnt/
+        fi
+        echo "build-prefix=$build_prefix" >> $GITHUB_OUTPUT
+
+    - name: Download Previous Stage Artifact
+      if: ${{ inputs.previous-artifact }}
+      id: download
+      uses: actions/download-artifact@6b208ae046db98c579e8a3aa621ab581ff575935 # v4.1.1
+      with:
+        pattern: ${{ runner.os }}-${{ runner.arch }}-${{ inputs.previous-artifact }}-*
+        merge-multiple: true
+
+    - name: Unpack Artifact
+      if: ${{ steps.download.outputs.download-path }}
+      shell: bash
+      run: |
+        tar --zstd -xf llvm-project.tar.zst
+        rm llvm-project.tar.zst
+        tar --zstd -C ${{ steps.build-prefix.outputs.build-prefix}} -xf build.tar.zst
+        rm build.tar.zst
diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml
index 7de4d00334d14..5f939ba5bbe2f 100644
--- a/.github/workflows/release-binaries.yml
+++ b/.github/workflows/release-binaries.yml
@@ -5,28 +5,38 @@ on:
     inputs:
       release-version:
         description: 'Release Version'
-        required: true
+        required: false
         type: string
       upload:
         description: 'Upload binaries to the release page'
         required: true
         default: false
         type: boolean
+      runs-on:
+        description: "Runner to use for the build"
+        required: true
+        type: choice
+        options:
+          - ubuntu-22.04
+          - windows-2022
+          - macos-13
+          - macos-14
 
   workflow_call:
     inputs:
       release-version:
         description: 'Release Version'
-        required: true
+        required: false
         type: string
       upload:
         description: 'Upload binaries to the release page'
         required: true
         default: false
         type: boolean
-  schedule:
-    # * is a special character in YAML so you have to quote this string
-    - cron:  '0 8 1 * *'
+      runs-on:
+        description: "Runner to use for the build"
+        required: true
+        type: string
 
 permissions:
   contents: read # Default everything to read-only
@@ -34,30 +44,39 @@ permissions:
 jobs:
   prepare:
     name: Prepare to build binaries
-    runs-on: ubuntu-22.04
+    runs-on: ${{ inputs.runs-on }}
     if: github.repository == 'llvm/llvm-project'
     outputs:
       release-version: ${{ steps.vars.outputs.release-version }}
       ref: ${{ steps.vars.outputs.ref }}
       upload: ${{ steps.vars.outputs.upload }}
+      target-cmake-flags: ${{ steps.vars.outputs.target-cmake-flags }}
+      build-flang: ${{ steps.vars.outputs.build-flang }}
+      enable-pgo: ${{ steps.vars.outputs.enable-pgo }}
+      release-binary-basename: ${{ steps.vars.outputs.release-binary-basename }}
+      release-binary-filename: ${{ steps.vars.outputs.release-binary-filename }}
 
     steps:
     - name: Checkout LLVM
       uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
 
     - name: Install Dependencies
+      shell: bash
       run: |
         pip install --require-hashes -r ./llvm/utils/git/requirements.txt
 
     - name: Check Permissions
+      if: github.event_name != 'pull_request'
       env:
         GITHUB_TOKEN: ${{ github.token }}
         USER_TOKEN: ${{ secrets.RELEASE_TASKS_USER_TOKEN }}
+      shell: bash
       run: |
         ./llvm/utils/release/./github-upload-release.py --token "$GITHUB_TOKEN" --user ${{ github.actor }} --user-token "$USER_TOKEN" check-permissions
 
     - name: Collect Variables
       id: vars
+      shell: bash
       # In order for the test-release.sh script to run correctly, the LLVM
       # source needs to be at the following location relative to the build dir:
       # | X.Y.Z-rcN | ./rcN/llvm-project
@@ -67,242 +86,377 @@ jobs:
       # | X.Y.Z-rcN | -rc N -test-asserts
       # | X.Y.Z     | -final
       run: |
-        tag="${{ github.ref_name }}"
         trimmed=$(echo ${{ inputs.release-version }} | xargs)
-        [[ "$trimmed" != "" ]] && tag="llvmorg-$trimmed"
-        if [ "$tag" = "main" ]; then
-          # If tag is main, then we've been triggered by a scheduled so pass so
-          # use the head commit as the tag.
-          tag=`git rev-parse HEAD`
+        if [ -n "$trimmed" ]; then
+          release_version="$trimmed"
+          ref="llvmorg-$release_version"
+        else
+          release_version="${{ (github.event_name == 'pull_request' && format('PR{0}', github.event.pull_request.number)) || 'CI'}}-${{ github.sha }}"
+          ref=${{ github.sha }}
         fi
         if [ -n "${{ inputs.upload }}" ]; then
           upload="${{ inputs.upload }}"
         else
           upload="false"
         fi
-        bash .github/workflows/set-release-binary-outputs.sh "$tag" "$upload"
+        echo "release-version=$release_version">> $GITHUB_OUTPUT
+        echo "ref=$ref" >> $GITHUB_OUTPUT
+        echo "upload=$upload" >> $GITHUB_OUTPUT
+
+        release_binary_basename="LLVM-$release_version-${{ runner.os }}-${{ runner.arch }}"
+        echo "release-binary-basename=$release_binary_basename" >> $GITHUB_OUTPUT
+        echo "release-binary-filename=$release_binary_basename.tar.xz" >> $GITHUB_OUTPUT
+
+        # Detect necessary CMake flags
+        target="${{ runner.os }}-${{ runner.arch }}"
+        echo "enable-pgo=false" >> $GITHUB_OUTPUT
+        target_cmake_flags="-DLLVM_RELEASE_ENABLE_PGO=OFF"
+        # The macOS builds try to cross compile some libraries so we need to
+        # add extra CMake args to disable them.
+        # See https://github.com/llvm/llvm-project/issues/99767
+        if [ "${{ runner.os }}" = "macOS" ]; then
+          target_cmake_flags="$target_cmake_flags -DBOOTSTRAP_COMPILER_RT_ENABLE_IOS=OFF"
+          if [ "${{ runner.arch }}" = "ARM64" ]; then
+            arches=arm64
+          else
+            arches=x86_64
+          fi
+          target_cmake_flags="$target_cmake_flags -DBOOTSTRAP_DARWIN_osx_ARCHS=$arches -DBOOTSTRAP_DARWIN_osx_BUILTIN_ARCHS=$arches"
+        fi
 
-  build-stage1-linux:
-    name: "Build Stage 1 Linux"
+        # x86 macOS and x86 Windows have trouble building flang, so disable it.
+        # Windows: https://github.com/llvm/llvm-project/issues/100202
+        # macOS: 'rebase opcodes terminated early at offset 1 of 80016' when building __fortran_builtins.mod
+        build_flang="true"
+
+        if [ "$target" = "Windows-X64" ]; then
+          target_cmake_flags="$target_cmake_flags -DLLVM_RELEASE_ENABLE_PROJECTS=\"clang;lld;lldb;clang-tools-extra;bolt;polly;mlir\""
+          build_flang="false"
+        fi
+
+        if [ "${{ runner.os }}" = "Windows" ]; then
+          # The build times out on Windows, so we need to disable LTO.
+          target_cmake_flags="$target_cmake_flags -DLLVM_RELEASE_ENABLE_LTO=OFF"
+        fi
+
+        echo "target-cmake-flags=$target_cmake_flags" >> $GITHUB_OUTPUT
+        echo "build-flang=$build_flang" >> $GITHUB_OUTPUT
+
+  build-stage1:
+    name: "Build Stage 1"
     needs: prepare
-    runs-on: ubuntu-22.04
     if: github.repository == 'llvm/llvm-project'
+    runs-on: ${{ inputs.runs-on }}
     steps:
     - name: Checkout LLVM
       uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
       with:
         ref: ${{ needs.prepare.outputs.ref }}
-
-    - name: Install Ninja
-      uses: llvm/actions/install-ninja@22e9f909d35b50bd1181709564bfe816eaeaae81 # main
+    - name: Debug - Move actions
+      if: github.event_name != 'pull_request'
+      shell: bash
+      run: |
+        cd .github/workflows
+        for d in release-binaries-setup-stage release-binaries-save-stage; do
+          mkdir $d
+          pushd $d
+          curl -O -L https://raw.githubusercontent.com/tstellar/llvm-project/main/.github/workflows/$d/action.yml
+          popd
+        done
 
     - name: Setup sccache
       uses: hendrikmuhs/ccache-action@ca3acd2731eef11f1572ccb126356c2f9298d35e # v1.2.9
       with:
-        max-size: 250M
-        key: sccache-${{ runner.os }}-release
+        # Default to 2G to workaround: https://github.com/hendrikmuhs/ccache-action/issues/174
+        max-size: 2G
+        key: sccache-${{ runner.os }}-${{ runner.arch }}-release
         variant: sccache
 
-    - name: Build Stage 1 Clang
-      run: |
-        sudo chown $USER:$USER /mnt/
-        cmake -G Ninja -C clang/cmake/caches/Release.cmake -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -S llvm -B /mnt/build
-        ninja -v -C /mnt/build
+    - name: Setup Stage
+      id: setup-stage
+      uses: ./.github/workflows/release-binaries-setup-stage
 
-    # We need to create an archive of the build directory, because it has too
-    # many files to upload.
-    - name: Package Build and Source Directories
+    - name: Build Stage 1 Clang
+      id: build
+      shell: bash
       run: |
-        tar -c . | zstd -T0 -c > llvm-project.tar.zst
-        tar -C /mnt/ -c build/ | zstd -T0 -c > build.tar.zst
-
-    - name: Upload Stage 1 Source
-      uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 #v4.3.0
+        # There were some issues on the ARM64 MacOS runners with trying to build x86 object,
+        # so we need to set some extra cmake flags to disable this.
+        cmake -G Ninja -S llvm -B ${{ steps.setup-stage.outputs.build-prefix }}/build \
+            ${{ needs.prepare.outputs.target-cmake-flags }} \
+            -C clang/cmake/caches/Release.cmake \
+            -DBOOTSTRAP_LLVM_PARALLEL_LINK_JOBS=1 \
+            -DBOOTSTRAP_CPACK_PACKAGE_FILE_NAME="${{ needs.prepare.outputs.release-binary-basename }}" \
+            -DCMAKE_C_COMPILER_LAUNCHER=sccache \
+            -DCMAKE_CXX_COMPILER_LAUNCHER=sccache
+        ninja -v -C ${{ steps.setup-stage.outputs.build-prefix }}/build
+        # There is a race condition on the MacOS builders and this command is here
+        # to help debug that when it happens.
+        ls -ltr ${{ steps.setup-stage.outputs.build-prefix }}/build
+    
+    - name: Save Stage
+      uses: ./.github/workflows/release-binaries-save-stage
       with:
-        name: stage1-source
-        path: llvm-project.tar.zst
-        retention-days: 2
+        build-prefix: ${{ steps.setup-stage.outputs.build-prefix }}
 
-    - name: Upload Stage 1 Build Dir
-      uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 #v4.3.0
-      with:
-        name: stage1-build
-        path: build.tar.zst
-        retention-days: 2
-
-  build-stage2-linux:
-    name: "Build Stage 2 Linux"
+  build-stage2:
+    name: "Build Stage 2"
     needs:
       - prepare
-      - build-stage1-linux
-    runs-on: ubuntu-22.04
+      - build-stage1
     if: github.repository == 'llvm/llvm-project'
+    runs-on: ${{ inputs.runs-on }}
     steps:
-    - name: Install Ninja
-      uses: llvm/actions/install-ninja@22e9f909d35b50bd1181709564bfe816eaeaae81 # main
-
-    - name: Download Stage 1 Artifacts
-      uses: actions/download-artifact@6b208ae046db98c579e8a3aa621ab581ff575935 # v4.1.1
+    - name: Checkout Actions
+      uses: actions/checkout@v4
       with:
-        pattern: stage1-*
-        merge-multiple: true
-
-    - name: Unpack Artifacts
-      run: |
-        tar --zstd -xf llvm-project.tar.zst
-        rm llvm-project.tar.zst
-        sudo chown $USER:$USER /mnt/
-        tar --zstd -C /mnt -xf build.tar.zst
-        rm build.tar.zst
+        ref: ${{ (github.event_name == 'pull_request' && github.sha) || 'main' }}
+        sparse-checkout: |
+          .github/workflows/
+        sparse-checkout-cone-mode: false
+        path: workflows
+    - name: Setup Stage
+      id: setup-stage
+      uses: ./workflows/.github/workflows/release-binaries-setup-stage
+      with:
+        previous-artifact: build-stage1
 
     - name: Build Stage 2
       # Re-enable once PGO builds are supported.
-      if: false
-      run: |
-        ninja -C /mnt/build stage2-instrumented
-
-    # We need to create an archive of the build directory, because it has too
-    # many files to upload.
-    - name: Save Build and Source Directories
+      if: needs.prepare.outputs.enable-pgo == 'true'
+      shell: bash
       run: |
-        tar -c . | zstd -T0 -c > llvm-project.tar.zst
-        tar -C /mnt/ -c build/ | zstd -T0 -c > build.tar.zst
+        ninja -C ${{ steps.setup-stage.outputs.build-prefix}}/build stage2-instrumented
 
-    - name: Upload Stage 2 Source
-      uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 #v4.3.0
+    - name: Save Stage
+      uses: ./workflows/.github/workflows/release-binaries-save-stage
       with:
-        name: stage2-source
-        path: ${{ github.workspace }}/llvm-project.tar.zst
-        retention-days: 2
+        build-prefix: ${{ steps.setup-stage.outputs.build-prefix }}
 
-    - name: Upload Stage 2 Build Dir
-      uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 #v4.3.0
+  build-stage3-clang:
+    name: "Build Stage 3 LLVM/Clang"
+    needs:
+      - prepare
+      - build-stage2
+    if: github.repository == 'llvm/llvm-project'
+    runs-on: ${{ inputs.runs-on }}
+    steps:
+    - name: Checkout Actions
+      uses: actions/checkout@v4
       with:
-        name: stage2-build
-        path: ${{ github.workspace }}/build.tar.zst
-        retention-days: 2
+        ref: ${{ (github.event_name == 'pull_request' && github.sha) || 'main' }}
+        sparse-checkout: |
+          .github/workflows/
+        sparse-checkout-cone-mode: false
+        path: workflows
+    - name: Setup Stage
+      id: setup-stage
+      uses: ./workflows/.github/workflows/release-binaries-setup-stage
+      with:
+        previous-artifact: build-stage2
 
+    - name: Build LLVM/Clang
+      shell: bash
+      run: |
+        # There is a race condition on the MacOS builders and this command is here
+        # to help debug that when it happens.
+        ls -ltr ${{ steps.setup-stage.outputs.build-prefix }}/build
+        ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build stage2-clang
+        # Build some of the larger binaries here too.
+        ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build/tools/clang/stage2-bins/ \
+            clang-scan-deps \
+            modularize clangd \
+            clangd-indexer \
+            clang-check \
+            ${{ (runner.os == 'Linux' && 'clangd-fuzzer') || '' }} \
+            clang-tidy \
+            llc \
+            lli \
+            llvm-exegesis \
+            llvm-opt-fuzzer \
+            llvm-reduce \
+            llvm-lto \
+            dsymutil
+
+    - name: Save Stage
+      uses: ./workflows/.github/workflows/release-binaries-save-stage
+      with:
+        build-prefix: ${{ steps.setup-stage.outputs.build-prefix }}
 
-  build-stage3-linux:
-    name: "Build Stage 3 Linux"
+  build-stage3-flang:
+    name: "Build Stage 3 Flang/MLIR/Bolt"
     needs:
       - prepare
-      - build-stage2-linux
-    outputs:
-      filename: ${{ steps.package-info.outputs.release-filename }}
-    runs-on: ubuntu-22.04-16x64
-    if: github.repository == 'llvm/llvm-project'
+      - build-stage3-clang
+    runs-on: ${{ inputs.runs-on }}
     steps:
-    - name: Install Ninja
-      uses: llvm/actions/install-ninja@22e9f909d35b50bd1181709564bfe816eaeaae81 # main
-
-    - name: 'Download artifact'
-      uses: actions/download-artifact@6b208ae046db98c579e8a3aa621ab581ff575935 # v4.1.1
+    - name: Checkout Actions
+      uses: actions/checkout@v4
       with:
-        pattern: stage2-*
-        merge-multiple: true
+        ref: ${{ (github.event_name == 'pull_request' && github.sha) || 'main' }}
+        sparse-checkout: |
+          .github/workflows/
+        sparse-checkout-cone-mode: false
+        path: workflows
+    - name: Setup Stage
+      id: setup-stage
+      uses: ./workflows/.github/workflows/release-binaries-setup-stage
+      with:
+        previous-artifact: build-stage3-clang
 
-    - name: Unpack Artifact
+    - name: Build Flang / MLIR / Bolt
+      shell: bash
       run: |
-        tar --zstd -xf llvm-project.tar.zst
-        rm llvm-project.tar.zst
-        sudo chown $USER:$USER /mnt/
-        tar --zstd -C /mnt -xf build.tar.zst
-        rm build.tar.zst
+        # Build some of the mlir tools that take a long time to link
+        if [ "${{ needs.prepare.outputs.build-flang }}" = "true" ]; then
+          ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build/tools/clang/stage2-bins/ -j2 flang-new bbc
+        fi
+        ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build/tools/clang/stage2-bins/ \
+            mlir-bytecode-parser-fuzzer \
+            mlir-cpu-runner \
+            mlir-lsp-server \
+            mlir-opt \
+            mlir-query \
+            mlir-reduce \
+            mlir-text-parser-fuzzer \
+            mlir-translate \
+            mlir-transform-opt \
+            mlir-cat \
+            mlir-minimal-opt \
+            mlir-minimal-opt-canonicalize \
+            mlir-pdll-lsp-server \
+            llvm-bolt \
+            llvm-bolt-heatmap
+    
+    - name: Save Stage
+      uses: ./workflows/.github/workflows/release-binaries-save-stage
+      with:
+        build-prefix: ${{ steps.setup-stage.outputs.build-prefix }}
 
-    - name: Build Release Package
-      run: |
-        ninja -C /mnt/build stage2-package
+  build-stage3-all:
+    name: "Build Stage 3"
+    needs:
+      - prepare
+      - build-stage3-flang
+    runs-on: ${{ inputs.runs-on }}
+    steps:
+    - name: Checkout Actions
+      uses: actions/checkout@v4
+      with:
+        ref: ${{ (github.event_name == 'pull_request' && github.sha) || 'main' }}
+        sparse-checkout: |
+          .github/workflows/
+        sparse-checkout-cone-mode: false
+        path: workflows
+    - name: Setup Stage
+      id: setup-stage
+      uses: ./workflows/.github/workflows/release-binaries-setup-stage
+      with:
+        previous-artifact: build-stage3-flang
 
-    - id: package-info
+    - name: Build Release Package
+      shell: bash
       run: |
-        filename="LLVM-${{ needs.prepare.outputs.release-version }}-Linux.tar.xz"
-        echo "filename=$filename" >> $GITHUB_OUTPUT
-        echo "path=/mnt/build/tools/clang/stage2-bins/$filename" >> $GITHUB_OUTPUT
+        which cmake
+        ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build stage2-package
+        # Copy Release artifact to the workspace so it is easier to upload.
+        # This is necessary, because on Windows, the build-prefix path can
+        # only be used on bash steps, because it uses the form of /d/files/
+        # and other steps expect D:\files.
+        mv ${{ steps.setup-stage.outputs.build-prefix  }}/build/tools/clang/stage2-bins/${{ needs.prepare.outputs.release-binary-filename }} .
 
     - uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 #v4.3.0
-      if: always()
       with:
-        name: release-binary
-        path: ${{ steps.package-info.outputs.path }}
+        name: ${{ runner.os }}-${{ runner.arch }}-release-binary
+        # Due to path differences on Windows when running in bash vs running on node,
+        # we need to search for files in the current workspace.
+        path: |
+          ${{ needs.prepare.outputs.release-binary-filename }}
 
     # Clean up some build files to reduce size of artifact.
     - name: Clean Up Build Directory
+      shell: bash
       run: |
-        find /mnt/build -iname ${{ steps.package-info.outputs.filename }} -delete
-
-    # We need to create an archive of the build directory, because it has too
-    # many files to upload.
-    - name: Save Build and Source Directories
-      run: |
-        tar -c . | zstd -T0 -c > llvm-project.tar.zst
-        tar -C /mnt/ -c build/ | zstd -T0 -c > build.tar.zst
-
-    - name: Upload Stage 3 Source
-      uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 #v4.3.0
-      with:
-        name: stage3-source
-        path: llvm-project.tar.zst
-        retention-days: 2
+        find ${{ steps.setup-stage.outputs.build-prefix }}/build -iname ${{ needs.prepare.outputs.release-binary-filename }} -delete
+        rm -Rf ${{ steps.setup-stage.outputs.build-prefix }}/build/tools/clang/stage2-bins/_CPack_Packages
 
-    - name: Upload Stage 3 Build Dir
-      uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 #v4.3.0
+    - name: Save Stage
+      uses: ./workflows/.github/workflows/release-binaries-save-stage
       with:
-        name: stage3-build
-        path: build.tar.zst
-        retention-days: 2
+        build-prefix: ${{ steps.setup-stage.outputs.build-prefix }}
 
-  upload-release-binaries-linux:
-    name: "Upload Linux Release Binaries"
+  upload-release-binaries:
+    name: "Upload Release Binaries"
     needs:
       - prepare
-      - build-stage3-linux
-    if : ${{ needs.prepare.outputs.upload == 'true' }}
+      - build-stage3-all
+    if: >-
+      always() &&
+      github.event_name != 'pull_request' &&
+      needs.prepare.outputs.upload == 'true'
     runs-on: ubuntu-22.04
     permissions:
       contents: write # For release uploads
+      id-token: write     # For artifact attestations
+      attestations: write # For artifact attestations
 
     steps:
     - name: 'Download artifact'
       uses: actions/download-artifact@6b208ae046db98c579e8a3aa621ab581ff575935 # v4.1.1
       with:
-        name: release-binary
+        pattern: '*-release-binary'
+        merge-multiple: true
+
+    - name: Attest Build Provenance
+      id: provenance
+      uses: actions/attest-build-provenance@897ed5eab6ed058a474202017ada7f40bfa52940 # v1.0.0
+      with:
+        subject-path: ${{ needs.prepare.outputs.release-binary-filename }}
+
+    - name: Rename attestation file
+      run:
+        mv ${{ steps.provenance.outputs.bundle-path }} ${{ needs.prepare.outputs.release-binary-filename }}.jsonl
+
+    - name: Upload Build Provenance
+      uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 #v4.3.3
+      with:
+        name: ${{ runner.os }}-${{ runner.arch }}-release-binary-attestation
+        path: ${{ needs.prepare.outputs.release-binary-filename }}.jsonl
 
     - name: Upload Release
+      shell: bash
       run: |
         sudo apt install python3-github
         ./llvm-project/llvm/utils/release/github-upload-release.py \
         --token ${{ github.token }} \
         --release ${{ needs.prepare.outputs.release-version }} \
         upload \
-        --files ${{ needs.build-stage3-linux.outputs.release-filename }}
-
+        --files ${{ needs.prepare.outputs.release-binary-filename }}*
 
-  test-stage3-linux:
-    name: "Test Stage 3 Linux"
+  test-stage3:
+    name: "Test Stage 3"
     needs:
       - prepare
-      - build-stage3-linux
-    runs-on: ubuntu-22.04
-    if: github.repository == 'llvm/llvm-project'
+      - build-stage3-all
+    if: >-
+      github.repository == 'llvm/llvm-project'
+    runs-on: ${{ inputs.runs-on }}
     steps:
-    - name: Install Ninja
-      uses: llvm/actions/install-ninja@22e9f909d35b50bd1181709564bfe816eaeaae81 # main
-
-    - name: 'Download artifact'
-      uses: actions/download-artifact@6b208ae046db98c579e8a3aa621ab581ff575935 # v4.1.1
+    - name: Checkout Actions
+      uses: actions/checkout@v4
       with:
-        pattern: stage3-*
-        merge-multiple: true
-
-    - name: Unpack Artifact
-      run: |
-        tar --zstd -xf llvm-project.tar.zst
-        rm llvm-project.tar.zst
-        sudo chown $USER:$USER /mnt/
-        tar --zstd -C /mnt -xf build.tar.zst
-        rm build.tar.zst
+        ref: ${{ (github.event_name == 'pull_request' && github.sha) || 'main' }}
+        sparse-checkout: |
+          .github/workflows/
+        sparse-checkout-cone-mode: false
+        path: workflows
+    - name: Setup Stage
+      id: setup-stage
+      uses: ./workflows/.github/workflows/release-binaries-setup-stage
+      with:
+        previous-artifact: build-stage3-all
 
     - name: Run Tests
+      shell: bash
       run: |
-        ninja -C /mnt/build stage2-check-all
+        ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build stage2-check-all
diff --git a/.github/workflows/release-tasks.yml b/.github/workflows/release-tasks.yml
index 2ed56dace1d4c..7dd4c306671b7 100644
--- a/.github/workflows/release-tasks.yml
+++ b/.github/workflows/release-tasks.yml
@@ -81,10 +81,20 @@ jobs:
     needs:
       - validate-tag
       - release-create
+    strategy:
+      fail-fast: false
+      matrix:
+        runs-on:
+          - ubuntu-22.04
+          - windows-2022
+          - macos-13
+          - macos-14
+
     uses: ./.github/workflows/release-binaries.yml
     with:
       release-version: ${{ needs.validate-tag.outputs.release-version }}
       upload: true
+      runs-on: ${{ matrix.runs-on }}
 
   release-sources:
     name: Package Release Sources
diff --git a/clang/cmake/caches/Release.cmake b/clang/cmake/caches/Release.cmake
index 9e6feb479d45f..e5161dd9a27b9 100644
--- a/clang/cmake/caches/Release.cmake
+++ b/clang/cmake/caches/Release.cmake
@@ -29,9 +29,13 @@ endfunction()
 # cache file to CMake via -C. e.g.
 #
 # cmake -D LLVM_RELEASE_ENABLE_PGO=ON -C Release.cmake
+set (DEFAULT_RUNTIMES "compiler-rt;libcxx")
+if (NOT WIN32)
+  list(APPEND DEFAULT_RUNTIMES "libcxxabi" "libunwind")
+endif()
 set(LLVM_RELEASE_ENABLE_LTO THIN CACHE STRING "")
 set(LLVM_RELEASE_ENABLE_PGO ON CACHE BOOL "")
-set(LLVM_RELEASE_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "")
+set(LLVM_RELEASE_ENABLE_RUNTIMES ${DEFAULT_RUNTIMES} CACHE STRING "")
 set(LLVM_RELEASE_ENABLE_PROJECTS "clang;lld;lldb;clang-tools-extra;bolt;polly;mlir;flang" CACHE STRING "")
 # Note we don't need to add install here, since it is one of the pre-defined
 # steps.

From e28632e63ad142c060bfb6394dac98c1c05222b4 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Fri, 26 Jul 2024 12:36:40 -0700
Subject: [PATCH 112/427] workflows: Remove left over debugging step from
 release-binaries job

(cherry picked from commit 18dee70168bcd7259daade4c86462ba859e7bed5)
---
 .github/workflows/release-binaries.yml | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml
index 5f939ba5bbe2f..0a800fd006e9d 100644
--- a/.github/workflows/release-binaries.yml
+++ b/.github/workflows/release-binaries.yml
@@ -152,17 +152,6 @@ jobs:
       uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
       with:
         ref: ${{ needs.prepare.outputs.ref }}
-    - name: Debug - Move actions
-      if: github.event_name != 'pull_request'
-      shell: bash
-      run: |
-        cd .github/workflows
-        for d in release-binaries-setup-stage release-binaries-save-stage; do
-          mkdir $d
-          pushd $d
-          curl -O -L https://raw.githubusercontent.com/tstellar/llvm-project/main/.github/workflows/$d/action.yml
-          popd
-        done
 
     - name: Setup sccache
       uses: hendrikmuhs/ccache-action@ca3acd2731eef11f1572ccb126356c2f9298d35e # v1.2.9

From d07802f405bbd982ba3c9f95bdeaed66dad31a88 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Fri, 26 Jul 2024 14:46:32 -0700
Subject: [PATCH 113/427] workflows/release-binaries: Always pull composite
 actions from main branch (#100805)

If we pull from the release tag, then if there is a bug in one of the
actions on the release tag, then we can never do a build for that tag.
Pulling from main will allows us to fix bugs in the actions we use to
build the releases.

(cherry picked from commit b0860b20878d2c84fc3ce56ea608c5186872faa2)
---
 .github/workflows/release-binaries.yml | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml
index 0a800fd006e9d..38b2ba7ba12dc 100644
--- a/.github/workflows/release-binaries.yml
+++ b/.github/workflows/release-binaries.yml
@@ -148,6 +148,20 @@ jobs:
     if: github.repository == 'llvm/llvm-project'
     runs-on: ${{ inputs.runs-on }}
     steps:
+
+    - name: Checkout Actions
+      uses: actions/checkout@@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+      with:
+        ref: ${{ (github.event_name == 'pull_request' && github.sha) || 'main' }}
+        sparse-checkout: |
+          .github/workflows/
+        sparse-checkout-cone-mode: false
+        path: workflows
+
+    - name: Setup Stage
+      id: setup-stage
+      uses: ./workflows/.github/workflows/release-binaries-setup-stage
+
     - name: Checkout LLVM
       uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
       with:
@@ -161,10 +175,6 @@ jobs:
         key: sccache-${{ runner.os }}-${{ runner.arch }}-release
         variant: sccache
 
-    - name: Setup Stage
-      id: setup-stage
-      uses: ./.github/workflows/release-binaries-setup-stage
-
     - name: Build Stage 1 Clang
       id: build
       shell: bash
@@ -184,7 +194,7 @@ jobs:
         ls -ltr ${{ steps.setup-stage.outputs.build-prefix }}/build
     
     - name: Save Stage
-      uses: ./.github/workflows/release-binaries-save-stage
+      uses: ./workflows/.github/workflows/release-binaries-save-stage
       with:
         build-prefix: ${{ steps.setup-stage.outputs.build-prefix }}
 

From bb4292506d38ac176d68c97f60ef9313b4122291 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Fri, 26 Jul 2024 14:51:47 -0700
Subject: [PATCH 114/427] workflow/release-binaries: Fix typo

Introduced in b0860b20878d2c84fc3ce56ea608c5186872faa2.

(cherry picked from commit d41f565318e2a414acfd7eec1cfb2fbf515391ba)
---
 .github/workflows/release-binaries.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml
index 38b2ba7ba12dc..f7ed88ff03add 100644
--- a/.github/workflows/release-binaries.yml
+++ b/.github/workflows/release-binaries.yml
@@ -150,7 +150,7 @@ jobs:
     steps:
 
     - name: Checkout Actions
-      uses: actions/checkout@@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
       with:
         ref: ${{ (github.event_name == 'pull_request' && github.sha) || 'main' }}
         sparse-checkout: |

From 5657751f8eb52356b297d15e455ad93eb89280d3 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Tue, 30 Jul 2024 18:54:20 -0700
Subject: [PATCH 115/427] workflows/release-binaries: Fetch composite actions
 outside of default workspace (#100845)

Otherwise, the checkout step will override them.

(cherry picked from commit 41003ff3fe344dee5c963d462a4bc6d528811d86)
---
 .github/workflows/release-binaries.yml | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml
index f7ed88ff03add..b1b046dbad5f8 100644
--- a/.github/workflows/release-binaries.yml
+++ b/.github/workflows/release-binaries.yml
@@ -156,17 +156,34 @@ jobs:
         sparse-checkout: |
           .github/workflows/
         sparse-checkout-cone-mode: false
+        # Check out outside of working directory so the source checkout doesn't
+        # remove it.
         path: workflows
 
-    - name: Setup Stage
-      id: setup-stage
-      uses: ./workflows/.github/workflows/release-binaries-setup-stage
+    # actions/checkout does not support paths outside of the GITHUB_WORKSPACE.
+    # Also, anything that we put inside of GITHUB_WORKSPACE will be overwritten
+    # by future actions/checkout steps.  Therefore, in order to checkout the
+    # latest actions from main, we need to first checkout out the actions inside of
+    # GITHUB_WORKSPACE (see previous step), then use actions/checkout to checkout
+    # the code being built and the move the actions from main back into GITHUB_WORKSPACE,
+    # becasue the uses on composite actions only reads workflows from inside GITHUB_WORKSPACE.
+    - shell: bash
+      run: mv workflows  ../workflows-main
 
     - name: Checkout LLVM
       uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
       with:
         ref: ${{ needs.prepare.outputs.ref }}
 
+    - name: Copy main workflows
+      shell: bash
+      run: |
+        mv ../workflows-main .
+
+    - name: Setup Stage
+      id: setup-stage
+      uses: ./workflows-main/.github/workflows/release-binaries-setup-stage
+
     - name: Setup sccache
       uses: hendrikmuhs/ccache-action@ca3acd2731eef11f1572ccb126356c2f9298d35e # v1.2.9
       with:
@@ -194,7 +211,7 @@ jobs:
         ls -ltr ${{ steps.setup-stage.outputs.build-prefix }}/build
     
     - name: Save Stage
-      uses: ./workflows/.github/workflows/release-binaries-save-stage
+      uses: ./workflows-main/.github/workflows/release-binaries-save-stage
       with:
         build-prefix: ${{ steps.setup-stage.outputs.build-prefix }}
 

From 5307f8226159e9144f8da3511ab337b7e426776b Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Sat, 3 Aug 2024 09:11:51 -0700
Subject: [PATCH 116/427] workflows/release-binaries: Fix problem with python
 installation on macos-14 (#101774)

python3 wasn't able to see modules installed by pip, so we need to use
the setup-python action to ensure that the default pip and python3 both
use the same prefix.

See https://github.com/actions/runner-images/issues/10385

(cherry picked from commit 59476c99983d3813b412c9b0c0464365644c23a8)
---
 .github/workflows/release-binaries.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml
index b1b046dbad5f8..7cc8b7a1e56e8 100644
--- a/.github/workflows/release-binaries.yml
+++ b/.github/workflows/release-binaries.yml
@@ -57,6 +57,12 @@ jobs:
       release-binary-filename: ${{ steps.vars.outputs.release-binary-filename }}
 
     steps:
+    # It's good practice to use setup-python, but this is also required on macos-14
+    # due to https://github.com/actions/runner-images/issues/10385
+    - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f
+      with:
+        python-version: '3.12'
+
     - name: Checkout LLVM
       uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
 

From b047c24071d5f12f5cd4fa397bfd144a4f735935 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Fri, 2 Aug 2024 21:52:03 -0700
Subject: [PATCH 117/427] workflows: Re-implement the get-llvm-version action
 as a composite action (#101569)

The old version in the llvm/actions repo stopped working after the
version variables were moved out of llvm/CMakeLists.txt. Composite
actions are more simple and don't require javascript, which is why I
reimplemented it as a composite action.

This will fix the failing abi checks on the release branch.

(cherry picked from commit 14837aff058f9a2d32b8277debe619d8eb1995a1)
---
 .github/workflows/get-llvm-version/action.yml | 26 ++++++
 .github/workflows/libclang-abi-tests.yml      | 16 ++--
 .github/workflows/llvm-tests.yml              | 14 +--
 llvm/utils/release/get-llvm-version.sh        | 86 +++++++++++++++++++
 4 files changed, 127 insertions(+), 15 deletions(-)
 create mode 100644 .github/workflows/get-llvm-version/action.yml
 create mode 100755 llvm/utils/release/get-llvm-version.sh

diff --git a/.github/workflows/get-llvm-version/action.yml b/.github/workflows/get-llvm-version/action.yml
new file mode 100644
index 0000000000000..2218d926fc13d
--- /dev/null
+++ b/.github/workflows/get-llvm-version/action.yml
@@ -0,0 +1,26 @@
+name: Get LLVM Version
+description: >-
+  Get the LLVM version from the llvm-project source tree.  This action assumes
+  the llvm-project sources have already been checked out into GITHUB_WORKSPACE.
+
+outputs:
+  major:
+    description: LLVM major version
+    value: ${{ steps.version.outputs.major }}
+  minor:
+    description: LLVM minor version
+    value: ${{ steps.version.outputs.minor }}
+  patch:
+    description: LLVM patch version
+    value: ${{ steps.version.outputs.patch }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Get Version
+      shell: bash
+      id: version
+      run: |
+        for v in major minor patch; do
+          echo "$v=`llvm/utils/release/get-llvm-version.sh --$v`" >> $GITHUB_OUTPUT
+        done
diff --git a/.github/workflows/libclang-abi-tests.yml b/.github/workflows/libclang-abi-tests.yml
index 972d21c3bcedf..9e839ff49e283 100644
--- a/.github/workflows/libclang-abi-tests.yml
+++ b/.github/workflows/libclang-abi-tests.yml
@@ -33,9 +33,9 @@ jobs:
       ABI_HEADERS: ${{ steps.vars.outputs.ABI_HEADERS }}
       ABI_LIBS: ${{ steps.vars.outputs.ABI_LIBS }}
       BASELINE_VERSION_MAJOR: ${{ steps.vars.outputs.BASELINE_VERSION_MAJOR }}
-      LLVM_VERSION_MAJOR: ${{ steps.version.outputs.LLVM_VERSION_MAJOR }}
-      LLVM_VERSION_MINOR: ${{ steps.version.outputs.LLVM_VERSION_MINOR }}
-      LLVM_VERSION_PATCH: ${{ steps.version.outputs.LLVM_VERSION_PATCH }}
+      LLVM_VERSION_MAJOR: ${{ steps.version.outputs.major }}
+      LLVM_VERSION_MINOR: ${{ steps.version.outputs.minor }}
+      LLVM_VERSION_PATCH: ${{ steps.version.outputs.patch }}
     steps:
       - name: Checkout source
         uses: actions/checkout@v4
@@ -44,14 +44,14 @@ jobs:
 
       - name: Get LLVM version
         id: version
-        uses: llvm/actions/get-llvm-version@main
+        uses: ./.github/workflows/get-llvm-version
 
       - name: Setup Variables
         id: vars
         run: |
           remote_repo='https://github.com/llvm/llvm-project'
-          if [ ${{ steps.version.outputs.LLVM_VERSION_PATCH }} -eq 0 ]; then
-            major_version=$(( ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - 1))
+          if [ ${{ steps.version.outputs.patch }} -eq 0 ]; then
+            major_version=$(( ${{ steps.version.outputs.major }} - 1))
             baseline_ref="llvmorg-$major_version.1.0"
 
             # If there is a minor release, we want to use that as the base line.
@@ -73,8 +73,8 @@ jobs:
             } >> "$GITHUB_OUTPUT"
           else
             {
-              echo "BASELINE_VERSION_MAJOR=${{ steps.version.outputs.LLVM_VERSION_MAJOR }}"
-              echo "BASELINE_REF=llvmorg-${{ steps.version.outputs.LLVM_VERSION_MAJOR }}.1.0"
+              echo "BASELINE_VERSION_MAJOR=${{ steps.version.outputs.major }}"
+              echo "BASELINE_REF=llvmorg-${{ steps.version.outputs.major }}.1.0"
               echo "ABI_HEADERS=."
               echo "ABI_LIBS=libclang.so libclang-cpp.so"
             } >> "$GITHUB_OUTPUT"
diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml
index 64d60bc3da45e..26e644229aaa2 100644
--- a/.github/workflows/llvm-tests.yml
+++ b/.github/workflows/llvm-tests.yml
@@ -43,9 +43,9 @@ jobs:
       ABI_HEADERS: ${{ steps.vars.outputs.ABI_HEADERS }}
       BASELINE_VERSION_MAJOR: ${{ steps.vars.outputs.BASELINE_VERSION_MAJOR }}
       BASELINE_VERSION_MINOR: ${{ steps.vars.outputs.BASELINE_VERSION_MINOR }}
-      LLVM_VERSION_MAJOR: ${{ steps.version.outputs.LLVM_VERSION_MAJOR }}
-      LLVM_VERSION_MINOR: ${{ steps.version.outputs.LLVM_VERSION_MINOR }}
-      LLVM_VERSION_PATCH: ${{ steps.version.outputs.LLVM_VERSION_PATCH }}
+      LLVM_VERSION_MAJOR: ${{ steps.version.outputs.major }}
+      LLVM_VERSION_MINOR: ${{ steps.version.outputs.minor }}
+      LLVM_VERSION_PATCH: ${{ steps.version.outputs.patch }}
     steps:
       - name: Checkout source
         uses: actions/checkout@v4
@@ -54,7 +54,7 @@ jobs:
 
       - name: Get LLVM version
         id: version
-        uses: llvm/actions/get-llvm-version@main
+        uses: ./.github/workflows/get-llvm-version
 
       - name: Setup Variables
         id: vars
@@ -66,14 +66,14 @@ jobs:
           # 18.1.0 We want to check 17.0.x
           # 18.1.1 We want to check 18.1.0
           echo "BASELINE_VERSION_MINOR=1" >> "$GITHUB_OUTPUT"
-          if [ ${{ steps.version.outputs.LLVM_VERSION_PATCH }} -eq 0 ]; then
+          if [ ${{ steps.version.outputs.patch }} -eq 0 ]; then
             {
-              echo "BASELINE_VERSION_MAJOR=$(( ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - 1))"
+              echo "BASELINE_VERSION_MAJOR=$(( ${{ steps.version.outputs.major }} - 1))"
               echo "ABI_HEADERS=llvm-c"
             } >> "$GITHUB_OUTPUT"
           else
             {
-              echo "BASELINE_VERSION_MAJOR=${{ steps.version.outputs.LLVM_VERSION_MAJOR }}"
+              echo "BASELINE_VERSION_MAJOR=${{ steps.version.outputs.major }}"
               echo "ABI_HEADERS=."
             } >> "$GITHUB_OUTPUT"
           fi
diff --git a/llvm/utils/release/get-llvm-version.sh b/llvm/utils/release/get-llvm-version.sh
new file mode 100755
index 0000000000000..2183e2e0edacd
--- /dev/null
+++ b/llvm/utils/release/get-llvm-version.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+#===-- get-llvm-version.sh - Get LLVM Version from sources -----------------===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===------------------------------------------------------------------------===#
+#
+# Extract the current LLVM version from the CMake files. 
+#
+#===------------------------------------------------------------------------===#
+
+cmake_file=$(dirname $0)/../../../cmake/Modules/LLVMVersion.cmake
+function usage() {
+    echo "usage: `basename $0`"
+    echo ""
+    echo "Calling this script with now options will output the full version: e.g. 19.1.0"
+    echo " --cmake-file      Path to cmake file with the version (default: $cmake_file)
+    echo " You can use at most one of the following options:
+    echo " --major           Print the major version."
+    echo " --minor           Print the minor version."
+    echo " --patch           Print the patch version."
+}
+
+print=""
+
+while [ $# -gt 0 ]; do
+    case $1 in
+        --cmake-file )
+            shift
+    	    cmake_file="$1"
+    	    ;;
+        --major)
+            if [ -n "$print" ]; then
+                echo "Only one of --major, --minor, --patch is allowed"
+                exit 1
+            fi
+            print="major"
+            ;;
+        --minor)
+            if [ -n "$print" ]; then
+                echo "Only one of --major, --minor, --patch is allowed"
+                exit 1
+            fi
+            print="minor"
+            ;;
+        --patch)
+            if [ -n "$print" ]; then
+                echo "Only one of --major, --minor, --patch is allowed"
+                exit 1
+            fi
+            print="patch"
+            ;;
+        --help | -h | -\? )
+            usage
+            exit 0
+            ;;
+        * )
+            echo "unknown option: $1"
+            usage
+            exit 1
+            ;;
+    esac
+    shift
+done
+
+major=`grep -o 'LLVM_VERSION_MAJOR[[:space:]]\+\([0-9]\+\)' $cmake_file  | grep -o '[0-9]\+'`
+minor=`grep -o 'LLVM_VERSION_MINOR[[:space:]]\+\([0-9]\+\)' $cmake_file  | grep -o '[0-9]\+'`
+patch=`grep -o 'LLVM_VERSION_PATCH[[:space:]]\+\([0-9]\+\)' $cmake_file  | grep -o '[0-9]\+'`
+
+case $print in
+    major)
+        echo "$major"
+        ;;
+    minor)
+        echo "$minor"
+        ;;
+    patch)
+        echo "$patch"
+        ;;
+    *)
+        echo "$major.$minor.$patch"
+        ;;
+esac
+

From 1069d16a3980111c804ab15ffc4de85c42a7f22a Mon Sep 17 00:00:00 2001
From: DianQK <dianqk@dianqk.net>
Date: Sun, 4 Aug 2024 16:45:10 +0800
Subject: [PATCH 118/427] [Metadata] Try to merge the first and last ranges.
 (#101860)

Fixes #101859.

If we have at least 2 ranges, we have to try to merge the last and first
ones to handle the wrap range.

(cherry picked from commit 4377656f2419a8eb18c01e86929b689dcf22b5d6)
---
 llvm/lib/IR/Metadata.cpp                |  5 +++--
 llvm/test/Transforms/GVN/merge-range.ll | 20 ++++++++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/GVN/merge-range.ll

diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
index ae5f5de142328..fd2f4d184162f 100644
--- a/llvm/lib/IR/Metadata.cpp
+++ b/llvm/lib/IR/Metadata.cpp
@@ -1318,10 +1318,11 @@ MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) {
     ++BI;
   }
 
-  // If we have more than 2 ranges (4 endpoints) we have to try to merge
+  // We haven't handled wrap in the previous merge,
+  // if we have at least 2 ranges (4 endpoints) we have to try to merge
   // the last and first ones.
   unsigned Size = EndPoints.size();
-  if (Size > 4) {
+  if (Size > 2) {
     ConstantInt *FB = EndPoints[0];
     ConstantInt *FE = EndPoints[1];
     if (tryMergeRange(EndPoints, FB, FE)) {
diff --git a/llvm/test/Transforms/GVN/merge-range.ll b/llvm/test/Transforms/GVN/merge-range.ll
new file mode 100644
index 0000000000000..ad1fa4cae5662
--- /dev/null
+++ b/llvm/test/Transforms/GVN/merge-range.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=gvn -S < %s | FileCheck %s
+
+define i8 @foo(ptr %arg, i8 %arg1) {
+; CHECK-LABEL: define i8 @foo(
+; CHECK-SAME: ptr [[ARG:%.*]], i8 [[ARG1:%.*]]) {
+; CHECK-NEXT:  [[BB:.*:]]
+; CHECK-NEXT:    [[I:%.*]] = load i8, ptr [[ARG]], align 1, !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    [[I3:%.*]] = add i8 [[I]], [[I]]
+; CHECK-NEXT:    ret i8 [[I3]]
+;
+bb:
+  %i = load i8, ptr %arg, align 1, !range !{i8 127, i8 -20}
+  %i2 = load i8, ptr %arg, align 1, !range !{i8 -27, i8 -24, i8 -20, i8 -17}
+  %i3 = add i8 %i, %i2
+  ret i8 %i3
+}
+;.
+; CHECK: [[RNG0]] = !{i8 127, i8 -17}
+;.

From 06484125e0f419dcc023ff7dd007066f5a9c9a7e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Sun, 4 Aug 2024 16:36:00 +0400
Subject: [PATCH 119/427] InferAddressSpaces: Fix mishandling stores of
 pointers to themselves (#101877)

(cherry picked from commit 3c483b887e5a32a0ddc0a52a467b31f74aad25bb)
---
 .../Transforms/Scalar/InferAddressSpaces.cpp  |  2 +-
 .../AMDGPU/store-pointer-to-self.ll           | 71 +++++++++++++++++++
 2 files changed, 72 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/InferAddressSpaces/AMDGPU/store-pointer-to-self.ll

diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index c9be8ee00cdc7..6b9566f1ae461 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -1233,7 +1233,7 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
         // If V is used as the pointer operand of a compatible memory operation,
         // sets the pointer operand to NewV. This replacement does not change
         // the element type, so the resultant load/store is still valid.
-        CurUser->replaceUsesOfWith(V, NewV);
+        U.set(NewV);
         continue;
       }
 
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/store-pointer-to-self.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/store-pointer-to-self.ll
new file mode 100644
index 0000000000000..bce0e4ec1fe16
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/store-pointer-to-self.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
+
+; Make sure memory instructions where the pointer appears in both a
+; pointer and value operand work correctly.
+
+declare void @user(ptr)
+
+; Make sure only the pointer operand use of the store is replaced
+define void @store_flat_pointer_to_self() {
+; CHECK-LABEL: define void @store_flat_pointer_to_self() {
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT:    store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT:    call void @user(ptr [[FLAT]])
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca ptr, align 8, addrspace(5)
+  %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+  store ptr %flat, ptr %flat, align 8
+  call void @user(ptr %flat)
+  ret void
+}
+
+; FIXME: Should be able to optimize the pointer operand to flat.
+define ptr @atomicrmw_xchg_flat_pointer_to_self() {
+; CHECK-LABEL: define ptr @atomicrmw_xchg_flat_pointer_to_self() {
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT:    [[XCHG:%.*]] = atomicrmw xchg ptr [[FLAT]], ptr [[FLAT]] seq_cst, align 8
+; CHECK-NEXT:    call void @user(ptr [[FLAT]])
+; CHECK-NEXT:    ret ptr [[XCHG]]
+;
+  %alloca = alloca ptr, align 8, addrspace(5)
+  %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+  %xchg = atomicrmw xchg ptr %flat, ptr %flat seq_cst, align 8
+  call void @user(ptr %flat)
+  ret ptr %xchg
+}
+
+define { ptr, i1 } @cmpxchg_flat_pointer_new_to_self(ptr %cmp) {
+; CHECK-LABEL: define { ptr, i1 } @cmpxchg_flat_pointer_new_to_self(
+; CHECK-SAME: ptr [[CMP:%.*]]) {
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT:    [[CMPX:%.*]] = cmpxchg ptr [[FLAT]], ptr [[CMP]], ptr [[FLAT]] seq_cst seq_cst, align 8
+; CHECK-NEXT:    call void @user(ptr [[FLAT]])
+; CHECK-NEXT:    ret { ptr, i1 } [[CMPX]]
+;
+  %alloca = alloca ptr, align 8, addrspace(5)
+  %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+  %cmpx = cmpxchg ptr %flat, ptr %cmp, ptr %flat seq_cst seq_cst, align 8
+  call void @user(ptr %flat)
+  ret { ptr, i1 } %cmpx
+}
+
+define { ptr, i1 } @cmpxchg_flat_pointer_cmp_to_self(ptr %new) {
+; CHECK-LABEL: define { ptr, i1 } @cmpxchg_flat_pointer_cmp_to_self(
+; CHECK-SAME: ptr [[NEW:%.*]]) {
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT:    [[CMPX:%.*]] = cmpxchg ptr [[FLAT]], ptr [[FLAT]], ptr [[NEW]] seq_cst seq_cst, align 8
+; CHECK-NEXT:    call void @user(ptr [[FLAT]])
+; CHECK-NEXT:    ret { ptr, i1 } [[CMPX]]
+;
+  %alloca = alloca ptr, align 8, addrspace(5)
+  %flat = addrspacecast ptr addrspace(5) %alloca to ptr
+  %cmpx = cmpxchg ptr %flat, ptr %flat, ptr %new seq_cst seq_cst, align 8
+  call void @user(ptr %flat)
+  ret { ptr, i1 } %cmpx
+}

From 47ee66b9b969af834cfd9df82273ed9fe0418802 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sun, 4 Aug 2024 23:20:45 +0300
Subject: [PATCH 120/427] [ARM] [Windows] Use IMAGE_SYM_CLASS_STATIC for
 private functions (#101828)

For functions with private linkage, pick
IMAGE_SYM_CLASS_STATIC rather than IMAGE_SYM_CLASS_EXTERNAL;
GlobalValue::isInternalLinkage() only checks for
InternalLinkage, while GlobalValue::isLocalLinkage() checks for both
InternalLinkage and PrivateLinkage.

This matches what the AArch64 target does, since commit
3406934e4db4bf95c230db072608ed062c13ad5b.

This activates a preexisting fix for the AArch64 target from
1e7f592a890aad860605cf5220530b3744e107ba, for the ARM target as well.

When a relocation points at a symbol, one usually can convey an offset
to the symbol by encoding it as an immediate in the instruction.
However, for the ARM and AArch64 branch instructions, the immediate
stored in the instruction is ignored by MS link.exe (and lld-link
matches this aspect). (It would be simple to extend lld-link to support
it - but such object files would be incompatible with MS link.exe.)

This was worked around by 1e7f592a890aad860605cf5220530b3744e107ba by
emitting symbols into the object file symbol table, for temporary
symbols that otherwise would have been omitted, if they have the class
IMAGE_SYM_CLASS_STATIC, in order to avoid needing an offset in the
relocated instruction.

This change gives the symbols generated from functions with the IR level
"private" linkage the right class, to activate that workaround.

This fixes https://github.com/llvm/llvm-project/issues/100101, fixing
code generation for coroutines for Windows on ARM. After the change in
f78688134026686288a8d310b493d9327753a022, coroutines generate a function
with private linkage, and calls to this function were previously broken
for this target.

(cherry picked from commit 8dd065d5bc81b0c8ab57f365bb169a5d92928f25)
---
 llvm/lib/Target/ARM/ARMAsmPrinter.cpp         |  6 +-
 llvm/test/CodeGen/ARM/Windows/private-func.ll | 17 ++++++
 .../test/MC/ARM/Windows/branch-reloc-offset.s | 57 +++++++++++++++++++
 3 files changed, 77 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/ARM/Windows/private-func.ll
 create mode 100644 llvm/test/MC/ARM/Windows/branch-reloc-offset.s

diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 642739a29d6b0..96d7074e6ef37 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -153,9 +153,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
     OptimizationGoals = 0;
 
   if (Subtarget->isTargetCOFF()) {
-    bool Internal = F.hasInternalLinkage();
-    COFF::SymbolStorageClass Scl = Internal ? COFF::IMAGE_SYM_CLASS_STATIC
-                                            : COFF::IMAGE_SYM_CLASS_EXTERNAL;
+    bool Local = F.hasLocalLinkage();
+    COFF::SymbolStorageClass Scl =
+        Local ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL;
     int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
 
     OutStreamer->beginCOFFSymbolDef(CurrentFnSym);
diff --git a/llvm/test/CodeGen/ARM/Windows/private-func.ll b/llvm/test/CodeGen/ARM/Windows/private-func.ll
new file mode 100644
index 0000000000000..2d030ae3fabbb
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/Windows/private-func.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple thumbv7-windows -filetype asm -o - %s | FileCheck %s
+
+define dso_local void @func1() {
+entry:
+  call void @func2()
+  ret void
+}
+
+define private void @func2() {
+entry:
+  ret void
+}
+
+; CHECK:      .def    .Lfunc2;
+; CHECK-NEXT: .scl    3;
+; CHECK-NEXT: .type   32;
+; CHECK-NEXT: .endef
diff --git a/llvm/test/MC/ARM/Windows/branch-reloc-offset.s b/llvm/test/MC/ARM/Windows/branch-reloc-offset.s
new file mode 100644
index 0000000000000..2e70a723ccf78
--- /dev/null
+++ b/llvm/test/MC/ARM/Windows/branch-reloc-offset.s
@@ -0,0 +1,57 @@
+// RUN: llvm-mc -triple thumbv7-windows-gnu -filetype obj %s -o - | llvm-objdump -D -r - | FileCheck %s
+
+    .text
+main:
+    nop
+    b .Ltarget
+    b .Lother_target
+
+// A private label target in the same section
+    .def .Ltarget
+    .scl 3
+    .type 32
+    .endef
+    .p2align 2
+.Ltarget:
+    bx lr
+
+// A private label target in another section
+    .section "other", "xr"
+    nop
+    nop
+    nop
+    nop
+    nop
+    nop
+    nop
+    nop
+    .def .Lother_target
+    .scl 3
+    .type 32
+    .endef
+    .p2align 2
+.Lother_target:
+    bx lr
+
+// Check that both branches have a relocation with a zero offset.
+//
+// CHECK: 00000000 <main>:
+// CHECK:        0: bf00          nop
+// CHECK:        2: f000 b800     b.w     0x6 <main+0x6>          @ imm = #0x0
+// CHECK:                         00000002:  IMAGE_REL_ARM_BRANCH24T      .Ltarget
+// CHECK:        6: f000 b800     b.w     0xa <main+0xa>          @ imm = #0x0
+// CHECK:                         00000006:  IMAGE_REL_ARM_BRANCH24T      .Lother_target
+// CHECK:        a: bf00          nop
+// CHECK: 0000000c <.Ltarget>:
+// CHECK:        c: 4770          bx      lr
+// CHECK: 00000000 <other>:
+// CHECK:        0: bf00          nop
+// CHECK:        2: bf00          nop
+// CHECK:        4: bf00          nop
+// CHECK:        6: bf00          nop
+// CHECK:        8: bf00          nop
+// CHECK:        a: bf00          nop
+// CHECK:        c: bf00          nop
+// CHECK:        e: bf00          nop
+// CHECK: 00000010 <.Lother_target>:
+// CHECK:       10: 4770          bx      lr

From 2193e4f88c5a96462a41528912ddedcb405ea29c Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara@apple.com>
Date: Wed, 31 Jul 2024 16:51:45 -0700
Subject: [PATCH 121/427] Forward declare OSSpinLockLock on MacOS since it's
 not shipped on the system. (#101392)

Fixes build errors on some SDKs.

rdar://132607572
(cherry picked from commit 3a4c7cc56c07b2db9010c2228fc7cb2a43dd9b2d)
---
 compiler-rt/lib/rtsan/rtsan_interceptors.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/compiler-rt/lib/rtsan/rtsan_interceptors.cpp b/compiler-rt/lib/rtsan/rtsan_interceptors.cpp
index 4d5423ec629d2..b63040446e53c 100644
--- a/compiler-rt/lib/rtsan/rtsan_interceptors.cpp
+++ b/compiler-rt/lib/rtsan/rtsan_interceptors.cpp
@@ -21,6 +21,18 @@
 #include "rtsan/rtsan_context.h"
 
 #if SANITIZER_APPLE
+
+#if TARGET_OS_MAC
+// On MacOS OSSpinLockLock is deprecated and no longer present in the headers,
+// but the symbol still exists on the system. Forward declare here so we
+// don't get compilation errors.
+#include <stdint.h>
+extern "C" {
+typedef int32_t OSSpinLock;
+void OSSpinLockLock(volatile OSSpinLock *__lock);
+}
+#endif
+
 #include <libkern/OSAtomic.h>
 #include <os/lock.h>
 #endif

From 283443371b8c2e40750e56d86c05f711a89d786c Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Sun, 4 Aug 2024 13:48:22 -0700
Subject: [PATCH 122/427] ReleaseNotes: lld/ELF: mention CREL

---
 lld/docs/ReleaseNotes.rst | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index 09081e421e905..98fddcd7bf7f2 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -26,6 +26,12 @@ Non-comprehensive list of changes in this release
 ELF Improvements
 ----------------
 
+* Experimental CREL relocations with explicit addends are now supported using the
+  temporary section type code 0x40000020 (``clang -c -Wa,--crel,--allow-experimental-crel``).
+  LLVM will change the code and break compatibility (Clang and lld of different
+  versions are not guaranteed to cooperate, unlike other features). CREL with
+  implicit addends are not supported.
+  (`#98115 <https://github.com/llvm/llvm-project/pull/98115>`_)
 * ``EI_OSABI`` in the output is now inferred from input object files.
   (`#97144 <https://github.com/llvm/llvm-project/pull/97144>`_)
 * ``--compress-sections <section-glib>={none,zlib,zstd}[:level]`` is added to compress
@@ -88,7 +94,7 @@ ELF Improvements
   (`#94099 <https://github.com/llvm/llvm-project/pull/94099>`_)
   Non-alloc orphan sections are now placed at the end.
   (`#94519 <https://github.com/llvm/llvm-project/pull/94519>`_)
-* R_X86_64_REX_GOTPCRELX of the addq form is no longer incorrectly optimized when the address is larger than 0x80000000.
+* ``R_X86_64_REX_GOTPCRELX`` of the addq form is no longer incorrectly optimized when the address is larger than 0x80000000.
 
 Breaking changes
 ----------------

From d033ae172d1c5a85fd09c36e23608a9241ea2990 Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@hieta.se>
Date: Mon, 5 Aug 2024 10:40:33 +0200
Subject: [PATCH 123/427] Bump version to 19.1.0-rc2

---
 cmake/Modules/LLVMVersion.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake
index 1ad844cbea838..7d5d292957dbe 100644
--- a/cmake/Modules/LLVMVersion.cmake
+++ b/cmake/Modules/LLVMVersion.cmake
@@ -10,6 +10,6 @@ if(NOT DEFINED LLVM_VERSION_PATCH)
   set(LLVM_VERSION_PATCH 0)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
-  set(LLVM_VERSION_SUFFIX -rc1)
+  set(LLVM_VERSION_SUFFIX -rc2)
 endif()
 

From cc6be8216ad24a03911c5eee429901ad61c173f4 Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Tue, 30 Jul 2024 08:55:45 +0200
Subject: [PATCH 124/427] =?UTF-8?q?[sanitizer=5Fcommon][test]=20Fix=20Sani?=
 =?UTF-8?q?tizerIoctl/KVM=5FGET=5F*=20tests=20on=20Linux/=E2=80=A6=20(#100?=
 =?UTF-8?q?532)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

…sparc64

Two ioctl tests `FAIL` on Linux/sparc64 (both 32 and 64-bit):
```
  SanitizerCommon-Unit :: ./Sanitizer-sparc-Test/SanitizerIoctl/KVM_GET_LAPIC
  SanitizerCommon-Unit :: ./Sanitizer-sparc-Test/SanitizerIoctl/KVM_GET_MP_STATE
```
like
```
compiler-rt/lib/sanitizer_common/tests/./Sanitizer-sparc-Test --gtest_filter=SanitizerIoctl.KVM_GET_LAPIC
--
compiler-rt/lib/sanitizer_common/tests/sanitizer_ioctl_test.cpp:91: Failure
Value of: res
  Actual: false
Expected: true

compiler-rt/lib/sanitizer_common/tests/sanitizer_ioctl_test.cpp:92: Failure
Expected equality of these values:
  ioctl_desc::WRITE
    Which is: 2
  desc.type
    Which is: 1
```
The problem is that Linux/sparc64, like Linux/mips, uses a different
layout for the `ioctl` `request` arg than most other Linux targets as
can be seen in `sanitizer_platform_limits_posix.h` (`IOC_*`). Therefore,
this patch makes the tests use the correct one.

Tested on `sparc64-unknown-linux-gnu` and `x86_64-pc-linux-gnu`.

(cherry picked from commit 9eefe065bb2752b0db9ed553d2406e9a15ce349e)
---
 .../lib/sanitizer_common/tests/sanitizer_ioctl_test.cpp     | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_ioctl_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_ioctl_test.cpp
index 8da09f693c2b8..8500d3aa91fec 100644
--- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_ioctl_test.cpp
+++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_ioctl_test.cpp
@@ -77,7 +77,8 @@ TEST(SanitizerIoctl, Fixup) {
 // Test decoding KVM ioctl numbers.
 TEST(SanitizerIoctl, KVM_GET_MP_STATE) {
   ioctl_desc desc;
-  unsigned int desc_value = SANITIZER_MIPS ? 0x4004ae98U : 0x8004ae98U;
+  unsigned int desc_value =
+      SANITIZER_MIPS || SANITIZER_SPARC ? 0x4004ae98U : 0x8004ae98U;
   bool res = ioctl_decode(desc_value, &desc);
   EXPECT_TRUE(res);
   EXPECT_EQ(ioctl_desc::WRITE, desc.type);
@@ -86,7 +87,8 @@ TEST(SanitizerIoctl, KVM_GET_MP_STATE) {
 
 TEST(SanitizerIoctl, KVM_GET_LAPIC) {
   ioctl_desc desc;
-  unsigned int desc_value = SANITIZER_MIPS ? 0x4400ae8eU : 0x8400ae8eU;
+  unsigned int desc_value =
+      SANITIZER_MIPS || SANITIZER_SPARC ? 0x4400ae8eU : 0x8400ae8eU;
   bool res = ioctl_decode(desc_value, &desc);
   EXPECT_TRUE(res);
   EXPECT_EQ(ioctl_desc::WRITE, desc.type);

From 708cb9cd3a4b7ca182e35a5b3c17f10f18215084 Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Tue, 30 Jul 2024 08:57:25 +0200
Subject: [PATCH 125/427] [sanitizer_common] Don't use syscall(SYS_clone) on
 Linux/sparc64 (#100534)

```
  SanitizerCommon-Unit :: ./Sanitizer-sparc-Test/SanitizerCommon/StartSubprocessTest
```
and every single test using the `llvm-symbolizer` `FAIL` on
Linux/sparc64 in a very weird way: when using `StartSubprocess`, there's
a call to `internal_fork`, but we never reach `internal_execve`.
`internal_fork` is implemented using `syscall(SYS_clone)`. The calling
convention of that syscall already varies considerably between targets,
but as documented in `clone(2)`, SPARC again is widely different.
Instead of trying to match `glibc` here, this patch just calls `__fork`.

Tested on `sparc64-unknown-linux-gnu` and `x86_64-pc-linux-gnu`.

(cherry picked from commit 1c53b907bd6348138a59da270836fc9b4c161a07)
---
 compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 1d6a55bdb7f38..50e41da68d959 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -826,10 +826,16 @@ uptr internal_sigaltstack(const void *ss, void *oss) {
   return internal_syscall(SYSCALL(sigaltstack), (uptr)ss, (uptr)oss);
 }
 
+extern "C" pid_t __fork(void);
+
 int internal_fork() {
 #    if SANITIZER_LINUX
 #      if SANITIZER_S390
   return internal_syscall(SYSCALL(clone), 0, SIGCHLD);
+#      elif SANITIZER_SPARC
+  // The clone syscall interface on SPARC differs massively from the rest,
+  // so fall back to __fork.
+  return __fork();
 #      else
   return internal_syscall(SYSCALL(clone), SIGCHLD, 0);
 #      endif

From 97747fb4686939771854e7c9c4e6d9f9bfb59c81 Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Tue, 30 Jul 2024 09:00:20 +0200
Subject: [PATCH 126/427] [sanitizer_common] Adjust signal_send.cpp for
 Linux/sparc64 (#100538)

```
  SanitizerCommon-ubsan-sparc-Linux :: Linux/signal_send.cpp
```
currently `FAIL`s on Linux/sparc64 (32 and 64-bit). Instead of the
expected values for `SIGUSR1` (`10`) and `SIGUSR1` (`12`), that target
uses `30` and `31`.

On Linux/x86_64, the signals get their values from
`x86_64-linux-gnu/bits/signum-generic.h`, to be overridden in
`x86_64-linux-gnu/bits/signum.h`. On Linux/sparc64 OTOH, the definitions
are from `sparc64-linux-gnu/bits/signum-arch.h` and remain that way.
There's no `signum.h` at all.

The patch allows for both values.

Tested on `sparc64-unknown-linux-gnu` and `x86_64-pc-linux-gnu`.

(cherry picked from commit 7cecbdfe4eac3fd7268532426fb6b13e51b8720d)
---
 .../test/sanitizer_common/TestCases/Linux/signal_send.cpp     | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_send.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_send.cpp
index 035a5a8df77ae..638be63397dc6 100644
--- a/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_send.cpp
+++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_send.cpp
@@ -62,14 +62,14 @@ void test_sigwait() {
   int res;
   res = fork_and_signal(s);
   fprintf(stderr, "fork_and_signal with SIGUSR1,2: %d\n", res);
-  // CHECK: died with sig 10
+  // CHECK: died with sig {{10|30}}
   // CHECK: fork_and_signal with SIGUSR1,2: 0
 
   // test sigandset... s should only have SIGUSR2 now
   s = sigset_and(s, mkset(1, SIGUSR2));
   res = fork_and_signal(s);
   fprintf(stderr, "fork_and_signal with SIGUSR2: %d\n", res);
-  // CHECK: died with sig 12
+  // CHECK: died with sig {{12|31}}
   // CHECK: fork_and_signal with SIGUSR2: 0
 }
 

From 282f103026c78e1881843aee61ac412c153f6df9 Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Tue, 30 Jul 2024 09:03:00 +0200
Subject: [PATCH 127/427] [sanitizer_common] Fix internal_*stat on
 Linux/sparc64 (#101012)

```
  SanitizerCommon-Unit :: ./Sanitizer-sparcv9-Test/SanitizerCommon/FileOps
```
`FAIL`s on 64-bit Linux/sparc64:
```
projects/compiler-rt/lib/sanitizer_common/tests/./Sanitizer-sparcv9-Test --gtest_filter=SanitizerCommon.FileOps
--
compiler-rt/lib/sanitizer_common/tests/sanitizer_libc_test.cpp:144: Failure
Expected equality of these values:
  len1 + len2
    Which is: 10
  fsize
    Which is: 1721875535
```
The issue is similar to the mips64 case: the Linux/sparc64 `*stat`
syscalls take a `struct kernel_stat64 *` arg. Also the syscalls actually
used differ.

This patch handles this, adopting the mips64 code to avoid too much
duplication.

Tested on `sparc64-unknown-linux-gnu` and `x86_64-pc-linux-gnu`.

(cherry picked from commit fcd6bd5587cc376cd8f43b60d1c7d61fdfe0f535)
---
 .../lib/sanitizer_common/sanitizer_linux.cpp  | 41 +++++++++++++++----
 1 file changed, 34 insertions(+), 7 deletions(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 50e41da68d959..8d375ffcd079c 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -33,11 +33,15 @@
 // For mips64, syscall(__NR_stat) fills the buffer in the 'struct kernel_stat'
 // format. Struct kernel_stat is defined as 'struct stat' in asm/stat.h. To
 // access stat from asm/stat.h, without conflicting with definition in
-// sys/stat.h, we use this trick.
-#  if SANITIZER_MIPS64
+// sys/stat.h, we use this trick.  sparc64 is similar, using
+// syscall(__NR_stat64) and struct kernel_stat64.
+#  if SANITIZER_MIPS64 || SANITIZER_SPARC64
 #    include <asm/unistd.h>
 #    include <sys/types.h>
 #    define stat kernel_stat
+#    if SANITIZER_SPARC64
+#      define stat64 kernel_stat64
+#    endif
 #    if SANITIZER_GO
 #      undef st_atime
 #      undef st_mtime
@@ -48,6 +52,7 @@
 #    endif
 #    include <asm/stat.h>
 #    undef stat
+#    undef stat64
 #  endif
 
 #  include <dlfcn.h>
@@ -285,8 +290,7 @@ uptr internal_ftruncate(fd_t fd, uptr size) {
   return res;
 }
 
-#    if (!SANITIZER_LINUX_USES_64BIT_SYSCALLS || SANITIZER_SPARC) && \
-        SANITIZER_LINUX
+#    if !SANITIZER_LINUX_USES_64BIT_SYSCALLS && SANITIZER_LINUX
 static void stat64_to_stat(struct stat64 *in, struct stat *out) {
   internal_memset(out, 0, sizeof(*out));
   out->st_dev = in->st_dev;
@@ -327,7 +331,12 @@ static void statx_to_stat(struct statx *in, struct stat *out) {
 }
 #    endif
 
-#    if SANITIZER_MIPS64
+#    if SANITIZER_MIPS64 || SANITIZER_SPARC64
+#      if SANITIZER_MIPS64
+typedef struct kernel_stat kstat_t;
+#      else
+typedef struct kernel_stat64 kstat_t;
+#      endif
 // Undefine compatibility macros from <sys/stat.h>
 // so that they would not clash with the kernel_stat
 // st_[a|m|c]time fields
@@ -345,7 +354,7 @@ static void statx_to_stat(struct statx *in, struct stat *out) {
 #        undef st_mtime_nsec
 #        undef st_ctime_nsec
 #      endif
-static void kernel_stat_to_stat(struct kernel_stat *in, struct stat *out) {
+static void kernel_stat_to_stat(kstat_t *in, struct stat *out) {
   internal_memset(out, 0, sizeof(*out));
   out->st_dev = in->st_dev;
   out->st_ino = in->st_ino;
@@ -391,6 +400,12 @@ uptr internal_stat(const char *path, void *buf) {
           !SANITIZER_SPARC
   return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf,
                           0);
+#      elif SANITIZER_SPARC64
+  kstat_t buf64;
+  int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path,
+                             (uptr)&buf64, 0);
+  kernel_stat_to_stat(&buf64, (struct stat *)buf);
+  return res;
 #      else
   struct stat64 buf64;
   int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path,
@@ -423,6 +438,12 @@ uptr internal_lstat(const char *path, void *buf) {
           !SANITIZER_SPARC
   return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf,
                           AT_SYMLINK_NOFOLLOW);
+#      elif SANITIZER_SPARC64
+  kstat_t buf64;
+  int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path,
+                             (uptr)&buf64, AT_SYMLINK_NOFOLLOW);
+  kernel_stat_to_stat(&buf64, (struct stat *)buf);
+  return res;
 #      else
   struct stat64 buf64;
   int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path,
@@ -442,10 +463,16 @@ uptr internal_fstat(fd_t fd, void *buf) {
 #    if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS
 #      if SANITIZER_MIPS64
   // For mips64, fstat syscall fills buffer in the format of kernel_stat
-  struct kernel_stat kbuf;
+  kstat_t kbuf;
   int res = internal_syscall(SYSCALL(fstat), fd, &kbuf);
   kernel_stat_to_stat(&kbuf, (struct stat *)buf);
   return res;
+#      elif SANITIZER_LINUX && SANITIZER_SPARC64
+  // For sparc64, fstat64 syscall fills buffer in the format of kernel_stat64
+  kstat_t kbuf;
+  int res = internal_syscall(SYSCALL(fstat64), fd, &kbuf);
+  kernel_stat_to_stat(&kbuf, (struct stat *)buf);
+  return res;
 #      elif SANITIZER_LINUX && defined(__loongarch__)
   struct statx bufx;
   int res = internal_syscall(SYSCALL(statx), fd, "", AT_EMPTY_PATH,

From e2f25af711425fb238317582441f4bda56131891 Mon Sep 17 00:00:00 2001
From: Sam James <sam@gentoo.org>
Date: Fri, 2 Aug 2024 23:07:21 +0100
Subject: [PATCH 128/427] [ADT] Add `<cstdint>` to SmallVector (#101761)

SmallVector uses `uint32_t`, `uint64_t` without including `<cstdint>`
which fails to build w/ GCC 15 after a change in libstdc++ [0]

[0] https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3a817a4a5a6d94da9127af3be9f84a74e3076ee2

(cherry picked from commit 7e44305041d96b064c197216b931ae3917a34ac1)
---
 llvm/include/llvm/ADT/SmallVector.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h
index 09676d792dfeb..17444147b102a 100644
--- a/llvm/include/llvm/ADT/SmallVector.h
+++ b/llvm/include/llvm/ADT/SmallVector.h
@@ -19,6 +19,7 @@
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
+#include <cstdint>
 #include <cstdlib>
 #include <cstring>
 #include <functional>

From 01a49d21c757fa80b3d6cf5cb153840cc94f8830 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Sat, 3 Aug 2024 11:19:00 +0200
Subject: [PATCH 129/427] [libc++][bit] Improves rotate functions. (#98032)

Investigating #96612 shows our implementation was different from the
Standard and could cause UB. Testing the codegen showed quite a bit of
assembly generated for these functions. The functions have been written
differently which allows Clang to optimize the code to use simple CPU
rotate instructions.

Fixes: https://github.com/llvm/llvm-project/issues/96612
---
 libcxx/include/__bit/rotate.h                 | 37 +++++++++++++------
 .../std/numerics/bit/bitops.rot/rotl.pass.cpp |  4 ++
 .../std/numerics/bit/bitops.rot/rotr.pass.cpp |  4 ++
 3 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/libcxx/include/__bit/rotate.h b/libcxx/include/__bit/rotate.h
index d848056c3350d..90e430e9d0425 100644
--- a/libcxx/include/__bit/rotate.h
+++ b/libcxx/include/__bit/rotate.h
@@ -20,24 +20,37 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+// Writing two full functions for rotl and rotr makes it easier for the compiler
+// to optimize the code. On x86 this function becomes the ROL instruction and
+// the rotr function becomes the ROR instruction.
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotr(_Tp __t, int __cnt) _NOEXCEPT {
-  static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotr requires an unsigned integer type");
-  const unsigned int __dig = numeric_limits<_Tp>::digits;
-  if ((__cnt % __dig) == 0)
-    return __t;
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotl(_Tp __x, int __s) _NOEXCEPT {
+  static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotl requires an unsigned integer type");
+  const int __N = numeric_limits<_Tp>::digits;
+  int __r       = __s % __N;
+
+  if (__r == 0)
+    return __x;
 
-  if (__cnt < 0) {
-    __cnt *= -1;
-    return (__t << (__cnt % __dig)) | (__t >> (__dig - (__cnt % __dig))); // rotr with negative __cnt is similar to rotl
-  }
+  if (__r > 0)
+    return (__x << __r) | (__x >> (__N - __r));
 
-  return (__t >> (__cnt % __dig)) | (__t << (__dig - (__cnt % __dig)));
+  return (__x >> -__r) | (__x << (__N + __r));
 }
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotl(_Tp __t, int __cnt) _NOEXCEPT {
-  return std::__rotr(__t, -__cnt);
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotr(_Tp __x, int __s) _NOEXCEPT {
+  static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotr requires an unsigned integer type");
+  const int __N = numeric_limits<_Tp>::digits;
+  int __r       = __s % __N;
+
+  if (__r == 0)
+    return __x;
+
+  if (__r > 0)
+    return (__x >> __r) | (__x << (__N - __r));
+
+  return (__x << -__r) | (__x >> (__N + __r));
 }
 
 #if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp
index 50e498b5761e5..16eabbd2a5a4d 100644
--- a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp
@@ -41,6 +41,8 @@ constexpr bool test()
     assert(std::rotl(T(max - 1), 5) == T(max - 32));
     assert(std::rotl(T(max - 1), 6) == T(max - 64));
     assert(std::rotl(T(max - 1), 7) == T(max - 128));
+    assert(std::rotl(T(max - 1), std::numeric_limits<int>::max()) ==
+           std::rotl(T(max - 1), std::numeric_limits<int>::max() % std::numeric_limits<T>::digits));
 
     assert(std::rotl(T(max - 1), -1) == T(max - highbit));
     assert(std::rotl(T(max - 1), -2) == T(max - (highbit >> 1)));
@@ -49,6 +51,8 @@ constexpr bool test()
     assert(std::rotl(T(max - 1), -5) == T(max - (highbit >> 4)));
     assert(std::rotl(T(max - 1), -6) == T(max - (highbit >> 5)));
     assert(std::rotl(T(max - 1), -7) == T(max - (highbit >> 6)));
+    assert(std::rotl(T(max - 1), std::numeric_limits<int>::min()) ==
+           std::rotl(T(max - 1), std::numeric_limits<int>::min() % std::numeric_limits<T>::digits));
 
     assert(std::rotl(T(1), 0) == T(1));
     assert(std::rotl(T(1), 1) == T(2));
diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp
index 00c9e617d2edf..53405588266f7 100644
--- a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp
@@ -41,6 +41,8 @@ constexpr bool test()
     assert(std::rotr(T(max - 1), 5) == T(max - (highbit >> 4)));
     assert(std::rotr(T(max - 1), 6) == T(max - (highbit >> 5)));
     assert(std::rotr(T(max - 1), 7) == T(max - (highbit >> 6)));
+    assert(std::rotr(T(max - 1), std::numeric_limits<int>::max()) ==
+           std::rotr(T(max - 1), std::numeric_limits<int>::max() % std::numeric_limits<T>::digits));
 
     assert(std::rotr(T(max - 1), -1) == T(max - 2));
     assert(std::rotr(T(max - 1), -2) == T(max - 4));
@@ -49,6 +51,8 @@ constexpr bool test()
     assert(std::rotr(T(max - 1), -5) == T(max - 32));
     assert(std::rotr(T(max - 1), -6) == T(max - 64));
     assert(std::rotr(T(max - 1), -7) == T(max - 128));
+    assert(std::rotr(T(max - 1), std::numeric_limits<int>::min()) ==
+           std::rotr(T(max - 1), std::numeric_limits<int>::min() % std::numeric_limits<T>::digits));
 
     assert(std::rotr(T(128), 0) == T(128));
     assert(std::rotr(T(128), 1) == T(64));

From ad5beb7386ad86da7a568d5c5304a3acbd2b7296 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Fri, 2 Aug 2024 10:29:08 +0100
Subject: [PATCH 130/427] [AArch64] Avoid inlining if ZT0 needs preserving.
 (#101343)

Inlining may result in different behaviour when the callee clobbers ZT0,
because normally the call-site will have code to preserve ZT0. When
inlining the function this code to preserve ZT0 will no longer be
emitted, and so the resulting behaviour of the program is changed.

(cherry picked from commit fb470db7b3a8ce6853e8bf17d235617a2fa79434)
---
 .../AArch64/AArch64TargetTransformInfo.cpp    |  3 +-
 .../Inline/AArch64/sme-pstateza-attrs.ll      | 45 +++++++++++++++++++
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 45148449dfb82..9630b36b99348 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -254,7 +254,8 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
     return false;
 
   if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
-      CallerAttrs.requiresSMChange(CalleeAttrs)) {
+      CallerAttrs.requiresSMChange(CalleeAttrs) ||
+      CallerAttrs.requiresPreservingZT0(CalleeAttrs)) {
     if (hasPossibleIncompatibleOps(Callee))
       return false;
   }
diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstateza-attrs.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstateza-attrs.ll
index 816492768cc0f..5e638103a2b06 100644
--- a/llvm/test/Transforms/Inline/AArch64/sme-pstateza-attrs.ll
+++ b/llvm/test/Transforms/Inline/AArch64/sme-pstateza-attrs.ll
@@ -231,6 +231,51 @@ define void @shared_za_caller_private_za_callee_call_tpidr2_restore_dont_inline(
   ret void
 }
 
+define void @nonzt0_callee() {
+; CHECK-LABEL: define void @nonzt0_callee
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @inlined_body()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; inlineasm", ""()
+  call void @inlined_body()
+  ret void
+}
+
+define void @shared_zt0_caller_nonzt0_callee_dont_inline() "aarch64_inout_zt0" {
+; CHECK-LABEL: define void @shared_zt0_caller_nonzt0_callee_dont_inline
+; CHECK-SAME: () #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:    call void @nonzt0_callee()
+; CHECK-NEXT:    ret void
+;
+  call void @nonzt0_callee()
+  ret void
+}
+
+define void @shared_zt0_callee() "aarch64_inout_zt0" {
+; CHECK-LABEL: define void @shared_zt0_callee
+; CHECK-SAME: () #[[ATTR3]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @inlined_body()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; inlineasm", ""()
+  call void @inlined_body()
+  ret void
+}
+
+define void @shared_zt0_caller_shared_zt0_callee_inline() "aarch64_inout_zt0" {
+; CHECK-LABEL: define void @shared_zt0_caller_shared_zt0_callee_inline
+; CHECK-SAME: () #[[ATTR3]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @inlined_body()
+; CHECK-NEXT:    ret void
+;
+  call void @shared_zt0_callee()
+  ret void
+}
+
 declare void @__arm_za_disable()
 declare void @__arm_tpidr2_save()
 declare void @__arm_tpidr2_restore(ptr)

From de97808c777e711b8a2010992006f89d6a5a6653 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Fri, 2 Aug 2024 15:56:52 +0100
Subject: [PATCH 131/427] [AArch64] Avoid NEON dot product in
 streaming[-compatible] functions (#101677)

The NEON dot product is not valid in streaming mode.
A follow-up patch will improve codegen for these operations.

(cherry picked from commit 12937b1bfb23cca4731fa274f3358f7286cc6784)
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |   3 +
 ...-streaming-mode-fixed-length-reductions.ll | 143 ++++++++++++++++++
 2 files changed, 146 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6d413a09407a9..62078822c89b1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17719,6 +17719,9 @@ static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N,
 // and generate vecreduce.add(concat_vector(DOT, DOT2, ..)).
 static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
                                           const AArch64Subtarget *ST) {
+  if (!ST->isNeonAvailable())
+    return SDValue();
+
   if (!ST->hasDotProd())
     return performVecReduceAddCombineWithUADDLP(N, DAG);
 
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
new file mode 100644
index 0000000000000..00a15f4bcd639
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
@@ -0,0 +1,143 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mattr=+dotprod,+sve < %s | FileCheck %s -check-prefix=DOT
+; RUN: llc -mattr=+dotprod,+sve -force-streaming-compatible < %s | FileCheck %s --check-prefix=STREAMING-SVE
+; RUN: llc -mattr=+dotprod,+sme -force-streaming < %s | FileCheck %s --check-prefix=STREAMING-SVE
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define i32 @reduce_uaddv_v16i8(<32 x i8> %a) {
+; CHECK-LABEL: reduce_uaddv_v16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll2 v2.8h, v1.16b, #0
+; CHECK-NEXT:    ushll2 v3.8h, v0.16b, #0
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    uaddl2 v4.4s, v3.8h, v2.8h
+; CHECK-NEXT:    uaddl v2.4s, v3.4h, v2.4h
+; CHECK-NEXT:    uaddl2 v5.4s, v0.8h, v1.8h
+; CHECK-NEXT:    uaddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    add v1.4s, v5.4s, v4.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+;
+; DOT-LABEL: reduce_uaddv_v16i8:
+; DOT:       // %bb.0:
+; DOT-NEXT:    movi v2.16b, #1
+; DOT-NEXT:    movi v3.2d, #0000000000000000
+; DOT-NEXT:    udot v3.4s, v1.16b, v2.16b
+; DOT-NEXT:    udot v3.4s, v0.16b, v2.16b
+; DOT-NEXT:    addv s0, v3.4s
+; DOT-NEXT:    fmov w0, s0
+; DOT-NEXT:    ret
+;
+; STREAMING-SVE-LABEL: reduce_uaddv_v16i8:
+; STREAMING-SVE:       // %bb.0:
+; STREAMING-SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
+; STREAMING-SVE-NEXT:    uunpklo z2.h, z1.b
+; STREAMING-SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; STREAMING-SVE-NEXT:    uunpklo z3.h, z0.b
+; STREAMING-SVE-NEXT:    ptrue p0.s, vl4
+; STREAMING-SVE-NEXT:    ext z1.b, z1.b, z1.b, #8
+; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; STREAMING-SVE-NEXT:    uunpklo z1.h, z1.b
+; STREAMING-SVE-NEXT:    uunpklo z0.h, z0.b
+; STREAMING-SVE-NEXT:    uunpklo z4.s, z2.h
+; STREAMING-SVE-NEXT:    ext z2.b, z2.b, z2.b, #8
+; STREAMING-SVE-NEXT:    uunpklo z6.s, z3.h
+; STREAMING-SVE-NEXT:    ext z3.b, z3.b, z3.b, #8
+; STREAMING-SVE-NEXT:    mov z5.d, z1.d
+; STREAMING-SVE-NEXT:    uunpklo z7.s, z0.h
+; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; STREAMING-SVE-NEXT:    uunpklo z2.s, z2.h
+; STREAMING-SVE-NEXT:    uunpklo z3.s, z3.h
+; STREAMING-SVE-NEXT:    add z4.s, z6.s, z4.s
+; STREAMING-SVE-NEXT:    ext z5.b, z5.b, z1.b, #8
+; STREAMING-SVE-NEXT:    uunpklo z1.s, z1.h
+; STREAMING-SVE-NEXT:    uunpklo z0.s, z0.h
+; STREAMING-SVE-NEXT:    add z2.s, z3.s, z2.s
+; STREAMING-SVE-NEXT:    uunpklo z5.s, z5.h
+; STREAMING-SVE-NEXT:    add z1.s, z7.s, z1.s
+; STREAMING-SVE-NEXT:    add z0.s, z0.s, z5.s
+; STREAMING-SVE-NEXT:    add z1.s, z4.s, z1.s
+; STREAMING-SVE-NEXT:    add z0.s, z2.s, z0.s
+; STREAMING-SVE-NEXT:    add z0.s, z1.s, z0.s
+; STREAMING-SVE-NEXT:    uaddv d0, p0, z0.s
+; STREAMING-SVE-NEXT:    fmov x0, d0
+; STREAMING-SVE-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; STREAMING-SVE-NEXT:    ret
+  %1 = zext <32 x i8> %a to <32 x i32>
+  %2 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %1)
+  ret i32 %2
+}
+
+define i32 @reduce_saddv_v16i8(<32 x i8> %a) {
+; CHECK-LABEL: reduce_saddv_v16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll2 v2.8h, v1.16b, #0
+; CHECK-NEXT:    sshll2 v3.8h, v0.16b, #0
+; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    saddl2 v4.4s, v3.8h, v2.8h
+; CHECK-NEXT:    saddl v2.4s, v3.4h, v2.4h
+; CHECK-NEXT:    saddl2 v5.4s, v0.8h, v1.8h
+; CHECK-NEXT:    saddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    add v1.4s, v5.4s, v4.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+;
+; DOT-LABEL: reduce_saddv_v16i8:
+; DOT:       // %bb.0:
+; DOT-NEXT:    movi v2.16b, #1
+; DOT-NEXT:    movi v3.2d, #0000000000000000
+; DOT-NEXT:    sdot v3.4s, v1.16b, v2.16b
+; DOT-NEXT:    sdot v3.4s, v0.16b, v2.16b
+; DOT-NEXT:    addv s0, v3.4s
+; DOT-NEXT:    fmov w0, s0
+; DOT-NEXT:    ret
+;
+; STREAMING-SVE-LABEL: reduce_saddv_v16i8:
+; STREAMING-SVE:       // %bb.0:
+; STREAMING-SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
+; STREAMING-SVE-NEXT:    sunpklo z2.h, z1.b
+; STREAMING-SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; STREAMING-SVE-NEXT:    sunpklo z3.h, z0.b
+; STREAMING-SVE-NEXT:    ptrue p0.s, vl4
+; STREAMING-SVE-NEXT:    ext z1.b, z1.b, z1.b, #8
+; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; STREAMING-SVE-NEXT:    sunpklo z1.h, z1.b
+; STREAMING-SVE-NEXT:    sunpklo z0.h, z0.b
+; STREAMING-SVE-NEXT:    sunpklo z4.s, z2.h
+; STREAMING-SVE-NEXT:    ext z2.b, z2.b, z2.b, #8
+; STREAMING-SVE-NEXT:    sunpklo z6.s, z3.h
+; STREAMING-SVE-NEXT:    ext z3.b, z3.b, z3.b, #8
+; STREAMING-SVE-NEXT:    mov z5.d, z1.d
+; STREAMING-SVE-NEXT:    sunpklo z7.s, z0.h
+; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; STREAMING-SVE-NEXT:    sunpklo z2.s, z2.h
+; STREAMING-SVE-NEXT:    sunpklo z3.s, z3.h
+; STREAMING-SVE-NEXT:    add z4.s, z6.s, z4.s
+; STREAMING-SVE-NEXT:    ext z5.b, z5.b, z1.b, #8
+; STREAMING-SVE-NEXT:    sunpklo z1.s, z1.h
+; STREAMING-SVE-NEXT:    sunpklo z0.s, z0.h
+; STREAMING-SVE-NEXT:    add z2.s, z3.s, z2.s
+; STREAMING-SVE-NEXT:    sunpklo z5.s, z5.h
+; STREAMING-SVE-NEXT:    add z1.s, z7.s, z1.s
+; STREAMING-SVE-NEXT:    add z0.s, z0.s, z5.s
+; STREAMING-SVE-NEXT:    add z1.s, z4.s, z1.s
+; STREAMING-SVE-NEXT:    add z0.s, z2.s, z0.s
+; STREAMING-SVE-NEXT:    add z0.s, z1.s, z0.s
+; STREAMING-SVE-NEXT:    uaddv d0, p0, z0.s
+; STREAMING-SVE-NEXT:    fmov x0, d0
+; STREAMING-SVE-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; STREAMING-SVE-NEXT:    ret
+  %1 = sext <32 x i8> %a to <32 x i32>
+  %2 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %1)
+  ret i32 %2
+}

From 91d3e76a2b670aece2a15c54f3f9893c0ed96ba1 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin@arm.com>
Date: Fri, 2 Aug 2024 18:00:59 +0100
Subject: [PATCH 132/427] [AArch64][SME] Rewrite __arm_sc_memset to remove
 invalid instruction (#101522)

The implementation of __arm_sc_memset in compiler-rt contains
a Neon dup instruction which is not valid in streaming mode. This
patch rewrites the function, using an SVE mov instruction if available.

(cherry picked from commit d6649f2d4871c4535ae0519920e36100748890c4)
---
 .../lib/builtins/aarch64/sme-libc-mem-routines.S       | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
index 926ad3b1b6331..0318d9a6f1ebd 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S
@@ -252,7 +252,15 @@ DEFINE_COMPILERRT_FUNCTION_ALIAS(__arm_sc_memmove, __arm_sc_memcpy)
 #define zva_val  x5
 
 DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sc_memset)
-        dup     v0.16B, valw
+#ifdef __ARM_FEATURE_SVE
+        mov     z0.b, valw
+#else
+        bfi valw, valw, #8, #8
+        bfi valw, valw, #16, #16
+        bfi val, val, #32, #32
+        fmov d0, val
+        fmov v0.d[1], val
+#endif
         add     dstend2, dstin, count
 
         cmp     count, 96

From dcaa1cde296eb517f19fe49f3cdf5eaff874ebe0 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker@arm.com>
Date: Mon, 5 Aug 2024 11:25:44 +0100
Subject: [PATCH 133/427] [LLVM][TTI][SME] Allow optional auto-vectorisation
 for streaming functions. (#101679)

The command line option enable-scalable-autovec-in-streaming-mode is
used to enable scalable vectors but the same check is missing from
enableScalableVectorization, which is blocking auto-vectorisation.

(cherry picked from commit 7775a4882d7105fde7f7a81f3c72567d39afce45)
---
 .../AArch64/AArch64TargetTransformInfo.cpp    |  5 ++
 .../AArch64/AArch64TargetTransformInfo.h      |  2 +-
 .../AArch64/streaming-vectorization.ll        | 56 +++++++++++++++++++
 3 files changed, 62 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/streaming-vectorization.ll

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 9630b36b99348..237ba67b668fc 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2296,6 +2296,11 @@ std::optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic(
   return std::nullopt;
 }
 
+bool AArch64TTIImpl::enableScalableVectorization() const {
+  return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
+                                  EnableScalableAutovecInStreamingMode);
+}
+
 TypeSize
 AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
   switch (K) {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index a9189fd53f40b..4a6457d7a7dbf 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -381,7 +381,7 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
     return ST->isSVEorStreamingSVEAvailable();
   }
 
-  bool enableScalableVectorization() const { return ST->isSVEAvailable(); }
+  bool enableScalableVectorization() const;
 
   bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                    ElementCount VF) const;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/streaming-vectorization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-vectorization.ll
new file mode 100644
index 0000000000000..924d4bfb7836a
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-vectorization.ll
@@ -0,0 +1,56 @@
+; REQUIRES: asserts
+; RUN: opt -S -passes=loop-vectorize -debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,NOVEC
+; RUN: opt -S -passes=loop-vectorize -debug-only=loop-vectorize -enable-scalable-autovec-in-streaming-mode < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,VEC
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @normal_function(ptr %a, ptr %b, ptr %c) #0 {
+; CHECK: LV: Checking a loop in 'normal_function'
+; CHECK: LV: Scalable vectorization is available
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %arrayidx = getelementptr inbounds i32, ptr %c, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
+  %1 = load i8, ptr %arrayidx2, align 4
+  %zext = zext i8 %1 to i32
+  %add = add nsw i32 %zext, %0
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %iv
+  store i32 %add, ptr %arrayidx5, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @streaming_function(ptr %a, ptr %b, ptr %c) #0 "aarch64_pstate_sm_enabled" {
+; CHECK: LV: Checking a loop in 'streaming_function'
+; VEC: LV: Scalable vectorization is available
+; NOVEC: LV: Scalable vectorization is explicitly disabled
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %arrayidx = getelementptr inbounds i32, ptr %c, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
+  %1 = load i8, ptr %arrayidx2, align 4
+  %zext = zext i8 %1 to i32
+  %add = add nsw i32 %zext, %0
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %iv
+  store i32 %add, ptr %arrayidx5, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+attributes #0 = { vscale_range(1, 16) "target-features"="+sve,+sme" }

From 4ab53133ed3612cb4d532efd84823f9e5be22c0f Mon Sep 17 00:00:00 2001
From: Kiran Chandramohan <kiran.chandramohan@arm.com>
Date: Mon, 5 Aug 2024 12:43:37 +0100
Subject: [PATCH 134/427] [Driver] Restrict Ofast deprecation help message to
 Clang (#101682)

The discussion about this in Flang
(https://discourse.llvm.org/t/rfc-deprecate-ofast-in-flang/80243) has
not concluded hence restricting the deprecation only to Clang.

(cherry picked from commit e60ee1f2d70bdb0ac87b09ae685d669d8543b7bd)
---
 clang/include/clang/Driver/Options.td | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 359a698ea87dd..014a2bd85fdc6 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -932,8 +932,9 @@ def O_flag : Flag<["-"], "O">, Visibility<[ClangOption, CC1Option, FC1Option]>,
   Alias<O>, AliasArgs<["1"]>;
 def Ofast : Joined<["-"], "Ofast">, Group<O_Group>,
   Visibility<[ClangOption, CC1Option, FlangOption]>,
-  HelpText<"Deprecated; use '-O3 -ffast-math' for the same behavior,"
-  " or '-O3' to enable only conforming optimizations">;
+  HelpTextForVariants<[ClangOption, CC1Option],
+                      "Deprecated; use '-O3 -ffast-math' for the same behavior,"
+                      " or '-O3' to enable only conforming optimizations">;
 def P : Flag<["-"], "P">,
   Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
   Group<Preprocessor_Group>,

From 09831f28cf1e7630569dc672701fde1be90f8950 Mon Sep 17 00:00:00 2001
From: cor3ntin <corentinjabot@gmail.com>
Date: Mon, 5 Aug 2024 14:22:07 +0200
Subject: [PATCH 135/427] [Clang] SFINAE on mismatching pack length during
 constraint satisfaction checking (#101879)

If a fold expanded constraint would expand packs of different size, it
is not a valid pack expansion and it is not satisfied. This should not
produce an error.

Fixes #99430

(cherry picked from commit da380b26e4748ade5a8dba85b7df5e1c4eded8bc)
---
 clang/lib/Sema/SemaConcept.cpp          |  4 ++++
 clang/test/SemaCXX/cxx2c-fold-exprs.cpp | 30 +++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index 9e16b67284be4..c34d32002b5ad 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -531,6 +531,10 @@ static ExprResult calculateConstraintSatisfaction(
 
     std::optional<unsigned>
     EvaluateFoldExpandedConstraintSize(const CXXFoldExpr *FE) const {
+
+      // We should ignore errors in the presence of packs of different size.
+      Sema::SFINAETrap Trap(S);
+
       Expr *Pattern = FE->getPattern();
 
       SmallVector<UnexpandedParameterPack, 2> Unexpanded;
diff --git a/clang/test/SemaCXX/cxx2c-fold-exprs.cpp b/clang/test/SemaCXX/cxx2c-fold-exprs.cpp
index 1e0bc7bcfb4e7..0674135aac483 100644
--- a/clang/test/SemaCXX/cxx2c-fold-exprs.cpp
+++ b/clang/test/SemaCXX/cxx2c-fold-exprs.cpp
@@ -275,3 +275,33 @@ static_assert(S<int>::g<int>() == 2); // expected-error {{call to 'g' is ambiguo
 
 
 }
+
+namespace GH99430 {
+
+template <class _Ty1, class _Ty2>
+using _Synth_three_way_result = int;
+
+template <class... _Types>
+class tuple;
+
+template <int _Index>
+struct tuple_element;
+
+template <class, int...>
+struct _Three_way_comparison_result_with_tuple_like {
+  using type = int;
+};
+
+template <class... _TTypes, int... _Indices>
+  requires(requires {
+    typename _Synth_three_way_result<_TTypes, tuple_element<_Indices>>;
+  } && ...)
+
+struct _Three_way_comparison_result_with_tuple_like<tuple<_TTypes...>, _Indices...>{
+    using type = long;
+};
+
+static_assert(__is_same_as(_Three_way_comparison_result_with_tuple_like<tuple<int>, 0, 1>::type, int));
+static_assert(__is_same_as(_Three_way_comparison_result_with_tuple_like<tuple<int>, 0>::type, long));
+
+}

From 428c151659400de2d71f9012d43b3652e821b61f Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Mon, 5 Aug 2024 11:52:52 -0700
Subject: [PATCH 136/427] [Driver] Temporarily probe aarch64-linux-gnu GCC
 installation

As the comment explains, `*Triples[]` lists are discouraged and not
comprehensive anyway (e.g.
aarch64-unknown-linux-gnu/aarch64-unknown-linux-musl/aarch64-amazon-linux
do not work).

Boost incorrectly specifies --target=arm64-pc-linux ("arm64" should not
be used for Linux) and expects to probe "aarch64-linux-gnu". Add this
temporary workaround for the 19.x releases.
---
 clang/lib/Driver/ToolChains/Gnu.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index 52c2ee90b1b28..543f3965dfd4f 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -2463,7 +2463,8 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes(
   // lists should shrink over time. Please don't add more elements to *Triples.
   static const char *const AArch64LibDirs[] = {"/lib64", "/lib"};
   static const char *const AArch64Triples[] = {
-      "aarch64-none-linux-gnu", "aarch64-redhat-linux", "aarch64-suse-linux"};
+      "aarch64-none-linux-gnu", "aarch64-linux-gnu", "aarch64-redhat-linux",
+      "aarch64-suse-linux"};
   static const char *const AArch64beLibDirs[] = {"/lib"};
   static const char *const AArch64beTriples[] = {"aarch64_be-none-linux-gnu"};
 

From e1c32a819d223aed47ff4253fde919fb0cb89b37 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Mon, 5 Aug 2024 13:30:04 -0700
Subject: [PATCH 137/427] workflows/release-tasks: Add missing permissions for
 release binaries (#102023)

Now that the release binaries create artifact attestations, we need to
ensure that we call the workflow with the correct permissions.

(cherry picked from commit dc349a3f47882cdac7112c763d2964b59e77356a)
---
 .github/workflows/release-tasks.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/release-tasks.yml b/.github/workflows/release-tasks.yml
index 7dd4c306671b7..b7fea0785fed2 100644
--- a/.github/workflows/release-tasks.yml
+++ b/.github/workflows/release-tasks.yml
@@ -78,6 +78,8 @@ jobs:
     name: Build Release Binaries
     permissions:
       contents: write
+      id-token: write
+      attestations: write
     needs:
       - validate-tag
       - release-create

From b5cb9081fa4560ec9d87a5d81aa027f7239c61c7 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Mon, 5 Aug 2024 14:38:56 -0700
Subject: [PATCH 138/427] workflows/release-binaries: Give attestation
 artifacts a unique name (#102041)

We need a different attestation for each supported architecture, so
there artifacts all need to have a different name.

The upload step is run on a Linux runner, so no matter which
architecture's binary is being uploaded the runner.os and runner.arch
variables would always be 'Linux' and 'X64' and so we can't use them for
naming the artifact.

(cherry picked from commit 3c8dadda3aa20b89fb5ad29ae31380d9594c3430)
---
 .github/workflows/release-binaries.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml
index 7cc8b7a1e56e8..fae04e19b246b 100644
--- a/.github/workflows/release-binaries.yml
+++ b/.github/workflows/release-binaries.yml
@@ -442,7 +442,7 @@ jobs:
     - name: Upload Build Provenance
       uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 #v4.3.3
       with:
-        name: ${{ runner.os }}-${{ runner.arch }}-release-binary-attestation
+        name: ${{ needs.prepare.outputs.release-binary-filename }}-attestation
         path: ${{ needs.prepare.outputs.release-binary-filename }}.jsonl
 
     - name: Upload Release

From aa3bbf147fbbd289485e7bdf12d27c1d3c0169c6 Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Tue, 6 Aug 2024 09:08:41 +0200
Subject: [PATCH 139/427] [BinaryFormat] Disable MachOTest.UnalignedLC on SPARC
 (#100086)

As discussed in Issue #86793, the `MachOTest.UnalignedLC` test dies with
`SIGBUS` on SPARC, a strict-alignment target. It simply cannot work
there. Besides, the test invokes undefined behaviour on big-endian
targets, so this patch disables it on all of those.

Tested on `sparcv9-sun-solaris2.11` and `amd64-pc-solaris2.11`.

(cherry picked from commit 3a226dbe27ac7c7d935bc0968e84e31798a01207)
---
 llvm/unittests/BinaryFormat/MachOTest.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/llvm/unittests/BinaryFormat/MachOTest.cpp b/llvm/unittests/BinaryFormat/MachOTest.cpp
index 391298ff38d76..78b20c28a9549 100644
--- a/llvm/unittests/BinaryFormat/MachOTest.cpp
+++ b/llvm/unittests/BinaryFormat/MachOTest.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/bit.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/TargetParser/Triple.h"
 #include "gtest/gtest.h"
@@ -13,7 +14,15 @@
 using namespace llvm;
 using namespace llvm::MachO;
 
-TEST(MachOTest, UnalignedLC) {
+#if BYTE_ORDER == BIG_ENDIAN
+// As discussed in Issue #86793, this test cannot work on a strict-alignment
+// targets like SPARC.  Besides, it's undefined behaviour on big-endian hosts.
+#define MAYBE_UnalignedLC DISABLED_UnalignedLC
+#else
+#define MAYBE_UnalignedLC UnalignedLC
+#endif
+
+TEST(MachOTest, MAYBE_UnalignedLC) {
   unsigned char Valid32BitMachO[] = {
       0xCE, 0xFA, 0xED, 0xFE, 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
       0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,

From 1545897f089d3e16edc5142f1c6e3238ba64b26b Mon Sep 17 00:00:00 2001
From: Sam James <sam@gentoo.org>
Date: Tue, 6 Aug 2024 09:58:36 +0100
Subject: [PATCH 140/427] [LLDB] Add `<cstdint>` to AddressableBits (#102110)

(cherry picked from commit bb59f04e7e75dcbe39f1bf952304a157f0035314)
---
 lldb/include/lldb/Utility/AddressableBits.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lldb/include/lldb/Utility/AddressableBits.h b/lldb/include/lldb/Utility/AddressableBits.h
index 0d27c3561ec27..8c7a1ec5f52c0 100644
--- a/lldb/include/lldb/Utility/AddressableBits.h
+++ b/lldb/include/lldb/Utility/AddressableBits.h
@@ -12,6 +12,8 @@
 #include "lldb/lldb-forward.h"
 #include "lldb/lldb-public.h"
 
+#include <cstdint>
+
 namespace lldb_private {
 
 /// \class AddressableBits AddressableBits.h "lldb/Core/AddressableBits.h"

From 87656b311523827c1960a66009cc6cf81c8abb70 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Fri, 26 Jul 2024 13:10:16 +0100
Subject: [PATCH 141/427] [LAA] Refine stride checks for SCEVs during
 dependence analysis. (#99577)

Update getDependenceDistanceStrideAndSize to reason about different
combinations of strides directly and explicitly.

Update getPtrStride to return 0 for invariant pointers.

Then proceed by checking the strides.

If either source or sink are not strided by a constant (i.e. not a
non-wrapping AddRec) or invariant, the accesses may overlap
with earlier or later iterations and we cannot generate runtime
checks to disambiguate them.

Otherwise they are either loop invariant or strided. In that case, we
can generate a runtime check to disambiguate them.

If both are strided by constants, we proceed as previously.

This is an alternative to
https://github.com/llvm/llvm-project/pull/99239 and also replaces
additional checks if the underlying object is loop-invariant.

Fixes https://github.com/llvm/llvm-project/issues/87189.

PR: https://github.com/llvm/llvm-project/pull/99577
---
 .../llvm/Analysis/LoopAccessAnalysis.h        |  23 ++--
 llvm/lib/Analysis/LoopAccessAnalysis.cpp      | 121 ++++++++----------
 .../load-store-index-loaded-in-loop.ll        |  26 ++--
 .../pointer-with-unknown-bounds.ll            |   4 +-
 .../LoopAccessAnalysis/print-order.ll         |   6 +-
 .../LoopAccessAnalysis/select-dependence.ll   |   4 +-
 .../LoopAccessAnalysis/symbolic-stride.ll     |   4 +-
 7 files changed, 87 insertions(+), 101 deletions(-)

diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index afafb74bdcb0a..95a74b91f7acb 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -199,9 +199,8 @@ class MemoryDepChecker {
   /// Check whether the dependencies between the accesses are safe.
   ///
   /// Only checks sets with elements in \p CheckDeps.
-  bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
-                   const DenseMap<Value *, SmallVector<const Value *, 16>>
-                       &UnderlyingObjects);
+  bool areDepsSafe(const DepCandidates &AccessSets,
+                   const MemAccessInfoList &CheckDeps);
 
   /// No memory dependence was encountered that would inhibit
   /// vectorization.
@@ -351,11 +350,8 @@ class MemoryDepChecker {
   /// element access it records this distance in \p MinDepDistBytes (if this
   /// distance is smaller than any other distance encountered so far).
   /// Otherwise, this function returns true signaling a possible dependence.
-  Dependence::DepType
-  isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
-              unsigned BIdx,
-              const DenseMap<Value *, SmallVector<const Value *, 16>>
-                  &UnderlyingObjects);
+  Dependence::DepType isDependent(const MemAccessInfo &A, unsigned AIdx,
+                                  const MemAccessInfo &B, unsigned BIdx);
 
   /// Check whether the data dependence could prevent store-load
   /// forwarding.
@@ -392,11 +388,9 @@ class MemoryDepChecker {
   /// determined, or a struct containing (Distance, Stride, TypeSize, AIsWrite,
   /// BIsWrite).
   std::variant<Dependence::DepType, DepDistanceStrideAndSizeInfo>
-  getDependenceDistanceStrideAndSize(
-      const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B,
-      Instruction *BInst,
-      const DenseMap<Value *, SmallVector<const Value *, 16>>
-          &UnderlyingObjects);
+  getDependenceDistanceStrideAndSize(const MemAccessInfo &A, Instruction *AInst,
+                                     const MemAccessInfo &B,
+                                     Instruction *BInst);
 };
 
 class RuntimePointerChecking;
@@ -797,7 +791,8 @@ replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                           Value *Ptr);
 
 /// If the pointer has a constant stride return it in units of the access type
-/// size.  Otherwise return std::nullopt.
+/// size. If the pointer is loop-invariant, return 0. Otherwise return
+/// std::nullopt.
 ///
 /// Ensure that it does not wrap in the address space, assuming the predicate
 /// associated with \p PSE is true.
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 84214c47a10e1..f3fc69c86cd1e 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -728,11 +728,6 @@ class AccessAnalysis {
 
   MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
 
-  const DenseMap<Value *, SmallVector<const Value *, 16>> &
-  getUnderlyingObjects() {
-    return UnderlyingObjects;
-  }
-
 private:
   typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
 
@@ -1459,22 +1454,23 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
 }
 
 /// Check whether the access through \p Ptr has a constant stride.
-std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE,
-                                          Type *AccessTy, Value *Ptr,
-                                          const Loop *Lp,
-                                          const DenseMap<Value *, const SCEV *> &StridesMap,
-                                          bool Assume, bool ShouldCheckWrap) {
+std::optional<int64_t>
+llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
+                   const Loop *Lp,
+                   const DenseMap<Value *, const SCEV *> &StridesMap,
+                   bool Assume, bool ShouldCheckWrap) {
+  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
+  if (PSE.getSE()->isLoopInvariant(PtrScev, Lp))
+    return {0};
+
   Type *Ty = Ptr->getType();
   assert(Ty->isPointerTy() && "Unexpected non-ptr");
-
   if (isa<ScalableVectorType>(AccessTy)) {
     LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
                       << "\n");
     return std::nullopt;
   }
 
-  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
-
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
   if (Assume && !AR)
     AR = PSE.getAsAddRec(Ptr);
@@ -1899,24 +1895,12 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
   return ScaledDist % Stride;
 }
 
-/// Returns true if any of the underlying objects has a loop varying address,
-/// i.e. may change in \p L.
-static bool
-isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects,
-                             ScalarEvolution &SE, const Loop *L) {
-  return any_of(UnderlyingObjects, [&SE, L](const Value *UO) {
-    return !SE.isLoopInvariant(SE.getSCEV(const_cast<Value *>(UO)), L);
-  });
-}
-
 std::variant<MemoryDepChecker::Dependence::DepType,
              MemoryDepChecker::DepDistanceStrideAndSizeInfo>
 MemoryDepChecker::getDependenceDistanceStrideAndSize(
     const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
-    const AccessAnalysis::MemAccessInfo &B, Instruction *BInst,
-    const DenseMap<Value *, SmallVector<const Value *, 16>>
-        &UnderlyingObjects) {
-  auto &DL = InnermostLoop->getHeader()->getDataLayout();
+    const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) {
+  const auto &DL = InnermostLoop->getHeader()->getDataLayout();
   auto &SE = *PSE.getSE();
   auto [APtr, AIsWrite] = A;
   auto [BPtr, BIsWrite] = B;
@@ -1933,12 +1917,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
       BPtr->getType()->getPointerAddressSpace())
     return MemoryDepChecker::Dependence::Unknown;
 
-  int64_t StrideAPtr =
-      getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true)
-          .value_or(0);
-  int64_t StrideBPtr =
-      getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true)
-          .value_or(0);
+  std::optional<int64_t> StrideAPtr =
+      getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true, true);
+  std::optional<int64_t> StrideBPtr =
+      getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true, true);
 
   const SCEV *Src = PSE.getSCEV(APtr);
   const SCEV *Sink = PSE.getSCEV(BPtr);
@@ -1946,26 +1928,19 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
   // If the induction step is negative we have to invert source and sink of the
   // dependence when measuring the distance between them. We should not swap
   // AIsWrite with BIsWrite, as their uses expect them in program order.
-  if (StrideAPtr < 0) {
+  if (StrideAPtr && *StrideAPtr < 0) {
     std::swap(Src, Sink);
     std::swap(AInst, BInst);
+    std::swap(StrideAPtr, StrideBPtr);
   }
 
   const SCEV *Dist = SE.getMinusSCEV(Sink, Src);
 
   LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
-                    << "(Induction step: " << StrideAPtr << ")\n");
+                    << "\n");
   LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
                     << ": " << *Dist << "\n");
 
-  // Needs accesses where the addresses of the accessed underlying objects do
-  // not change within the loop.
-  if (isLoopVariantIndirectAddress(UnderlyingObjects.find(APtr)->second, SE,
-                                   InnermostLoop) ||
-      isLoopVariantIndirectAddress(UnderlyingObjects.find(BPtr)->second, SE,
-                                   InnermostLoop))
-    return MemoryDepChecker::Dependence::IndirectUnsafe;
-
   // Check if we can prove that Sink only accesses memory after Src's end or
   // vice versa. At the moment this is limited to cases where either source or
   // sink are loop invariant to avoid compile-time increases. This is not
@@ -1987,12 +1962,33 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
     }
   }
 
-  // Need accesses with constant strides and the same direction. We don't want
-  // to vectorize "A[B[i]] += ..." and similar code or pointer arithmetic that
-  // could wrap in the address space.
-  if (!StrideAPtr || !StrideBPtr || (StrideAPtr > 0 && StrideBPtr < 0) ||
-      (StrideAPtr < 0 && StrideBPtr > 0)) {
+  // Need accesses with constant strides and the same direction for further
+  // dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
+  // similar code or pointer arithmetic that could wrap in the address space.
+
+  // If either Src or Sink are not strided (i.e. not a non-wrapping AddRec) and
+  // not loop-invariant (stride will be 0 in that case), we cannot analyze the
+  // dependence further and also cannot generate runtime checks.
+  if (!StrideAPtr || !StrideBPtr) {
     LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
+    return MemoryDepChecker::Dependence::IndirectUnsafe;
+  }
+
+  int64_t StrideAPtrInt = *StrideAPtr;
+  int64_t StrideBPtrInt = *StrideBPtr;
+  LLVM_DEBUG(dbgs() << "LAA:  Src induction step: " << StrideAPtrInt
+                    << " Sink induction step: " << StrideBPtrInt << "\n");
+  // At least Src or Sink are loop invariant and the other is strided or
+  // invariant. We can generate a runtime check to disambiguate the accesses.
+  if (StrideAPtrInt == 0 || StrideBPtrInt == 0)
+    return MemoryDepChecker::Dependence::Unknown;
+
+  // Both Src and Sink have a constant stride, check if they are in the same
+  // direction.
+  if ((StrideAPtrInt > 0 && StrideBPtrInt < 0) ||
+      (StrideAPtrInt < 0 && StrideBPtrInt > 0)) {
+    LLVM_DEBUG(
+        dbgs() << "Pointer access with strides in different directions\n");
     return MemoryDepChecker::Dependence::Unknown;
   }
 
@@ -2001,22 +1997,20 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
       DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
   if (!HasSameSize)
     TypeByteSize = 0;
-  return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtr),
-                                      std::abs(StrideBPtr), TypeByteSize,
+  return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtrInt),
+                                      std::abs(StrideBPtrInt), TypeByteSize,
                                       AIsWrite, BIsWrite);
 }
 
-MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
-    const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
-    unsigned BIdx,
-    const DenseMap<Value *, SmallVector<const Value *, 16>>
-        &UnderlyingObjects) {
+MemoryDepChecker::Dependence::DepType
+MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
+                              const MemAccessInfo &B, unsigned BIdx) {
   assert(AIdx < BIdx && "Must pass arguments in program order");
 
   // Get the dependence distance, stride, type size and what access writes for
   // the dependence between A and B.
-  auto Res = getDependenceDistanceStrideAndSize(
-      A, InstMap[AIdx], B, InstMap[BIdx], UnderlyingObjects);
+  auto Res =
+      getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
   if (std::holds_alternative<Dependence::DepType>(Res))
     return std::get<Dependence::DepType>(Res);
 
@@ -2250,10 +2244,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
   return Dependence::BackwardVectorizable;
 }
 
-bool MemoryDepChecker::areDepsSafe(
-    DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
-    const DenseMap<Value *, SmallVector<const Value *, 16>>
-        &UnderlyingObjects) {
+bool MemoryDepChecker::areDepsSafe(const DepCandidates &AccessSets,
+                                   const MemAccessInfoList &CheckDeps) {
 
   MinDepDistBytes = -1;
   SmallPtrSet<MemAccessInfo, 8> Visited;
@@ -2296,8 +2288,8 @@ bool MemoryDepChecker::areDepsSafe(
             if (*I1 > *I2)
               std::swap(A, B);
 
-            Dependence::DepType Type = isDependent(*A.first, A.second, *B.first,
-                                                   B.second, UnderlyingObjects);
+            Dependence::DepType Type =
+                isDependent(*A.first, A.second, *B.first, B.second);
             mergeInStatus(Dependence::isSafeForVectorization(Type));
 
             // Gather dependences unless we accumulated MaxDependences
@@ -2652,8 +2644,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
   if (Accesses.isDependencyCheckNeeded()) {
     LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
     DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses,
-                                          Accesses.getDependenciesToCheck(),
-                                          Accesses.getUnderlyingObjects());
+                                          Accesses.getDependenciesToCheck());
 
     if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) {
       LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/load-store-index-loaded-in-loop.ll b/llvm/test/Analysis/LoopAccessAnalysis/load-store-index-loaded-in-loop.ll
index 2e61a28039846..6d8e296ec72fa 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/load-store-index-loaded-in-loop.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/load-store-index-loaded-in-loop.ll
@@ -9,21 +9,19 @@
 define void @B_indices_loaded_in_loop_A_stored(ptr %A, ptr noalias %B, i64 %N, i64 %off) {
 ; CHECK-LABEL: 'B_indices_loaded_in_loop_A_stored'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Memory dependences are safe with run-time checks
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unsafe indirect dependence.
 ; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        IndirectUnsafe:
+; CHECK-NEXT:            %l = load i32, ptr %gep.B, align 4 ->
+; CHECK-NEXT:            store i32 %inc, ptr %gep.B, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:        Unknown:
+; CHECK-NEXT:            %indices = load i8, ptr %gep.A, align 1 ->
+; CHECK-NEXT:            store i32 %l, ptr %gep.C, align 4
+; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
-; CHECK-NEXT:      Check 0:
-; CHECK-NEXT:        Comparing group ([[GRP1:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep.C = getelementptr inbounds i32, ptr %A, i64 %iv
-; CHECK-NEXT:        Against group ([[GRP2:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv.off
 ; CHECK-NEXT:      Grouped accesses:
-; CHECK-NEXT:        Group [[GRP1]]:
-; CHECK-NEXT:          (Low: %A High: ((4 * %N) + %A))
-; CHECK-NEXT:            Member: {%A,+,4}<nuw><%loop>
-; CHECK-NEXT:        Group [[GRP2]]:
-; CHECK-NEXT:          (Low: (%off + %A) High: (%N + %off + %A))
-; CHECK-NEXT:            Member: {(%off + %A),+,1}<nw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -59,9 +57,9 @@ define void @B_indices_loaded_in_loop_A_not_stored(ptr %A, ptr noalias %B, i64 %
 ; CHECK-LABEL: 'B_indices_loaded_in_loop_A_not_stored'
 ; CHECK-NEXT:    loop:
 ; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:  Unsafe indirect dependence.
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Unknown:
+; CHECK-NEXT:        IndirectUnsafe:
 ; CHECK-NEXT:            %l = load i32, ptr %gep.B, align 4 ->
 ; CHECK-NEXT:            store i32 %inc, ptr %gep.B, align 4
 ; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll b/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll
index 546a75cf4efd5..28ee6c6f0a89a 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll
@@ -13,9 +13,9 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 ; CHECK-NEXT: for.body:
 ; CHECK-NEXT:   Report: unsafe dependent memory operations in loop
 ; CHECK-NOT:    Report: cannot identify array bounds
-; CHECK-NEXT:   Unknown data dependence.
+; CHECK-NEXT:   Unsafe indirect dependence.
 ; CHECK-NEXT:     Dependences:
-; CHECK-NEXT:       Unknown:
+; CHECK-NEXT:       IndirectUnsafe:
 ; CHECK-NEXT:         %loadA = load i16, ptr %arrayidxA, align 2 ->
 ; CHECK-NEXT:         store i16 %mul, ptr %arrayidxA, align 2
 
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll b/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
index 18e45f469b4a3..8ca30383092c6 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
@@ -9,8 +9,9 @@
 ; CHECK-LABEL: 'negative_step'
 ; CHECK: LAA: Found an analyzable loop: loop
 ; CHECK: LAA: Checking memory dependencies
-; CHECK-NEXT: LAA: Src Scev: {(4092 + %A),+,-4}<nw><%loop>Sink Scev: {(4088 + %A)<nuw>,+,-4}<nw><%loop>(Induction step: -1)
+; CHECK-NEXT: LAA: Src Scev: {(4092 + %A),+,-4}<nw><%loop>Sink Scev: {(4088 + %A)<nuw>,+,-4}<nw><%loop>
 ; CHECK-NEXT: LAA: Distance for   store i32 %add, ptr %gep.A.plus.1, align 4 to   %l = load i32, ptr %gep.A, align 4: -4
+; CHECK-NEXT: LAA: Src induction step: -1 Sink induction step: -1
 ; CHECK-NEXT: LAA: Dependence is negative
 
 define void @negative_step(ptr nocapture %A) {
@@ -41,8 +42,9 @@ exit:
 ; CHECK-LABEL: 'positive_step'
 ; CHECK: LAA: Found an analyzable loop: loop
 ; CHECK: LAA: Checking memory dependencies
-; CHECK-NEXT: LAA: Src Scev: {(4 + %A)<nuw>,+,4}<nuw><%loop>Sink Scev: {%A,+,4}<nw><%loop>(Induction step: 1)
+; CHECK-NEXT: LAA: Src Scev: {(4 + %A)<nuw>,+,4}<nuw><%loop>Sink Scev: {%A,+,4}<nw><%loop>
 ; CHECK-NEXT: LAA: Distance for   %l = load i32, ptr %gep.A, align 4 to   store i32 %add, ptr %gep.A.minus.1, align 4: -4
+; CHECK-NEXT: LAA: Src induction step: 1 Sink induction step: 1
 ; CHECK-NEXT: LAA: Dependence is negative
 
 define void @positive_step(ptr nocapture %A) {
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/select-dependence.ll b/llvm/test/Analysis/LoopAccessAnalysis/select-dependence.ll
index 60fe8b4fcbed4..8bef7583c35c0 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/select-dependence.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/select-dependence.ll
@@ -5,9 +5,9 @@ define void @test(ptr noalias %x, ptr noalias %y, ptr noalias %z) {
 ; CHECK-LABEL: 'test'
 ; CHECK-NEXT:    loop:
 ; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:  Unsafe indirect dependence.
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Unknown:
+; CHECK-NEXT:        IndirectUnsafe:
 ; CHECK-NEXT:            %load = load double, ptr %gep.sel, align 8 ->
 ; CHECK-NEXT:            store double %load, ptr %gep.sel2, align 8
 ; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
index 7c1b11e22aef2..f0aed2421a96e 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
@@ -276,9 +276,9 @@ define void @single_stride_used_for_trip_count(ptr noalias %A, ptr noalias %B, i
 ; CHECK-LABEL: 'single_stride_used_for_trip_count'
 ; CHECK-NEXT:    loop:
 ; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:  Unsafe indirect dependence.
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Unknown:
+; CHECK-NEXT:        IndirectUnsafe:
 ; CHECK-NEXT:            %load = load i32, ptr %gep.A, align 4 ->
 ; CHECK-NEXT:            store i32 %add, ptr %gep.A.next, align 4
 ; CHECK-EMPTY:

From 1eae7f714a6ce9499bc115d5880b81f146cb7eee Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dimitry@andric.com>
Date: Tue, 23 Jul 2024 19:02:36 +0200
Subject: [PATCH 142/427] [CalcSpillWeights] Avoid x87 excess precision
 influencing weight result

Fixes #99396

The result of `VirtRegAuxInfo::weightCalcHelper` can be influenced by
x87 excess precision, which can result in slightly different register
choices when the compiler is hosted on x86_64 or i386. This leads to
different object file output when cross-compiling to i386, or native.

Similar to 7af3432e22b0, we need to add a `volatile` qualifier to the
local `Weight` variable to force it onto the stack, and avoid the excess
precision. Define `stack_float_t` in `MathExtras.h` for this purpose,
and use it.

(cherry picked from commit c80c09f3e380a0a2b00b36bebf72f43271a564c1)
---
 llvm/include/llvm/Support/MathExtras.h |  8 ++++
 llvm/lib/CodeGen/CalcSpillWeights.cpp  | 11 ++---
 llvm/test/CodeGen/X86/pr99396.ll       | 56 ++++++++++++++++++++++++++
 3 files changed, 70 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/pr99396.ll

diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h
index 0d0fa826f7bba..e568e42afcf4d 100644
--- a/llvm/include/llvm/Support/MathExtras.h
+++ b/llvm/include/llvm/Support/MathExtras.h
@@ -770,6 +770,14 @@ std::enable_if_t<std::is_signed_v<T>, T> MulOverflow(T X, T Y, T &Result) {
 #endif
 }
 
+/// Type to force float point values onto the stack, so that x86 doesn't add
+/// hidden precision, avoiding rounding differences on various platforms.
+#if defined(__i386__) || defined(_M_IX86)
+using stack_float_t = volatile float;
+#else
+using stack_float_t = float;
+#endif
+
 } // namespace llvm
 
 #endif
diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 1d767a3484bca..9d8c9119f7719 100644
--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -22,6 +22,7 @@
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <tuple>
@@ -257,7 +258,9 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
       return -1.0f;
     }
 
-    float Weight = 1.0f;
+    // Force Weight onto the stack so that x86 doesn't add hidden precision,
+    // similar to HWeight below.
+    stack_float_t Weight = 1.0f;
     if (IsSpillable) {
       // Get loop info for mi.
       if (MI->getParent() != MBB) {
@@ -284,11 +287,9 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
     Register HintReg = copyHint(MI, LI.reg(), TRI, MRI);
     if (!HintReg)
       continue;
-    // Force hweight onto the stack so that x86 doesn't add hidden precision,
+    // Force HWeight onto the stack so that x86 doesn't add hidden precision,
     // making the comparison incorrectly pass (i.e., 1 > 1 == true??).
-    //
-    // FIXME: we probably shouldn't use floats at all.
-    volatile float HWeight = Hint[HintReg] += Weight;
+    stack_float_t HWeight = Hint[HintReg] += Weight;
     if (HintReg.isVirtual() || MRI.isAllocatable(HintReg))
       CopyHints.insert(CopyHint(HintReg, HWeight));
   }
diff --git a/llvm/test/CodeGen/X86/pr99396.ll b/llvm/test/CodeGen/X86/pr99396.ll
new file mode 100644
index 0000000000000..f534d32038c22
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr99396.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=i386-unknown-freebsd -enable-misched -relocation-model=pic | FileCheck %s
+
+@c = external local_unnamed_addr global ptr
+
+declare i32 @fn2() local_unnamed_addr
+
+declare i32 @fn3() local_unnamed_addr
+
+define noundef i32 @fn4() #0 {
+entry:
+  %tmp0 = load i32, ptr @fn4, align 4
+; CHECK: movl fn4@GOT(%ebx), %edi
+; CHECK-NEXT: movl (%edi), %edx
+  %tmp1 = load ptr, ptr @c, align 4
+; CHECK: movl c@GOT(%ebx), %eax
+; CHECK-NEXT: movl (%eax), %esi
+; CHECK-NEXT: testl %esi, %esi
+  %cmp.g = icmp eq ptr %tmp1, null
+  br i1 %cmp.g, label %if.then.g, label %if.end3.g
+
+if.then.g:                                        ; preds = %entry
+  %tmp2 = load i32, ptr inttoptr (i32 1 to ptr), align 4
+  %cmp1.g = icmp slt i32 %tmp2, 0
+  br i1 %cmp1.g, label %if.then2.g, label %if.end3.g
+
+if.then2.g:                                       ; preds = %if.then.g
+  %.g = load volatile i32, ptr null, align 2147483648
+  br label %f.exit
+
+if.end3.g:                                        ; preds = %if.then.g, %entry
+  %h.i.g = icmp eq i32 %tmp0, 0
+  br i1 %h.i.g, label %f.exit, label %while.body.g
+
+while.body.g:                                     ; preds = %if.end3.g, %if.end8.g
+  %buff.addr.019.g = phi ptr [ %incdec.ptr.g, %if.end8.g ], [ @fn4, %if.end3.g ]
+  %g.addr.018.g = phi i32 [ %dec.g, %if.end8.g ], [ %tmp0, %if.end3.g ]
+  %call4.g = tail call i32 @fn3(ptr %tmp1, ptr %buff.addr.019.g, i32 %g.addr.018.g)
+  %cmp5.g = icmp slt i32 %call4.g, 0
+  br i1 %cmp5.g, label %if.then6.g, label %if.end8.g
+
+if.then6.g:                                       ; preds = %while.body.g
+  %call7.g = tail call i32 @fn2(ptr null)
+  br label %f.exit
+
+if.end8.g:                                        ; preds = %while.body.g
+  %dec.g = add i32 %g.addr.018.g, 1
+  %incdec.ptr.g = getelementptr i32, ptr %buff.addr.019.g, i32 1
+  store i64 0, ptr %tmp1, align 4
+  %h.not.g = icmp eq i32 %dec.g, 0
+  br i1 %h.not.g, label %f.exit, label %while.body.g
+
+f.exit:                                           ; preds = %if.end8.g, %if.then6.g, %if.end3.g, %if.then2.g
+  ret i32 0
+}
+
+attributes #0 = { "frame-pointer"="all" "tune-cpu"="generic" }

From a0f4170ab8c355a1d8c6d7a810d95121b23fad19 Mon Sep 17 00:00:00 2001
From: sinan <sinan.lin@linux.alibaba.com>
Date: Wed, 7 Aug 2024 15:57:25 +0800
Subject: [PATCH 143/427] [BOLT] Support map other function entry address
 (#101466)

Allow BOLT to map the old address to a new binary address if the old
address is the entry of the function.

(cherry picked from commit 734c0488b6e69300adaf568f880f40b113ae02ca)
---
 bolt/lib/Rewrite/RewriteInstance.cpp    |  8 +++++++
 bolt/test/X86/dynamic-relocs-on-entry.s | 32 +++++++++++++++++++++++++
 2 files changed, 40 insertions(+)
 create mode 100644 bolt/test/X86/dynamic-relocs-on-entry.s

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 33ebae3b6e6de..2e93b6576edad 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -5498,6 +5498,14 @@ uint64_t RewriteInstance::getNewFunctionOrDataAddress(uint64_t OldAddress) {
   if (const BinaryFunction *BF =
           BC->getBinaryFunctionContainingAddress(OldAddress)) {
     if (BF->isEmitted()) {
+      // If OldAddress is the another entry point of
+      // the function, then BOLT could get the new address.
+      if (BF->isMultiEntry()) {
+        for (const BinaryBasicBlock &BB : *BF)
+          if (BB.isEntryPoint() &&
+              (BF->getAddress() + BB.getOffset()) == OldAddress)
+            return BF->getOutputAddress() + BB.getOffset();
+      }
       BC->errs() << "BOLT-ERROR: unable to get new address corresponding to "
                     "input address 0x"
                  << Twine::utohexstr(OldAddress) << " in function " << *BF
diff --git a/bolt/test/X86/dynamic-relocs-on-entry.s b/bolt/test/X86/dynamic-relocs-on-entry.s
new file mode 100644
index 0000000000000..2a29a43c4939a
--- /dev/null
+++ b/bolt/test/X86/dynamic-relocs-on-entry.s
@@ -0,0 +1,32 @@
+// This test examines whether BOLT can correctly process when
+// dynamic relocation points to other entry points of the
+// function.
+
+# RUN: %clang %cflags -fPIC -pie %s -o %t.exe -nostdlib -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.bolt > %t.out.txt
+# RUN: readelf -r %t.bolt >> %t.out.txt
+# RUN: llvm-objdump --disassemble-symbols=chain %t.bolt >> %t.out.txt
+# RUN: FileCheck %s --input-file=%t.out.txt
+
+## Check if the new address in `chain` is correctly updated by BOLT
+# CHECK: Relocation section '.rela.dyn' at offset 0x{{.*}} contains 1 entry:
+# CHECK: {{.*}} R_X86_64_RELATIVE [[#%x,ADDR:]]
+# CHECK: [[#ADDR]]: c3 retq
+	.text
+	.type   chain, @function
+chain:
+	movq    $1, %rax
+Label:
+	ret
+	.size   chain, .-chain
+
+	.type   _start, @function
+	.global _start
+_start:
+	jmpq    *.Lfoo(%rip)
+	ret
+	.size   _start, .-_start
+
+	.data
+.Lfoo:
+	.quad Label
\ No newline at end of file

From eb4619cf5f022f1d9d89e498fb85d14d5de651a5 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard@arm.com>
Date: Wed, 7 Aug 2024 10:20:26 +0100
Subject: [PATCH 144/427] [lld][ARM] Fix assertion when mixing ARM and Thumb
 objects (#101985)

Previously, we selected the Thumb2 PLT sequences if any input object is
marked as not supporting the ARM ISA, which then causes assertion
failures when calls from ARM code in other objects are seen. I think the
intention here was to only use Thumb PLTs when the target does not have
the ARM ISA available, signalled by no objects being marked as having it
available. To do that we need to track which ISAs we have seen as we
parse the build attributes, and defer the decision about PLTs until all
input objects have been parsed.

This bug was triggered by real code in picolibc, which have some
versions of string.h functions built with Thumb2-only build attributes,
so that they are compatible with v7-A, v7-R and v7-M.

Fixes #99008.

(cherry picked from commit a1c6467bd90905d52cf8f6162b60907f8e98a704)
---
 lld/ELF/Arch/ARM.cpp          | 21 +++++++++++------
 lld/ELF/Config.h              |  3 ++-
 lld/ELF/InputFiles.cpp        |  6 ++---
 lld/test/ELF/arm-mixed-plts.s | 44 +++++++++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 12 deletions(-)
 create mode 100644 lld/test/ELF/arm-mixed-plts.s

diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp
index 3e0efe540e1bf..07a7535c4a231 100644
--- a/lld/ELF/Arch/ARM.cpp
+++ b/lld/ELF/Arch/ARM.cpp
@@ -228,10 +228,16 @@ static void writePltHeaderLong(uint8_t *buf) {
   write32(buf + 16, gotPlt - l1 - 8);
 }
 
+// True if we should use Thumb PLTs, which currently require Thumb2, and are
+// only used if the target does not have the ARM ISA.
+static bool useThumbPLTs() {
+  return config->armHasThumb2ISA && !config->armHasArmISA;
+}
+
 // The default PLT header requires the .got.plt to be within 128 Mb of the
 // .plt in the positive direction.
 void ARM::writePltHeader(uint8_t *buf) const {
-  if (config->armThumbPLTs) {
+  if (useThumbPLTs()) {
     // The instruction sequence for thumb:
     //
     // 0: b500          push    {lr}
@@ -289,7 +295,7 @@ void ARM::writePltHeader(uint8_t *buf) const {
 }
 
 void ARM::addPltHeaderSymbols(InputSection &isec) const {
-  if (config->armThumbPLTs) {
+  if (useThumbPLTs()) {
     addSyntheticLocal("$t", STT_NOTYPE, 0, 0, isec);
     addSyntheticLocal("$d", STT_NOTYPE, 12, 0, isec);
   } else {
@@ -315,7 +321,7 @@ static void writePltLong(uint8_t *buf, uint64_t gotPltEntryAddr,
 void ARM::writePlt(uint8_t *buf, const Symbol &sym,
                    uint64_t pltEntryAddr) const {
 
-  if (!config->armThumbPLTs) {
+  if (!useThumbPLTs()) {
     uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8;
 
     // The PLT entry is similar to the example given in Appendix A of ELF for
@@ -367,7 +373,7 @@ void ARM::writePlt(uint8_t *buf, const Symbol &sym,
 }
 
 void ARM::addPltSymbols(InputSection &isec, uint64_t off) const {
-  if (config->armThumbPLTs) {
+  if (useThumbPLTs()) {
     addSyntheticLocal("$t", STT_NOTYPE, off, 0, isec);
   } else {
     addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec);
@@ -393,7 +399,7 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
   case R_ARM_JUMP24:
     // Source is ARM, all PLT entries are ARM so no interworking required.
     // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb).
-    assert(!config->armThumbPLTs &&
+    assert(!useThumbPLTs() &&
            "If the source is ARM, we should not need Thumb PLTs");
     if (s.isFunc() && expr == R_PC && (s.getVA() & 1))
       return true;
@@ -407,7 +413,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
   case R_ARM_THM_JUMP24:
     // Source is Thumb, when all PLT entries are ARM interworking is required.
     // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM).
-    if ((expr == R_PLT_PC && !config->armThumbPLTs) || (s.isFunc() && (s.getVA() & 1) == 0))
+    if ((expr == R_PLT_PC && !useThumbPLTs()) ||
+        (s.isFunc() && (s.getVA() & 1) == 0))
       return true;
     [[fallthrough]];
   case R_ARM_THM_CALL: {
@@ -675,7 +682,7 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
     // PLT entries are always ARM state so we know we need to interwork.
     assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate().
     bool bit0Thumb = val & 1;
-    bool useThumb = bit0Thumb || config->armThumbPLTs;
+    bool useThumb = bit0Thumb || useThumbPLTs();
     bool isBlx = (read16(loc + 2) & 0x1000) == 0;
     // lld 10.0 and before always used bit0Thumb when deciding to write a BLX
     // even when type not STT_FUNC.
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 0173be396163e..28726d48e4284 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -217,7 +217,8 @@ struct Config {
   bool allowMultipleDefinition;
   bool fatLTOObjects;
   bool androidPackDynRelocs = false;
-  bool armThumbPLTs = false;
+  bool armHasArmISA = false;
+  bool armHasThumb2ISA = false;
   bool armHasBlx = false;
   bool armHasMovtMovw = false;
   bool armJ1J2BranchEncoding = false;
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index f1c0eb292361b..48f5a9609ecfb 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -203,10 +203,8 @@ static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) {
       attributes.getAttributeValue(ARMBuildAttrs::ARM_ISA_use);
   std::optional<unsigned> thumb =
       attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use);
-  bool noArmISA = !armISA || *armISA == ARMBuildAttrs::Not_Allowed;
-  bool hasThumb2 = thumb && *thumb >= ARMBuildAttrs::AllowThumb32;
-  if (noArmISA && hasThumb2)
-    config->armThumbPLTs = true;
+  config->armHasArmISA |= armISA && *armISA >= ARMBuildAttrs::Allowed;
+  config->armHasThumb2ISA |= thumb && *thumb >= ARMBuildAttrs::AllowThumb32;
 }
 
 InputFile::InputFile(Kind k, MemoryBufferRef m)
diff --git a/lld/test/ELF/arm-mixed-plts.s b/lld/test/ELF/arm-mixed-plts.s
new file mode 100644
index 0000000000000..801de70f4f101
--- /dev/null
+++ b/lld/test/ELF/arm-mixed-plts.s
@@ -0,0 +1,44 @@
+# REQUIRES: arm
+
+# RUN: rm -rf %t && split-file %s %t
+# RUN: llvm-mc -filetype=obj -arm-add-build-attributes -triple=armv7a-none-linux-gnueabi %t/a.s -o %t1.o
+# RUN: llvm-mc -filetype=obj -arm-add-build-attributes -triple=armv7a-none-linux-gnueabi %t/b.s -o %t2.o
+# RUN: ld.lld -shared %t1.o %t2.o -o %t.so
+# RUN: llvm-objdump -d %t.so | FileCheck %s
+
+## Check that, when the input is a mixture of objects which can and cannot use
+## the ARM ISA, we use the default ARM PLT sequences.
+
+# CHECK:      <.plt>:
+# CHECK-NEXT: e52de004      str     lr, [sp, #-0x4]!
+# CHECK-NEXT: e28fe600      add     lr, pc, #0, #12
+# CHECK-NEXT: e28eea20      add     lr, lr, #32, #20
+# CHECK-NEXT: e5bef084      ldr     pc, [lr, #0x84]!
+# CHECK-NEXT: d4 d4 d4 d4   .word   0xd4d4d4d4
+# CHECK-NEXT: d4 d4 d4 d4   .word   0xd4d4d4d4
+# CHECK-NEXT: d4 d4 d4 d4   .word   0xd4d4d4d4
+# CHECK-NEXT: d4 d4 d4 d4   .word   0xd4d4d4d4
+# CHECK-NEXT: e28fc600      add     r12, pc, #0, #12
+# CHECK-NEXT: e28cca20      add     r12, r12, #32, #20
+# CHECK-NEXT: e5bcf06c      ldr     pc, [r12, #0x6c]!
+# CHECK-NEXT: d4 d4 d4 d4   .word   0xd4d4d4d4
+
+#--- a.s
+  .globl foo
+  .type foo, %function
+  .globl bar
+  .type bar, %function
+
+  .thumb
+foo:
+  bl bar
+  bx lr
+
+#--- b.s
+  .eabi_attribute Tag_ARM_ISA_use, 0
+
+  .arm
+  .globl bar
+  .type bar, %function
+bar:
+  bx lr

From 2e0782c4db0a1bde3edebac64830291517e16c80 Mon Sep 17 00:00:00 2001
From: sinan <sinan.lin@linux.alibaba.com>
Date: Wed, 7 Aug 2024 18:02:42 +0800
Subject: [PATCH 145/427] [BOLT] Skip PLT search for zero-value weak reference
 symbols (#69136)

Take a common weak reference pattern for example
```
    __attribute__((weak)) void undef_weak_fun();

      if (&undef_weak_fun)
        undef_weak_fun();
```

In this case, an undefined weak symbol `undef_weak_fun` has an address
of zero, and Bolt incorrectly changes the relocation for the
corresponding symbol to symbol@PLT, leading to incorrect runtime
behavior.

(cherry picked from commit 6c8933e1a095028d648a5a26aecee0f569304dd0)
---
 bolt/lib/Rewrite/RewriteInstance.cpp          | 11 +++++-
 .../AArch64/update-weak-reference-symbol.s    | 34 +++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 bolt/test/AArch64/update-weak-reference-symbol.s

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 2e93b6576edad..5a2c2637eb01f 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -2143,6 +2143,14 @@ bool RewriteInstance::analyzeRelocation(
   if (!Relocation::isSupported(RType))
     return false;
 
+  auto IsWeakReference = [](const SymbolRef &Symbol) {
+    Expected<uint32_t> SymFlagsOrErr = Symbol.getFlags();
+    if (!SymFlagsOrErr)
+      return false;
+    return (*SymFlagsOrErr & SymbolRef::SF_Undefined) &&
+           (*SymFlagsOrErr & SymbolRef::SF_Weak);
+  };
+
   const bool IsAArch64 = BC->isAArch64();
 
   const size_t RelSize = Relocation::getSizeForType(RType);
@@ -2174,7 +2182,8 @@ bool RewriteInstance::analyzeRelocation(
     // Section symbols are marked as ST_Debug.
     IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug);
     // Check for PLT entry registered with symbol name
-    if (!SymbolAddress && (IsAArch64 || BC->isRISCV())) {
+    if (!SymbolAddress && !IsWeakReference(Symbol) &&
+        (IsAArch64 || BC->isRISCV())) {
       const BinaryData *BD = BC->getPLTBinaryDataByName(SymbolName);
       SymbolAddress = BD ? BD->getAddress() : 0;
     }
diff --git a/bolt/test/AArch64/update-weak-reference-symbol.s b/bolt/test/AArch64/update-weak-reference-symbol.s
new file mode 100644
index 0000000000000..600a06b8b6d8f
--- /dev/null
+++ b/bolt/test/AArch64/update-weak-reference-symbol.s
@@ -0,0 +1,34 @@
+// This test checks whether BOLT can correctly handle relocations against weak symbols.
+
+// RUN: %clang %cflags -Wl,-z,notext -shared -Wl,-q %s -o %t.so
+// RUN: llvm-bolt %t.so -o %t.so.bolt
+// RUN: llvm-nm -n %t.so.bolt > %t.out.txt
+// RUN: llvm-objdump -dj .rodata %t.so.bolt >> %t.out.txt
+// RUN: FileCheck %s --input-file=%t.out.txt
+
+# CHECK: w func_1
+# CHECK: {{0+}}[[#%x,ADDR:]] W func_2
+
+# CHECK: {{.*}} <.rodata>:
+# CHECK-NEXT: {{.*}} .word 0x00000000
+# CHECK-NEXT: {{.*}} .word 0x00000000
+# CHECK-NEXT: {{.*}} .word 0x{{[0]+}}[[#ADDR]]
+# CHECK-NEXT: {{.*}} .word 0x00000000
+
+	.text
+	.weak	func_2
+	.weak	func_1
+	.global	wow
+	.type	wow, %function
+wow:
+	bl func_1
+	bl func_2
+	ret
+	.type   func_2, %function
+func_2:
+	ret
+	.section	.rodata
+.LC0:
+	.xword	func_1
+.LC1:
+	.xword	func_2

From c25c15edb1cc58796ede2071d4fdbdd1575bcd66 Mon Sep 17 00:00:00 2001
From: Lucas Duarte Prates <lucas.prates@arm.com>
Date: Wed, 7 Aug 2024 15:15:25 +0100
Subject: [PATCH 146/427] [AArch64] Don't replace dst of SWP instructions with
 (X|W)ZR (#102139)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change updates the AArch64DeadRegisterDefinition pass to ensure it
does not replace the destination register of a SWP instruction with the
zero register when its value is unused. This is necessary to ensure that
the ordering of such instructions in relation to DMB.LD barries adheres
to the definitions of the AArch64 Memory Model.

The memory model states the following (ARMARM version DDI 0487K.a
§B2.3.7):
```
Barrier-ordered-before

An effect E1 is Barrier-ordered-before an effect E2 if one of the following applies:
[...]
* All of the following apply:
- E1 is a Memory Read effect.
- E1 is generated by an instruction whose destination register is not WZR or XZR.
- E1 appears in program order before E3.
- E3 is either a DMB LD effect or a DSB LD effect.
- E3 appears in program order before E2.
```

Prior to this change, by replacing the destination register of such SWP
instruction with WZR/XZR, the ordering relation described above was
incorrectly removed from the generated code.

The new behaviour is ensured in this patch by adding the relevant
`SWP[L](B|H|W|X)` instructions to list in the `atomicReadDroppedOnZero`
predicate, which already covered the `LD<Op>` instructions that are
subject to the same effect.

Fixes #68428.

(cherry picked from commit beb37e2e22b549b361be7269a52a3715649e956a)
---
 .../AArch64DeadRegisterDefinitionsPass.cpp    |  4 ++
 .../Atomics/aarch64-atomic-exchange-fence.ll  | 64 +++++++++++++++++++
 2 files changed, 68 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-exchange-fence.ll

diff --git a/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index 2bc14f9821e63..161cf24dd4037 100644
--- a/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -108,6 +108,10 @@ static bool atomicReadDroppedOnZero(unsigned Opcode) {
     case AArch64::LDUMINW:    case AArch64::LDUMINX:
     case AArch64::LDUMINLB:   case AArch64::LDUMINLH:
     case AArch64::LDUMINLW:   case AArch64::LDUMINLX:
+    case AArch64::SWPB:       case AArch64::SWPH:
+    case AArch64::SWPW:       case AArch64::SWPX:
+    case AArch64::SWPLB:      case AArch64::SWPLH:
+    case AArch64::SWPLW:      case AArch64::SWPLX:
     return true;
   }
   return false;
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-exchange-fence.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-exchange-fence.ll
new file mode 100644
index 0000000000000..2adbc709d238d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-exchange-fence.ll
@@ -0,0 +1,64 @@
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse -O0 | FileCheck %s
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse -O1 | FileCheck %s
+
+; When their destination register is WZR/ZZR, SWP operations are not regarded as
+; a read for the purpose of a DMB.LD in the AArch64 memory model.
+; This test ensures that the AArch64DeadRegisterDefinitions pass does not
+; replace the desitnation register of SWP instructions with the zero register
+; when the read value is unused.
+
+define dso_local i32 @atomic_exchange_monotonic(ptr %ptr, ptr %ptr2, i32 %value) {
+; CHECK-LABEL: atomic_exchange_monotonic:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    swp
+; CHECK-NOT:     wzr
+; CHECK-NEXT:    dmb ishld
+; CHECK-NEXT:    ldr w0, [x1]
+; CHECK-NEXT:    ret
+    %r0 = atomicrmw xchg ptr %ptr, i32 %value monotonic
+    fence acquire
+    %r1 = load atomic i32, ptr %ptr2 monotonic, align 4
+    ret i32 %r1
+}
+
+define dso_local i32 @atomic_exchange_acquire(ptr %ptr, ptr %ptr2, i32 %value) {
+; CHECK-LABEL: atomic_exchange_acquire:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    swpa
+; CHECK-NOT:     wzr
+; CHECK-NEXT:    dmb ishld
+; CHECK-NEXT:    ldr w0, [x1]
+; CHECK-NEXT:    ret
+    %r0 = atomicrmw xchg ptr %ptr, i32 %value acquire
+    fence acquire
+    %r1 = load atomic i32, ptr %ptr2 monotonic, align 4
+    ret i32 %r1
+}
+
+define dso_local i32 @atomic_exchange_release(ptr %ptr, ptr %ptr2, i32 %value) {
+; CHECK-LABEL: atomic_exchange_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    swpl
+; CHECK-NOT:     wzr
+; CHECK-NEXT:    dmb ishld
+; CHECK-NEXT:    ldr w0, [x1]
+; CHECK-NEXT:    ret
+    %r0 = atomicrmw xchg ptr %ptr, i32 %value release
+    fence acquire
+    %r1 = load atomic i32, ptr %ptr2 monotonic, align 4
+    ret i32 %r1
+}
+
+define dso_local i32 @atomic_exchange_acquire_release(ptr %ptr, ptr %ptr2, i32 %value) {
+; CHECK-LABEL: atomic_exchange_acquire_release:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    swpal
+; CHECK-NOT:     wzr
+; CHECK-NEXT:    dmb ishld
+; CHECK-NEXT:    ldr w0, [x1]
+; CHECK-NEXT:    ret
+    %r0 = atomicrmw xchg ptr %ptr, i32 %value acq_rel
+    fence acquire
+    %r1 = load atomic i32, ptr %ptr2 monotonic, align 4
+    ret i32 %r1
+}

From dca18c3334dd18642a5439ecf3a8854c6d9ed12e Mon Sep 17 00:00:00 2001
From: Ian Anderson <iana@apple.com>
Date: Wed, 7 Aug 2024 10:14:58 -0700
Subject: [PATCH 147/427] [clang][modules] Enable built-in modules for the
 upcoming Apple releases (#102239)

The upcoming Apple SDK releases will support the clang built-in headers
being in the clang built-in modules: stop passing
-fbuiltin-headers-in-system-modules for those SDK versions.

(cherry picked from commit 961639962251de7428c3fe93fa17cfa6ab3c561a)
---
 clang/lib/Driver/ToolChains/Darwin.cpp        | 39 +++++++++++++++----
 .../Inputs/DriverKit23.0.sdk/SDKSettings.json |  1 +
 .../SDKSettings.json                          |  0
 clang/test/Driver/darwin-builtin-modules.c    |  5 ++-
 4 files changed, 36 insertions(+), 9 deletions(-)
 create mode 100644 clang/test/Driver/Inputs/DriverKit23.0.sdk/SDKSettings.json
 rename clang/test/Driver/Inputs/{MacOSX99.0.sdk => MacOSX15.0.sdk}/SDKSettings.json (100%)

diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp
index c6f9d7beffb1d..b8b2feb5a149e 100644
--- a/clang/lib/Driver/ToolChains/Darwin.cpp
+++ b/clang/lib/Driver/ToolChains/Darwin.cpp
@@ -2923,22 +2923,47 @@ bool Darwin::isAlignedAllocationUnavailable() const {
   return TargetVersion < alignedAllocMinVersion(OS);
 }
 
-static bool sdkSupportsBuiltinModules(const Darwin::DarwinPlatformKind &TargetPlatform, const std::optional<DarwinSDKInfo> &SDKInfo) {
+static bool sdkSupportsBuiltinModules(
+    const Darwin::DarwinPlatformKind &TargetPlatform,
+    const Darwin::DarwinEnvironmentKind &TargetEnvironment,
+    const std::optional<DarwinSDKInfo> &SDKInfo) {
+  switch (TargetEnvironment) {
+  case Darwin::NativeEnvironment:
+  case Darwin::Simulator:
+  case Darwin::MacCatalyst:
+    // Standard xnu/Mach/Darwin based environments
+    // depend on the SDK version.
+    break;
+  default:
+    // All other environments support builtin modules from the start.
+    return true;
+  }
+
   if (!SDKInfo)
+    // If there is no SDK info, assume this is building against a
+    // pre-SDK version of macOS (i.e. before Mac OS X 10.4). Those
+    // don't support modules anyway, but the headers definitely
+    // don't support builtin modules either. It might also be some
+    // kind of degenerate build environment, err on the side of
+    // the old behavior which is to not use builtin modules.
     return false;
 
   VersionTuple SDKVersion = SDKInfo->getVersion();
   switch (TargetPlatform) {
+  // Existing SDKs added support for builtin modules in the fall
+  // 2024 major releases.
   case Darwin::MacOS:
-    return SDKVersion >= VersionTuple(99U);
+    return SDKVersion >= VersionTuple(15U);
   case Darwin::IPhoneOS:
-    return SDKVersion >= VersionTuple(99U);
+    return SDKVersion >= VersionTuple(18U);
   case Darwin::TvOS:
-    return SDKVersion >= VersionTuple(99U);
+    return SDKVersion >= VersionTuple(18U);
   case Darwin::WatchOS:
-    return SDKVersion >= VersionTuple(99U);
+    return SDKVersion >= VersionTuple(11U);
   case Darwin::XROS:
-    return SDKVersion >= VersionTuple(99U);
+    return SDKVersion >= VersionTuple(2U);
+
+  // New SDKs support builtin modules from the start.
   default:
     return true;
   }
@@ -3030,7 +3055,7 @@ void Darwin::addClangTargetOptions(
   // i.e. when the builtin stdint.h is in the Darwin module too, the cycle
   // goes away. Note that -fbuiltin-headers-in-system-modules does nothing
   // to fix the same problem with C++ headers, and is generally fragile.
-  if (!sdkSupportsBuiltinModules(TargetPlatform, SDKInfo))
+  if (!sdkSupportsBuiltinModules(TargetPlatform, TargetEnvironment, SDKInfo))
     CC1Args.push_back("-fbuiltin-headers-in-system-modules");
 
   if (!DriverArgs.hasArgNoClaim(options::OPT_fdefine_target_os_macros,
diff --git a/clang/test/Driver/Inputs/DriverKit23.0.sdk/SDKSettings.json b/clang/test/Driver/Inputs/DriverKit23.0.sdk/SDKSettings.json
new file mode 100644
index 0000000000000..7ba6c244df211
--- /dev/null
+++ b/clang/test/Driver/Inputs/DriverKit23.0.sdk/SDKSettings.json
@@ -0,0 +1 @@
+{"Version":"23.0", "MaximumDeploymentTarget": "23.0.99"}
diff --git a/clang/test/Driver/Inputs/MacOSX99.0.sdk/SDKSettings.json b/clang/test/Driver/Inputs/MacOSX15.0.sdk/SDKSettings.json
similarity index 100%
rename from clang/test/Driver/Inputs/MacOSX99.0.sdk/SDKSettings.json
rename to clang/test/Driver/Inputs/MacOSX15.0.sdk/SDKSettings.json
diff --git a/clang/test/Driver/darwin-builtin-modules.c b/clang/test/Driver/darwin-builtin-modules.c
index 1c56e13bfb929..ec515133be8ab 100644
--- a/clang/test/Driver/darwin-builtin-modules.c
+++ b/clang/test/Driver/darwin-builtin-modules.c
@@ -6,6 +6,7 @@
 // RUN: %clang -isysroot %S/Inputs/iPhoneOS13.0.sdk -target arm64-apple-ios13.0 -### %s 2>&1 | FileCheck %s
 // CHECK: -fbuiltin-headers-in-system-modules
 
-// RUN: %clang -isysroot %S/Inputs/MacOSX99.0.sdk -target x86_64-apple-macos98.0 -### %s 2>&1 | FileCheck --check-prefix=CHECK_FUTURE %s
-// RUN: %clang -isysroot %S/Inputs/MacOSX99.0.sdk -target x86_64-apple-macos99.0 -### %s 2>&1 | FileCheck --check-prefix=CHECK_FUTURE %s
+// RUN: %clang -isysroot %S/Inputs/MacOSX15.0.sdk -target x86_64-apple-macos14.0 -### %s 2>&1 | FileCheck --check-prefix=CHECK_FUTURE %s
+// RUN: %clang -isysroot %S/Inputs/MacOSX15.0.sdk -target x86_64-apple-macos15.0 -### %s 2>&1 | FileCheck --check-prefix=CHECK_FUTURE %s
+// RUN: %clang -isysroot %S/Inputs/DriverKit23.0.sdk -target arm64-apple-driverkit23.0 -### %s 2>&1 | FileCheck --check-prefix=CHECK_FUTURE %s
 // CHECK_FUTURE-NOT: -fbuiltin-headers-in-system-modules

From 561be3b95e724a8df6f35c0b415b5f8a536a4df2 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Wed, 7 Aug 2024 13:10:31 -0700
Subject: [PATCH 148/427] [Driver] Fix a warning

This patch fixes:

  clang/lib/Driver/ToolChains/Darwin.cpp:2937:3: error: default label
  in switch which covers all enumeration values
  [-Werror,-Wcovered-switch-default]

(cherry picked from commit 0f1361baf650641a59aaa1710d7a0b7b02f2e56d)
---
 clang/lib/Driver/ToolChains/Darwin.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp
index b8b2feb5a149e..17d57b2f7eeda 100644
--- a/clang/lib/Driver/ToolChains/Darwin.cpp
+++ b/clang/lib/Driver/ToolChains/Darwin.cpp
@@ -2927,14 +2927,12 @@ static bool sdkSupportsBuiltinModules(
     const Darwin::DarwinPlatformKind &TargetPlatform,
     const Darwin::DarwinEnvironmentKind &TargetEnvironment,
     const std::optional<DarwinSDKInfo> &SDKInfo) {
-  switch (TargetEnvironment) {
-  case Darwin::NativeEnvironment:
-  case Darwin::Simulator:
-  case Darwin::MacCatalyst:
+  if (TargetEnvironment == Darwin::NativeEnvironment ||
+      TargetEnvironment == Darwin::Simulator ||
+      TargetEnvironment == Darwin::MacCatalyst) {
     // Standard xnu/Mach/Darwin based environments
     // depend on the SDK version.
-    break;
-  default:
+  } else {
     // All other environments support builtin modules from the start.
     return true;
   }

From 9ec7815e2bed8421bf2967e1f122128dbcf49979 Mon Sep 17 00:00:00 2001
From: Chen Zheng <czhengsz@cn.ibm.com>
Date: Tue, 6 Aug 2024 11:07:45 +0800
Subject: [PATCH 149/427] [AIX]export function descriptor symbols related to
 template functions. (#101920)

This fixes regressions caused by
https://github.com/llvm/llvm-project/pull/97526

After that patch, all undefined references to DS symbol are removed.
This makes DS symbols(for template functions) have no reference in some
cases. So extract_symbols.py does not export these DS symbols for these
cases.

On AIX, exporting the function descriptor depends on references to the
function descriptor itself and the function entry symbol.

Without this fix, on AIX, we get:
```
rtld: 0712-001 Symbol _ZN4llvm15SmallVectorBaseIjE13mallocForGrowEPvmmRm was referenced
      from module llvm-project/build/unittests/Passes/Plugins/TestPlugin.so(), but a runtime definition
            of the symbol was not found.
```

(cherry picked from commit 396343f17b1182ff8ed698beac3f9b93b1d9dabd)
---
 llvm/utils/extract_symbols.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/llvm/utils/extract_symbols.py b/llvm/utils/extract_symbols.py
index 10fdf14acd158..684e124c76259 100755
--- a/llvm/utils/extract_symbols.py
+++ b/llvm/utils/extract_symbols.py
@@ -140,7 +140,7 @@ def should_keep_itanium_symbol(symbol, calling_convention_decoration):
     if not symbol.startswith("_") and not symbol.startswith("."):
         return symbol
     # Discard manglings that aren't nested names
-    match = re.match("_Z(T[VTIS])?(N.+)", symbol)
+    match = re.match("\.?_Z(T[VTIS])?(N.+)", symbol)
     if not match:
         return None
     # Demangle the name. If the name is too complex then we don't need to keep
@@ -323,7 +323,7 @@ def get_template_name(sym, mangling):
         if mangling == "microsoft":
             names = parse_microsoft_mangling(sym)
         else:
-            match = re.match("_Z(T[VTIS])?(N.+)", sym)
+            match = re.match("\.?_Z(T[VTIS])?(N.+)", sym)
             if match:
                 names, _ = parse_itanium_nested_name(match.group(2))
             else:
@@ -483,6 +483,9 @@ def parse_tool_path(parser, tool, val):
     else:
         outfile = sys.stdout
     for k, v in list(symbol_defs.items()):
+        # On AIX, export function descriptors instead of function entries.
+        if platform.system() == "AIX" and k.startswith("."):
+            continue
         template = get_template_name(k, args.mangling)
         if v == 1 and (not template or template in template_instantiation_refs):
             print(k, file=outfile)

From b084eff6ca9f331e107b68f13dfa7217c782688a Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Wed, 7 Aug 2024 21:05:42 -0700
Subject: [PATCH 150/427] [clang-format] Fix a bug in annotating CastRParen
 (#102261)

Fixes #102102.

(cherry picked from commit 8c7a038f9029c675f2a52ff5e85f7b6005ec7b3e)
---
 clang/lib/Format/TokenAnnotator.cpp           | 11 ++++++++++-
 clang/unittests/Format/TokenAnnotatorTest.cpp |  7 +++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 63c8699fd62d1..6b9253613788c 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -2872,9 +2872,18 @@ class AnnotatingParser {
       return false;
 
     // Search for unexpected tokens.
-    for (auto *Prev = BeforeRParen; Prev != LParen; Prev = Prev->Previous)
+    for (auto *Prev = BeforeRParen; Prev != LParen; Prev = Prev->Previous) {
+      if (Prev->is(tok::r_paren)) {
+        Prev = Prev->MatchingParen;
+        if (!Prev)
+          return false;
+        if (Prev->is(TT_FunctionTypeLParen))
+          break;
+        continue;
+      }
       if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
         return false;
+    }
 
     return true;
   }
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 386649bb6679f..0b5475ea95989 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -734,6 +734,13 @@ TEST_F(TokenAnnotatorTest, UnderstandsCasts) {
   EXPECT_TOKEN(Tokens[10], tok::r_paren, TT_Unknown);
   EXPECT_TOKEN(Tokens[11], tok::amp, TT_BinaryOperator);
 
+  Tokens = annotate("func((void (*)())&a);");
+  ASSERT_EQ(Tokens.size(), 15u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::l_paren, TT_FunctionTypeLParen);
+  EXPECT_TOKEN(Tokens[5], tok::star, TT_PointerOrReference);
+  EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_CastRParen);
+  EXPECT_TOKEN(Tokens[10], tok::amp, TT_UnaryOperator);
+
   auto Style = getLLVMStyle();
   Style.TypeNames.push_back("Foo");
   Tokens = annotate("#define FOO(bar) foo((Foo)&bar)", Style);

From 0df301ba8bc463cc27b9ec86d20b5f881c6c7866 Mon Sep 17 00:00:00 2001
From: Mariya Podchishchaeva <mariya.podchishchaeva@intel.com>
Date: Thu, 8 Aug 2024 08:51:56 +0200
Subject: [PATCH 151/427] [clang] Fix crash when #embed used in a compound
 literal (#102304)

Fixes https://github.com/llvm/llvm-project/issues/102248

(cherry picked from commit 3606d69d0b57dc1d23a4362e376e7ad27f650c27)
---
 clang/lib/Sema/SemaInit.cpp              |  4 ++--
 clang/test/Sema/embed_compound_literal.c | 15 +++++++++++++++
 2 files changed, 17 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/Sema/embed_compound_literal.c

diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index dc2ba039afe7f..eea4bdfa68b52 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -515,8 +515,8 @@ class InitListChecker {
     uint64_t ElsCount = 1;
     // Otherwise try to fill whole array with embed data.
     if (Entity.getKind() == InitializedEntity::EK_ArrayElement) {
-      ValueDecl *ArrDecl = Entity.getParent()->getDecl();
-      auto *AType = SemaRef.Context.getAsArrayType(ArrDecl->getType());
+      auto *AType =
+          SemaRef.Context.getAsArrayType(Entity.getParent()->getType());
       assert(AType && "expected array type when initializing array");
       ElsCount = Embed->getDataElementCount();
       if (const auto *CAType = dyn_cast<ConstantArrayType>(AType))
diff --git a/clang/test/Sema/embed_compound_literal.c b/clang/test/Sema/embed_compound_literal.c
new file mode 100644
index 0000000000000..7ba6fd8e64968
--- /dev/null
+++ b/clang/test/Sema/embed_compound_literal.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -std=c23 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -verify -x c++ -Wno-c23-extensions %s
+// expected-no-diagnostics
+
+char *p1 = (char[]){
+#embed __FILE__
+};
+
+int *p2 = (int[]){
+#embed __FILE__
+};
+
+int *p3 = (int[30]){
+#embed __FILE__ limit(30)
+};

From 7eb25ddccc1060776a34410d193bb3d066444b78 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mirko=20Brku=C5=A1anin?= <Mirko.Brkusanin@amd.com>
Date: Thu, 25 Jul 2024 18:19:26 +0200
Subject: [PATCH 152/427] [AMDGPU] Fix folding clamp into pseudo scalar
 instructions (#100568)

Clamp is canonically a v_max* instruction with a VGPR dst. Folding clamp
into a pseudo scalar instruction can cause issues due to a change in
regbank. We fix this with a copy.

(cherry picked from commit 817cd726454f01e990cd84e5e1d339b120b5ebaa)
---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp     | 13 +++++++++-
 .../CodeGen/AMDGPU/si-fold-scalar-clamp.mir   | 26 +++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/si-fold-scalar-clamp.mir

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 9b2cab2eb73a3..32ecf350db59c 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1581,7 +1581,18 @@ bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
 
   // Clamp is applied after omod, so it is OK if omod is set.
   DefClamp->setImm(1);
-  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
+
+  Register DefReg = Def->getOperand(0).getReg();
+  Register MIDstReg = MI.getOperand(0).getReg();
+  if (TRI->isSGPRReg(*MRI, DefReg)) {
+    // Pseudo scalar instructions have a SGPR for dst and clamp is a v_max*
+    // instruction with a VGPR dst.
+    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
+            MIDstReg)
+        .addReg(DefReg);
+  } else {
+    MRI->replaceRegWith(MIDstReg, DefReg);
+  }
   MI.eraseFromParent();
 
   // Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-scalar-clamp.mir b/llvm/test/CodeGen/AMDGPU/si-fold-scalar-clamp.mir
new file mode 100644
index 0000000000000..1f4d046a8739f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-fold-scalar-clamp.mir
@@ -0,0 +1,26 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck %s
+---
+name:            test
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: test
+    ; CHECK: liveins: $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+    ; CHECK-NEXT: [[V_S_RSQ_F32_e64_:%[0-9]+]]:sgpr_32 = nofpexcept V_S_RSQ_F32_e64 0, [[COPY]], 1, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_S_RSQ_F32_e64_]]
+    ; CHECK-NEXT: [[V_ADD_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e32 [[COPY1]], [[COPY1]], implicit $mode, implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_F32_e32_]]
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:sgpr_32 = COPY $sgpr0
+    %1:sgpr_32 = nofpexcept V_S_RSQ_F32_e64 0, %0, 0, 0, implicit $mode, implicit $exec
+    %2:vgpr_32 = nofpexcept V_MAX_F32_e64 0, %1, 0, %1, -1, 0, implicit $mode, implicit $exec
+    %3:vgpr_32 = nofpexcept V_ADD_F32_e32 %2:vgpr_32, %2:vgpr_32, implicit $mode, implicit $exec
+    $vgpr0 = COPY %3
+    S_ENDPGM 0
+
+...

From a2c18c0b2bdb765700e6379ade0bc0d2e1c592f8 Mon Sep 17 00:00:00 2001
From: Muhammad Omair Javaid <omair.javaid@linaro.org>
Date: Thu, 25 Jul 2024 12:21:16 +0500
Subject: [PATCH 153/427] Revert "[LLVM] Silence compiler-rt warning in
 runtimes build (#99525)"

This patch broke LLVM Flang build on Windows. PR #100202
This reverts commit f6f88f4b99638821af803d1911ab6a7dac04880b.

(cherry picked from commit 73d862e478738675f5d919c6a196429acd7b5f50)
---
 llvm/cmake/modules/LLVMExternalProjectUtils.cmake | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake
index cd071d50bdce9..eef0c16f6847e 100644
--- a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake
+++ b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake
@@ -350,7 +350,6 @@ function(llvm_ExternalProject_Add name source_dir)
                ${sysroot_arg}
                -DLLVM_BINARY_DIR=${PROJECT_BINARY_DIR}
                -DLLVM_CONFIG_PATH=${llvm_config_path}
-               -DLLVM_CMAKE_DIR=${LLVM_CMAKE_DIR}
                -DLLVM_ENABLE_WERROR=${LLVM_ENABLE_WERROR}
                -DLLVM_HOST_TRIPLE=${LLVM_HOST_TRIPLE}
                -DLLVM_HAVE_LINK_VERSION_SCRIPT=${LLVM_HAVE_LINK_VERSION_SCRIPT}

From 2da001d8318d365dde771fa154ec772431cf15f1 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me@antoniofrighetto.com>
Date: Thu, 25 Jul 2024 09:18:20 +0200
Subject: [PATCH 154/427] [TBAA] Do not rewrite TBAA if exists, always null out
 `!tbaa.struct`

Retrieve `!tbaa` metadata via `!tbaa.struct` in `adjustForAccess`
unless it already exists, as struct-path aware `MDNodes` emitted
via `new-struct-path-tbaa` may be leveraged. As `!tbaa.struct`
carries memcpy padding semantics among struct fields and `!tbaa`
is already meant to aid to alias semantics, it should be possible
to zero out `!tbaa.struct` once the memcpy has been simplified.
`SROA/tbaa-struct.ll` test has gone out of scope, as `!tbaa` has
already replaced `!tbaa.struct` in SROA.

Fixes: https://github.com/llvm/llvm-project/issues/95661.
---
 llvm/include/llvm/IR/Metadata.h               |  6 ++-
 llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp  |  8 +--
 .../InstCombine/InstCombineCalls.cpp          |  2 +-
 .../InstCombine/struct-assign-tbaa-2.ll       | 48 +++++++++++++++++
 .../InstCombine/struct-assign-tbaa.ll         |  9 ++--
 llvm/test/Transforms/SROA/tbaa-struct.ll      | 41 --------------
 llvm/test/Transforms/SROA/tbaa-struct3.ll     | 54 +++++++++----------
 7 files changed, 86 insertions(+), 82 deletions(-)
 create mode 100644 llvm/test/Transforms/InstCombine/struct-assign-tbaa-2.ll
 delete mode 100644 llvm/test/Transforms/SROA/tbaa-struct.ll

diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index 22da54a1f03c5..7b54c74fb1b9d 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -846,8 +846,10 @@ struct AAMDNodes {
   AAMDNodes concat(const AAMDNodes &Other) const;
 
   /// Create a new AAMDNode for accessing \p AccessSize bytes of this AAMDNode.
-  /// If his AAMDNode has !tbaa.struct and \p AccessSize matches the size of the
-  /// field at offset 0, get the TBAA tag describing the accessed field.
+  /// If this AAMDNode has !tbaa.struct and \p AccessSize matches the size of
+  /// the field at offset 0, get the TBAA tag describing the accessed field.
+  /// If such an AAMDNode already embeds !tbaa, the existing one is retrieved.
+  /// Finally, !tbaa.struct is zeroed out.
   AAMDNodes adjustForAccess(unsigned AccessSize);
   AAMDNodes adjustForAccess(size_t Offset, Type *AccessTy,
                             const DataLayout &DL);
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index e1cb63a9ab8f9..0d7eb7da8d6b6 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -822,16 +822,16 @@ MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) {
 AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) {
   AAMDNodes New = *this;
   MDNode *M = New.TBAAStruct;
-  if (M && M->getNumOperands() >= 3 && M->getOperand(0) &&
+  if (!New.TBAA && M && M->getNumOperands() >= 3 && M->getOperand(0) &&
       mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
       mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
       M->getOperand(1) && mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
       mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
           AccessSize &&
-      M->getOperand(2) && isa<MDNode>(M->getOperand(2))) {
-    New.TBAAStruct = nullptr;
+      M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
     New.TBAA = cast<MDNode>(M->getOperand(2));
-  }
+
+  New.TBAAStruct = nullptr;
   return New;
 }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 809be499ee0f9..9d2990c98ce27 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -171,7 +171,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
   IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
 
   // If the memcpy has metadata describing the members, see if we can get the
-  // TBAA tag describing our copy.
+  // TBAA, scope and noalias tags describing our copy.
   AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
 
   Value *Src = MI->getArgOperand(1);
diff --git a/llvm/test/Transforms/InstCombine/struct-assign-tbaa-2.ll b/llvm/test/Transforms/InstCombine/struct-assign-tbaa-2.ll
new file mode 100644
index 0000000000000..b52a062fc6404
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/struct-assign-tbaa-2.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+
+declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
+
+%struct.T = type { %struct.Wrapper, %struct.Wrapper }
+%struct.Wrapper = type { i16 }
+
+define void @test1(ptr %a1, ptr %a2) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i8, ptr [[A2:%.*]], i64 2
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[A2]], align 2, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    store i16 [[TMP0]], ptr [[A1:%.*]], align 2, !tbaa [[TBAA0]]
+; CHECK-NEXT:    [[B2:%.*]] = getelementptr inbounds i8, ptr [[A1]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[B]], align 2, !tbaa [[TBAA6:![0-9]+]]
+; CHECK-NEXT:    store i16 [[TMP1]], ptr [[B2]], align 2, !tbaa [[TBAA6]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %b = getelementptr inbounds i8, ptr %a2, i64 2
+  call void @llvm.memcpy.p0.p0.i64(ptr align 2 %a1, ptr align 2 %a2, i64 2, i1 false), !tbaa !0, !tbaa.struct !6
+  %b2 = getelementptr inbounds %struct.T, ptr %a1, i32 0, i32 1
+  call void @llvm.memcpy.p0.p0.i64(ptr align 2 %b2, ptr align 2 %b, i64 2, i1 false), !tbaa !8, !tbaa.struct !6
+  ret void
+}
+
+!0 = !{!1, !4, i64 0, i64 2}
+!1 = !{!2, i64 4, !"_ZTS1T", !4, i64 0, i64 2, !4, i64 2, i64 2}
+!2 = !{!3, i64 1, !"omnipotent char"}
+!3 = !{!"Simple C++ TBAA"}
+!4 = !{!2, i64 2, !"_ZTS7Wrapper", !5, i64 0, i64 2}
+!5 = !{!2, i64 2, !"short"}
+!6 = !{i64 0, i64 2, !7}
+!7 = !{!5, !5, i64 0, i64 2}
+!8 = !{!1, !4, i64 2, i64 2}
+
+;.
+; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META4:![0-9]+]], i64 0, i64 2}
+; CHECK: [[META1]] = !{[[META2:![0-9]+]], i64 4, !"_ZTS1T", [[META4]], i64 0, i64 2, [[META4]], i64 2, i64 2}
+; CHECK: [[META2]] = !{[[META3:![0-9]+]], i64 1, !"omnipotent char"}
+; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
+; CHECK: [[META4]] = !{[[META2]], i64 2, !"_ZTS7Wrapper", [[META5:![0-9]+]], i64 0, i64 2}
+; CHECK: [[META5]] = !{[[META2]], i64 2, !"short"}
+; CHECK: [[TBAA6]] = !{[[META1]], [[META4]], i64 2, i64 2}
+;.
diff --git a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
index 996d2c0e67e16..e96452a3cebc8 100644
--- a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
+++ b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
@@ -38,8 +38,8 @@ define ptr @test2() {
 define void @test3_multiple_fields(ptr nocapture %a, ptr nocapture %b) {
 ; CHECK-LABEL: @test3_multiple_fields(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[B:%.*]], align 4, !tbaa.struct [[TBAA_STRUCT3:![0-9]+]]
-; CHECK-NEXT:    store i64 [[TMP0]], ptr [[A:%.*]], align 4, !tbaa.struct [[TBAA_STRUCT3]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    store i64 [[TMP0]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -50,8 +50,8 @@ entry:
 define void @test4_multiple_copy_first_field(ptr nocapture %a, ptr nocapture %b) {
 ; CHECK-LABEL: @test4_multiple_copy_first_field(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[A:%.*]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -86,5 +86,4 @@ entry:
 ; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
 ; CHECK: [[META1]] = !{!"float", [[META2:![0-9]+]]}
 ; CHECK: [[META2]] = !{!"Simple C/C++ TBAA"}
-; CHECK: [[TBAA_STRUCT3]] = !{i64 0, i64 4, [[TBAA0]], i64 4, i64 4, [[TBAA0]]}
 ;.
diff --git a/llvm/test/Transforms/SROA/tbaa-struct.ll b/llvm/test/Transforms/SROA/tbaa-struct.ll
deleted file mode 100644
index 29892cb84d8ef..0000000000000
--- a/llvm/test/Transforms/SROA/tbaa-struct.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
-; RUN: opt -S -passes='sroa<preserve-cfg>' %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
-; RUN: opt -S -passes='sroa<modify-cfg>' %s | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
-
-; SROA should keep `!tbaa.struct` metadata
-
-%vector = type { float, float }
-declare void @llvm.memcpy.p0.p0.i64(ptr writeonly, ptr readonly, i64, i1 immarg)
-declare <2 x float> @foo(ptr %0)
-
-define void @bar(ptr %y2) {
-; CHECK-LABEL: @bar(
-; CHECK-NEXT:    [[X14:%.*]] = call <2 x float> @foo(ptr [[Y2:%.*]])
-; CHECK-NEXT:    store <2 x float> [[X14]], ptr [[Y2]], align 4, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]]
-; CHECK-NEXT:    ret void
-;
-  %x7 = alloca %vector
-  %x14 = call <2 x float> @foo(ptr %y2)
-  store <2 x float> %x14, ptr %x7
-  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %y2, ptr align 4 %x7, i64 8, i1 false), !tbaa.struct !10
-  ret void
-}
-
-!4 = !{!"omnipotent char", !5, i64 0}
-!5 = !{!"Simple C++ TBAA"}
-!7 = !{!"vector", !8, i64 0, !8, i64 4}
-!8 = !{!"float", !4, i64 0}
-!10 = !{i64 0, i64 4, !11, i64 4, i64 4, !11}
-!11 = !{!8, !8, i64 0}
-;.
-; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-;.
-; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 4, !1, i64 4, i64 4, !1}
-; CHECK: [[META1:![0-9]+]] = !{!2, !2, i64 0}
-; CHECK: [[META2:![0-9]+]] = !{!"float", !3, i64 0}
-; CHECK: [[META3:![0-9]+]] = !{!"omnipotent char", !4, i64 0}
-; CHECK: [[META4:![0-9]+]] = !{!"Simple C++ TBAA"}
-;.
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-MODIFY-CFG: {{.*}}
-; CHECK-PRESERVE-CFG: {{.*}}
diff --git a/llvm/test/Transforms/SROA/tbaa-struct3.ll b/llvm/test/Transforms/SROA/tbaa-struct3.ll
index 0fcd787fef976..6a1a23728d3c7 100644
--- a/llvm/test/Transforms/SROA/tbaa-struct3.ll
+++ b/llvm/test/Transforms/SROA/tbaa-struct3.ll
@@ -56,7 +56,7 @@ define void @memcpy_transfer_tbaa_field_and_size_do_not_align(ptr dereferenceabl
 ; CHECK-NEXT:    [[TMP_SROA_2_0_L_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[L_PTR]], i64 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[B]] to i32
 ; CHECK-NEXT:    [[TMP_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
-; CHECK-NEXT:    store i16 [[TMP_SROA_2_0_EXTRACT_TRUNC]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT4:![0-9]+]]
+; CHECK-NEXT:    store i16 [[TMP_SROA_2_0_EXTRACT_TRUNC]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -77,8 +77,8 @@ define void @load_store_transfer_split_struct_tbaa_2_i31(ptr dereferenceable(24)
 ; CHECK-NEXT:    store i31 [[A]], ptr [[TMP]], align 4
 ; CHECK-NEXT:    [[TMP_4_TMP_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 4
 ; CHECK-NEXT:    store i31 [[B]], ptr [[TMP_4_TMP_4_SROA_IDX]], align 4
-; CHECK-NEXT:    [[TMP_0_L1:%.*]] = load i62, ptr [[TMP]], align 4, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]]
-; CHECK-NEXT:    store i62 [[TMP_0_L1]], ptr [[RES]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
+; CHECK-NEXT:    [[TMP_0_L1:%.*]] = load i62, ptr [[TMP]], align 4, !tbaa.struct [[TBAA_STRUCT4:![0-9]+]]
+; CHECK-NEXT:    store i62 [[TMP_0_L1]], ptr [[RES]], align 4, !tbaa.struct [[TBAA_STRUCT4]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -98,7 +98,7 @@ define void @store_vector_part_first(ptr %y2, float %f) {
 ; CHECK-LABEL: define void @store_vector_part_first(
 ; CHECK-SAME: ptr [[Y2:%.*]], float [[F:%.*]]) {
 ; CHECK-NEXT:    [[V_1:%.*]] = call <2 x float> @foo(ptr [[Y2]])
-; CHECK-NEXT:    store <2 x float> [[V_1]], ptr [[Y2]], align 8, !tbaa [[TBAA6:![0-9]+]]
+; CHECK-NEXT:    store <2 x float> [[V_1]], ptr [[Y2]], align 8, !tbaa [[TBAA5:![0-9]+]]
 ; CHECK-NEXT:    [[X7_SROA_2_0_Y2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[Y2]], i64 8
 ; CHECK-NEXT:    store float [[F]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 8, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    ret void
@@ -118,7 +118,7 @@ define void @store_vector_part_second(ptr %y2, float %f) {
 ; CHECK-NEXT:    [[V_1:%.*]] = call <2 x float> @foo(ptr [[Y2]])
 ; CHECK-NEXT:    store float [[F]], ptr [[Y2]], align 8, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[X7_SROA_2_0_Y2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[Y2]], i64 4
-; CHECK-NEXT:    store <2 x float> [[V_1]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 4, !tbaa [[TBAA6]]
+; CHECK-NEXT:    store <2 x float> [[V_1]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 4, !tbaa [[TBAA5]]
 ; CHECK-NEXT:    ret void
 ;
   %x7 = alloca { float, float, float, float }
@@ -134,7 +134,7 @@ define void @store_vector_single(ptr %y2, float %f) {
 ; CHECK-LABEL: define void @store_vector_single(
 ; CHECK-SAME: ptr [[Y2:%.*]], float [[F:%.*]]) {
 ; CHECK-NEXT:    [[V_1:%.*]] = call <2 x float> @foo(ptr [[Y2]])
-; CHECK-NEXT:    store <2 x float> [[V_1]], ptr [[Y2]], align 4, !tbaa [[TBAA6]]
+; CHECK-NEXT:    store <2 x float> [[V_1]], ptr [[Y2]], align 4, !tbaa [[TBAA5]]
 ; CHECK-NEXT:    ret void
 ;
   %x7 = alloca { float, float }
@@ -161,7 +161,7 @@ define void @memset(ptr %dst, ptr align 8 %src) {
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_4]], ptr align 1 [[A_SROA_4_0_SRC_SROA_IDX]], i32 10, i1 false)
 ; CHECK-NEXT:    store i16 1, ptr [[A_SROA_3]], align 2
 ; CHECK-NEXT:    [[A_SROA_0_1_A_1_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 1
-; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_1_A_1_SROA_IDX2]], i8 42, i32 6, i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_1_A_1_SROA_IDX2]], i8 42, i32 6, i1 false)
 ; CHECK-NEXT:    store i16 10794, ptr [[A_SROA_3]], align 2, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 7, i1 true)
 ; CHECK-NEXT:    [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 7
@@ -199,8 +199,8 @@ define void @memset2(ptr %dst, ptr align 8 %src) {
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_4]], ptr align 2 [[A_SROA_4_0_SRC_SROA_IDX]], i32 90, i1 false)
 ; CHECK-NEXT:    store i8 1, ptr [[A_SROA_3]], align 1
 ; CHECK-NEXT:    [[A_SROA_0_202_A_202_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 202
-; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_202_A_202_SROA_IDX2]], i8 42, i32 7, i1 false), !tbaa [[TBAA6]]
-; CHECK-NEXT:    store i8 42, ptr [[A_SROA_3]], align 1, !tbaa [[TBAA6]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_202_A_202_SROA_IDX2]], i8 42, i32 7, i1 false), !tbaa [[TBAA5]]
+; CHECK-NEXT:    store i8 42, ptr [[A_SROA_3]], align 1, !tbaa [[TBAA5]]
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 209, i1 true)
 ; CHECK-NEXT:    [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 209
 ; CHECK-NEXT:    [[A_SROA_3_0_A_SROA_3_0_COPYLOAD1:%.*]] = load volatile i8, ptr [[A_SROA_3]], align 1
@@ -240,7 +240,7 @@ define void @slice_store_v2i8_1(ptr %dst, ptr %dst.2, ptr %src) {
 ; CHECK-NEXT:    [[A_SROA_2_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 6
 ; CHECK-NEXT:    [[A_SROA_2_SROA_0_0_COPYLOAD:%.*]] = load <2 x i8>, ptr [[A_SROA_2_0_SRC_SROA_IDX]], align 2
 ; CHECK-NEXT:    store <2 x i8> [[A_SROA_2_SROA_0_0_COPYLOAD]], ptr [[A_SROA_2_SROA_0]], align 4
-; CHECK-NEXT:    store <2 x i8> bitcast (<1 x i16> <i16 123> to <2 x i8>), ptr [[A_SROA_2_SROA_0]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT:    store <2 x i8> bitcast (<1 x i16> <i16 123> to <2 x i8>), ptr [[A_SROA_2_SROA_0]], align 4
 ; CHECK-NEXT:    [[A_SROA_2_SROA_0_0_A_SROA_2_SROA_0_0_A_SROA_2_6_V_4:%.*]] = load <2 x i8>, ptr [[A_SROA_2_SROA_0]], align 4
 ; CHECK-NEXT:    store <2 x i8> [[A_SROA_2_SROA_0_0_A_SROA_2_SROA_0_0_A_SROA_2_6_V_4]], ptr [[DST_2]], align 2
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 6, i1 true)
@@ -279,8 +279,8 @@ define void @slice_store_v2i8_2(ptr %dst, ptr %dst.2, ptr %src) {
 ; CHECK-NEXT:    store i8 [[A_SROA_0_SROA_4_1_COPYLOAD]], ptr [[A_SROA_0_SROA_4]], align 1
 ; CHECK-NEXT:    [[A_SROA_4_1_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 3
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_4]], ptr align 1 [[A_SROA_4_1_SRC_SROA_IDX]], i32 5, i1 false)
-; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[A_SROA_0_SROA_1]], align 2, !tbaa.struct [[TBAA_STRUCT9:![0-9]+]]
-; CHECK-NEXT:    store i8 0, ptr [[A_SROA_0_SROA_4]], align 1, !tbaa [[TBAA0]]
+; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[A_SROA_0_SROA_1]], align 2
+; CHECK-NEXT:    store i8 0, ptr [[A_SROA_0_SROA_4]], align 1
 ; CHECK-NEXT:    [[A_SROA_0_SROA_1_0_A_SROA_0_SROA_1_1_A_SROA_0_1_V_4:%.*]] = load <2 x i8>, ptr [[A_SROA_0_SROA_1]], align 2
 ; CHECK-NEXT:    store <2 x i8> [[A_SROA_0_SROA_1_0_A_SROA_0_SROA_1_1_A_SROA_0_1_V_4]], ptr [[DST_2]], align 2
 ; CHECK-NEXT:    [[A_SROA_0_SROA_1_0_A_SROA_0_SROA_1_1_COPYLOAD3:%.*]] = load volatile <2 x i8>, ptr [[A_SROA_0_SROA_1]], align 2
@@ -317,7 +317,7 @@ define double @tbaa_struct_load(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    [[TMP_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 8
 ; CHECK-NEXT:    [[TMP_SROA_3_0_COPYLOAD:%.*]] = load i64, ptr [[TMP_SROA_3_0_SRC_SROA_IDX]], align 8
 ; CHECK-NEXT:    store i64 [[TMP_SROA_3_0_COPYLOAD]], ptr [[TMP_SROA_3]], align 8
-; CHECK-NEXT:    [[TMP_SROA_0_0_TMP_SROA_0_0_LG:%.*]] = load double, ptr [[TMP_SROA_0]], align 8, !tbaa [[TBAA6]]
+; CHECK-NEXT:    [[TMP_SROA_0_0_TMP_SROA_0_0_LG:%.*]] = load double, ptr [[TMP_SROA_0]], align 8, !tbaa [[TBAA5]]
 ; CHECK-NEXT:    [[TMP_SROA_0_0_TMP_SROA_0_0_COPYLOAD1:%.*]] = load volatile double, ptr [[TMP_SROA_0]], align 8
 ; CHECK-NEXT:    store volatile double [[TMP_SROA_0_0_TMP_SROA_0_0_COPYLOAD1]], ptr [[DST]], align 8
 ; CHECK-NEXT:    [[TMP_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 8
@@ -356,7 +356,7 @@ define i32 @shorten_integer_store_multiple_fields(ptr %dst, ptr %dst.2, ptr %src
 ; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_SROA_0:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 123, ptr [[A_SROA_0]], align 4, !tbaa [[TBAA6]]
+; CHECK-NEXT:    store i32 123, ptr [[A_SROA_0]], align 4, !tbaa [[TBAA5]]
 ; CHECK-NEXT:    [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load i32, ptr [[A_SROA_0]], align 4
 ; CHECK-NEXT:    [[A_SROA_0_0_A_SROA_0_0_COPYLOAD:%.*]] = load volatile i32, ptr [[A_SROA_0]], align 4
 ; CHECK-NEXT:    store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD]], ptr [[DST]], align 1
@@ -375,7 +375,7 @@ define <2 x i16> @shorten_vector_store_multiple_fields(ptr %dst, ptr %dst.2, ptr
 ; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_SROA_0:%.*]] = alloca <2 x i32>, align 8
-; CHECK-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr [[A_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT5]]
+; CHECK-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr [[A_SROA_0]], align 8
 ; CHECK-NEXT:    [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load <2 x i16>, ptr [[A_SROA_0]], align 8
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 8 [[A_SROA_0]], i32 4, i1 true)
 ; CHECK-NEXT:    ret <2 x i16> [[A_SROA_0_0_A_SROA_0_0_L]]
@@ -393,7 +393,7 @@ define <2 x i16> @shorten_vector_store_single_fields(ptr %dst, ptr %dst.2, ptr %
 ; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_SROA_0:%.*]] = alloca <2 x i32>, align 8
-; CHECK-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr [[A_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT10:![0-9]+]]
+; CHECK-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr [[A_SROA_0]], align 8
 ; CHECK-NEXT:    [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load <2 x i16>, ptr [[A_SROA_0]], align 8
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 8 [[A_SROA_0]], i32 4, i1 true)
 ; CHECK-NEXT:    ret <2 x i16> [[A_SROA_0_0_A_SROA_0_0_L]]
@@ -429,11 +429,11 @@ define i32 @split_load_with_tbaa_struct(i32 %x, ptr %src, ptr %dst) {
 ; CHECK-NEXT:    [[A3_SROA_5_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 9
 ; CHECK-NEXT:    [[A3_SROA_5_0_COPYLOAD:%.*]] = load i8, ptr [[A3_SROA_5_0_SRC_SROA_IDX]], align 1
 ; CHECK-NEXT:    store i8 [[A3_SROA_5_0_COPYLOAD]], ptr [[A3_SROA_5]], align 1
-; CHECK-NEXT:    [[A3_SROA_0_0_A3_SROA_0_0_LOAD4_FCA_0_LOAD:%.*]] = load i16, ptr [[A3_SROA_0]], align 8, !tbaa [[TBAA6]]
+; CHECK-NEXT:    [[A3_SROA_0_0_A3_SROA_0_0_LOAD4_FCA_0_LOAD:%.*]] = load i16, ptr [[A3_SROA_0]], align 8, !tbaa [[TBAA5]]
 ; CHECK-NEXT:    [[LOAD4_FCA_0_INSERT:%.*]] = insertvalue { i16, float, i8 } poison, i16 [[A3_SROA_0_0_A3_SROA_0_0_LOAD4_FCA_0_LOAD]], 0
-; CHECK-NEXT:    [[A3_SROA_33_0_A3_SROA_33_4_LOAD4_FCA_1_LOAD:%.*]] = load float, ptr [[A3_SROA_33]], align 4, !tbaa [[TBAA6]]
+; CHECK-NEXT:    [[A3_SROA_33_0_A3_SROA_33_4_LOAD4_FCA_1_LOAD:%.*]] = load float, ptr [[A3_SROA_33]], align 4, !tbaa [[TBAA5]]
 ; CHECK-NEXT:    [[LOAD4_FCA_1_INSERT:%.*]] = insertvalue { i16, float, i8 } [[LOAD4_FCA_0_INSERT]], float [[A3_SROA_33_0_A3_SROA_33_4_LOAD4_FCA_1_LOAD]], 1
-; CHECK-NEXT:    [[A3_SROA_4_0_A3_SROA_4_8_LOAD4_FCA_2_LOAD:%.*]] = load i8, ptr [[A3_SROA_4]], align 8, !tbaa [[TBAA6]]
+; CHECK-NEXT:    [[A3_SROA_4_0_A3_SROA_4_8_LOAD4_FCA_2_LOAD:%.*]] = load i8, ptr [[A3_SROA_4]], align 8, !tbaa [[TBAA5]]
 ; CHECK-NEXT:    [[LOAD4_FCA_2_INSERT:%.*]] = insertvalue { i16, float, i8 } [[LOAD4_FCA_1_INSERT]], i8 [[A3_SROA_4_0_A3_SROA_4_8_LOAD4_FCA_2_LOAD]], 2
 ; CHECK-NEXT:    [[UNWRAP2:%.*]] = extractvalue { i16, float, i8 } [[LOAD4_FCA_2_INSERT]], 1
 ; CHECK-NEXT:    [[VALCAST2:%.*]] = bitcast float [[UNWRAP2]] to i32
@@ -492,11 +492,11 @@ define i32 @split_store_with_tbaa_struct(i32 %x, ptr %src, ptr %dst) {
 ; CHECK-NEXT:    [[I_2:%.*]] = insertvalue { i16, float, i8 } [[I_1]], float 3.000000e+00, 1
 ; CHECK-NEXT:    [[I_3:%.*]] = insertvalue { i16, float, i8 } [[I_2]], i8 99, 2
 ; CHECK-NEXT:    [[I_3_FCA_0_EXTRACT:%.*]] = extractvalue { i16, float, i8 } [[I_3]], 0
-; CHECK-NEXT:    store i16 [[I_3_FCA_0_EXTRACT]], ptr [[A3_SROA_0]], align 8, !tbaa [[TBAA6]]
+; CHECK-NEXT:    store i16 [[I_3_FCA_0_EXTRACT]], ptr [[A3_SROA_0]], align 8, !tbaa [[TBAA5]]
 ; CHECK-NEXT:    [[I_3_FCA_1_EXTRACT:%.*]] = extractvalue { i16, float, i8 } [[I_3]], 1
-; CHECK-NEXT:    store float [[I_3_FCA_1_EXTRACT]], ptr [[A3_SROA_33]], align 4, !tbaa [[TBAA6]]
+; CHECK-NEXT:    store float [[I_3_FCA_1_EXTRACT]], ptr [[A3_SROA_33]], align 4, !tbaa [[TBAA5]]
 ; CHECK-NEXT:    [[I_3_FCA_2_EXTRACT:%.*]] = extractvalue { i16, float, i8 } [[I_3]], 2
-; CHECK-NEXT:    store i8 [[I_3_FCA_2_EXTRACT]], ptr [[A3_SROA_4]], align 8, !tbaa [[TBAA6]]
+; CHECK-NEXT:    store i8 [[I_3_FCA_2_EXTRACT]], ptr [[A3_SROA_4]], align 8, !tbaa [[TBAA5]]
 ; CHECK-NEXT:    [[A3_SROA_0_0_A3_SROA_0_0_COPYLOAD1:%.*]] = load volatile i16, ptr [[A3_SROA_0]], align 8
 ; CHECK-NEXT:    store volatile i16 [[A3_SROA_0_0_A3_SROA_0_0_COPYLOAD1]], ptr [[DST]], align 1
 ; CHECK-NEXT:    [[A3_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 2
@@ -552,11 +552,7 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias
 ; CHECK: [[META1]] = !{!"float", [[META2:![0-9]+]], i64 0}
 ; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
 ; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
-; CHECK: [[TBAA_STRUCT4]] = !{i64 0, i64 4, [[TBAA0]]}
-; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 4, [[TBAA0]], i64 4, i64 4, [[TBAA0]]}
-; CHECK: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
-; CHECK: [[META7]] = !{!"v2f32", [[META2]], i64 0}
-; CHECK: [[TBAA_STRUCT8]] = !{i64 0, i64 2, [[TBAA0]], i64 2, i64 6, [[TBAA0]]}
-; CHECK: [[TBAA_STRUCT9]] = !{i64 0, i64 3, [[TBAA0]]}
-; CHECK: [[TBAA_STRUCT10]] = !{i64 0, i64 4, [[TBAA6]]}
+; CHECK: [[TBAA_STRUCT4]] = !{i64 0, i64 4, [[TBAA0]], i64 4, i64 4, [[TBAA0]]}
+; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[META6]] = !{!"v2f32", [[META2]], i64 0}
 ;.

From 32e20009d1943e2f6cd6f55c6ec6a23dcd094337 Mon Sep 17 00:00:00 2001
From: David Tenty <daltenty@ibm.com>
Date: Thu, 8 Aug 2024 11:16:18 -0400
Subject: [PATCH 155/427] [NFC][llvm][support] rename INFINITY in regcomp
 (#101758)

since C23 this macro is defined by float.h, which clang implements in
it's float.h since #96659 landed.

However, regcomp.c in LLVMSupport happened to define it's own macro with
that name, leading to problems when bootstrapping. This change renames
the offending macro.

(cherry picked from commit 899f648866affd011baae627752ba15baabc2ef9)
---
 llvm/lib/Support/regcomp.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Support/regcomp.c b/llvm/lib/Support/regcomp.c
index 990aef32a396f..daa41eb4912ef 100644
--- a/llvm/lib/Support/regcomp.c
+++ b/llvm/lib/Support/regcomp.c
@@ -278,7 +278,7 @@ static char nuls[10];		/* place to point scanner in event of error */
 #else
 #define	DUPMAX	255
 #endif
-#define	INFINITY	(DUPMAX + 1)
+#define REGINFINITY (DUPMAX + 1)
 
 #ifndef NDEBUG
 static int never = 0;		/* for use in asserts; shuts lint up */
@@ -582,7 +582,7 @@ p_ere_exp(struct parse *p)
 				count2 = p_count(p);
 				REQUIRE(count <= count2, REG_BADBR);
 			} else		/* single number with comma */
-				count2 = INFINITY;
+				count2 = REGINFINITY;
 		} else		/* just a single number */
 			count2 = count;
 		repeat(p, pos, count, count2);
@@ -753,7 +753,7 @@ p_simp_re(struct parse *p,
 				count2 = p_count(p);
 				REQUIRE(count <= count2, REG_BADBR);
 			} else		/* single number with comma */
-				count2 = INFINITY;
+				count2 = REGINFINITY;
 		} else		/* just a single number */
 			count2 = count;
 		repeat(p, pos, count, count2);
@@ -1115,7 +1115,7 @@ repeat(struct parse *p,
 #	define	N	2
 #	define	INF	3
 #	define	REP(f, t)	((f)*8 + (t))
-#	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
+#	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == REGINFINITY) ? INF : N)
 	sopno copy;
 
 	if (p->error != 0)	/* head off possible runaway recursion */

From 7aae895c245a00a6bb97d05c966765de825a54d1 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Thu, 8 Aug 2024 00:57:43 -0700
Subject: [PATCH 156/427] [ELF] .llvm.call-graph-profile: support CREL

https://reviews.llvm.org/D105217 added RELA support. This patch adds
CREL support.

(cherry picked from commit 0766a59be3256e83a454a089f01215d6c7f94a48)
---
 lld/ELF/Driver.cpp               | 9 +++++++++
 lld/test/ELF/cgprofile-rela.test | 6 +++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 40e095a133d95..eb6734dfd458d 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -991,6 +991,15 @@ processCallGraphRelocations(SmallVector<uint32_t, 32> &symbolIndices,
   for (size_t i = 0, e = objSections.size(); i < e; ++i) {
     const Elf_Shdr_Impl<ELFT> &sec = objSections[i];
     if (sec.sh_info == inputObj->cgProfileSectionIndex) {
+      if (sec.sh_type == SHT_CREL) {
+        auto crels =
+            CHECK(obj.crels(sec), "could not retrieve cg profile rela section");
+        for (const auto &rel : crels.first)
+          symbolIndices.push_back(rel.getSymbol(false));
+        for (const auto &rel : crels.second)
+          symbolIndices.push_back(rel.getSymbol(false));
+        break;
+      }
       if (sec.sh_type == SHT_RELA) {
         ArrayRef<typename ELFT::Rela> relas =
             CHECK(obj.relas(sec), "could not retrieve cg profile rela section");
diff --git a/lld/test/ELF/cgprofile-rela.test b/lld/test/ELF/cgprofile-rela.test
index 141dfd4c65b1e..87dad02940b98 100644
--- a/lld/test/ELF/cgprofile-rela.test
+++ b/lld/test/ELF/cgprofile-rela.test
@@ -8,6 +8,10 @@
 # RUN: ld.lld --no-call-graph-profile-sort %t.o -o %t
 # RUN: llvm-nm --no-sort %t | FileCheck %s --check-prefix=NO-CG
 
+# RUN: yaml2obj -DTYPE=SHT_CREL %s -o %tcrel.o
+# RUN: ld.lld --call-graph-profile-sort=hfsort %tcrel.o -o %t
+# RUN: llvm-nm --no-sort %t | FileCheck %s
+
 # CHECK: 0000000000201124 t D
 # CHECK: 0000000000201122 t C
 # CHECK: 0000000000201128 t B
@@ -60,7 +64,7 @@ Sections:
       - Weight: 30
       - Weight: 90
   - Name: .rela.llvm.call-graph-profile
-    Type: SHT_RELA
+    Type: [[TYPE=SHT_RELA]]
     Info: .llvm.call-graph-profile
     Relocations:
       - Offset: 0x0

From 39746ee0048e1f4891d2e2b58e7c7583df43c3bc Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Thu, 8 Aug 2024 12:02:44 -0700
Subject: [PATCH 157/427] [ELF] scanRelocations: support .crel.eh_frame

Follow-up to #98115. For EhInputSection, RelocationScanner::scan calls
sortRels, which doesn't support the CREL iterator. We should set
supportsCrel to false to ensure that the initial_location fields in
.eh_frame FDEs are relocated.

(cherry picked from commit a821fee312d15941174827a70cb534c2f2fe1177)
---
 lld/ELF/Relocations.cpp | 10 ++++++----
 lld/test/ELF/crel.s     |  8 ++++++++
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index e19b1e6c8efb8..707768dee6d38 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -459,7 +459,8 @@ class OffsetGetter {
 // InputSectionBase.
 class RelocationScanner {
 public:
-  template <class ELFT> void scanSection(InputSectionBase &s);
+  template <class ELFT>
+  void scanSection(InputSectionBase &s, bool isEH = false);
 
 private:
   InputSectionBase *sec;
@@ -1617,10 +1618,11 @@ void RelocationScanner::scan(Relocs<RelTy> rels) {
                       });
 }
 
-template <class ELFT> void RelocationScanner::scanSection(InputSectionBase &s) {
+template <class ELFT>
+void RelocationScanner::scanSection(InputSectionBase &s, bool isEH) {
   sec = &s;
   getter = OffsetGetter(s);
-  const RelsOrRelas<ELFT> rels = s.template relsOrRelas<ELFT>();
+  const RelsOrRelas<ELFT> rels = s.template relsOrRelas<ELFT>(!isEH);
   if (rels.areRelocsCrel())
     scan<ELFT>(rels.crels);
   else if (rels.areRelocsRel())
@@ -1658,7 +1660,7 @@ template <class ELFT> void elf::scanRelocations() {
     RelocationScanner scanner;
     for (Partition &part : partitions) {
       for (EhInputSection *sec : part.ehFrame->sections)
-        scanner.template scanSection<ELFT>(*sec);
+        scanner.template scanSection<ELFT>(*sec, /*isEH=*/true);
       if (part.armExidx && part.armExidx->isLive())
         for (InputSection *sec : part.armExidx->exidxSections)
           if (sec->isLive())
diff --git a/lld/test/ELF/crel.s b/lld/test/ELF/crel.s
index d7c87be9a5402..1de3f314fc677 100644
--- a/lld/test/ELF/crel.s
+++ b/lld/test/ELF/crel.s
@@ -5,6 +5,7 @@
 # RUN: ld.lld -pie a.o b.o -o out
 # RUN: llvm-objdump -d out | FileCheck %s
 # RUN: llvm-readelf -Srs out | FileCheck %s --check-prefix=RELOC
+# RUN: llvm-dwarfdump --eh-frame out | FileCheck %s --check-prefix=UNWIND
 
 # CHECK:       <_start>:
 # CHECK-NEXT:    callq {{.*}} <foo>
@@ -18,6 +19,13 @@
 
 # RELOC:  {{0*}}[[#DATA+8]]  0000000000000008 R_X86_64_RELATIVE [[#%x,DATA+0x8000000000000000]]
 
+# RELOC:      00000000000012f4     0 NOTYPE  GLOBAL DEFAULT [[#]] _start
+# RELOC-NEXT: 00000000000012fe     0 NOTYPE  GLOBAL DEFAULT [[#]] foo
+
+## initial_location fields in FDEs are correctly relocated.
+# UNWIND: 00000018 00000010 0000001c FDE cie=00000000 pc=000012f4...000012fe
+# UNWIND: 0000002c 00000010 00000030 FDE cie=00000000 pc=000012fe...0000130c
+
 # RUN: ld.lld -pie --emit-relocs a.o b.o -o out1
 # RUN: llvm-objdump -dr out1 | FileCheck %s --check-prefix=CHECKE
 # RUN: llvm-readelf -Sr out1 | FileCheck %s --check-prefix=RELOCE

From d893708ded7b5e8cfafae777bf88928761231994 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Wed, 7 Aug 2024 12:23:28 -0700
Subject: [PATCH 158/427] Revert "demangle function names in trace files
 (#87626)"

This reverts commit 0fa20c55b58deb94090985a5c5ffda4d5ceb3cd1.

Storing raw symbol names is generally preferred in profile files.
Demangling might lose information. Language frontends might use
demangling schemes not supported by LLVMDemangle
(https://github.com/llvm/llvm-project/issues/45901#issuecomment-2008686663).
In addition, calling `demangle` for each function has a significant
performance overhead (#102222).

I believe that even if we decide to provide a producer-side demangling,
it would not be on by default.

Pull Request: https://github.com/llvm/llvm-project/pull/102274

(cherry picked from commit 72b73e23b6c36537db730ebea00f92798108a6e5)
---
 clang/test/Driver/ftime-trace-sections.py         | 11 -----------
 llvm/lib/IR/LegacyPassManager.cpp                 |  4 +---
 llvm/lib/Passes/CMakeLists.txt                    |  1 -
 llvm/lib/Passes/StandardInstrumentations.cpp      |  9 ++++-----
 utils/bazel/llvm-project-overlay/llvm/BUILD.bazel |  1 -
 5 files changed, 5 insertions(+), 21 deletions(-)
 mode change 100755 => 100644 clang/test/Driver/ftime-trace-sections.py

diff --git a/clang/test/Driver/ftime-trace-sections.py b/clang/test/Driver/ftime-trace-sections.py
old mode 100755
new mode 100644
index b332931d29a62..02afa4ac54eb7
--- a/clang/test/Driver/ftime-trace-sections.py
+++ b/clang/test/Driver/ftime-trace-sections.py
@@ -19,10 +19,7 @@ def is_before(range1, range2):
 
 log_contents = json.loads(sys.stdin.read())
 events = log_contents["traceEvents"]
-
-instants = [event for event in events if event["name"] == "InstantiateFunction"]
 codegens = [event for event in events if event["name"] == "CodeGen Function"]
-opts = [event for event in events if event["name"] == "OptFunction"]
 frontends = [event for event in events if event["name"] == "Frontend"]
 backends = [event for event in events if event["name"] == "Backend"]
 
@@ -51,11 +48,3 @@ def is_before(range1, range2):
     ]
 ):
     sys.exit("Not all Frontend section are before all Backend sections!")
-
-# Check that entries for foo exist and are in a demangled form.
-if not any(e for e in instants if "foo<int>" in e["args"]["detail"]):
-    sys.exit("Missing Instantiate entry for foo!")
-if not any(e for e in codegens if "foo<int>" in e["args"]["detail"]):
-    sys.exit("Missing CodeGen entry for foo!")
-if not any(e for e in opts if "foo<int>" in e["args"]["detail"]):
-    sys.exit("Missing Optimize entry for foo!")
diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp
index 9c44eff7953ac..01aaedcf7d547 100644
--- a/llvm/lib/IR/LegacyPassManager.cpp
+++ b/llvm/lib/IR/LegacyPassManager.cpp
@@ -12,7 +12,6 @@
 
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/ADT/MapVector.h"
-#include "llvm/Demangle/Demangle.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/LLVMContext.h"
@@ -1416,8 +1415,7 @@ bool FPPassManager::runOnFunction(Function &F) {
 
   // Store name outside of loop to avoid redundant calls.
   const StringRef Name = F.getName();
-  llvm::TimeTraceScope FunctionScope(
-      "OptFunction", [&F]() { return demangle(F.getName().str()); });
+  llvm::TimeTraceScope FunctionScope("OptFunction", Name);
 
   for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
     FunctionPass *FP = getContainedPass(Index);
diff --git a/llvm/lib/Passes/CMakeLists.txt b/llvm/lib/Passes/CMakeLists.txt
index b5224327d7921..6425f4934b210 100644
--- a/llvm/lib/Passes/CMakeLists.txt
+++ b/llvm/lib/Passes/CMakeLists.txt
@@ -21,7 +21,6 @@ add_llvm_component_library(LLVMPasses
   CodeGen
   Core
   Coroutines
-  Demangle
   HipStdPar
   IPO
   InstCombine
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index fc7b82d522bf0..4eff2deef9abc 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -23,7 +23,6 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineVerifier.h"
-#include "llvm/Demangle/Demangle.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
@@ -236,12 +235,12 @@ void printIR(raw_ostream &OS, const MachineFunction *MF) {
   MF->print(OS);
 }
 
-std::string getIRName(Any IR, bool demangled = false) {
+std::string getIRName(Any IR) {
   if (unwrapIR<Module>(IR))
     return "[module]";
 
   if (const auto *F = unwrapIR<Function>(IR))
-    return demangled ? demangle(F->getName()) : F->getName().str();
+    return F->getName().str();
 
   if (const auto *C = unwrapIR<LazyCallGraph::SCC>(IR))
     return C->getName();
@@ -251,7 +250,7 @@ std::string getIRName(Any IR, bool demangled = false) {
            L->getHeader()->getParent()->getName().str();
 
   if (const auto *MF = unwrapIR<MachineFunction>(IR))
-    return demangled ? demangle(MF->getName()) : MF->getName().str();
+    return MF->getName().str();
 
   llvm_unreachable("Unknown wrapped IR type");
 }
@@ -1589,7 +1588,7 @@ void TimeProfilingPassesHandler::registerCallbacks(
 }
 
 void TimeProfilingPassesHandler::runBeforePass(StringRef PassID, Any IR) {
-  timeTraceProfilerBegin(PassID, getIRName(IR, true));
+  timeTraceProfilerBegin(PassID, getIRName(IR));
 }
 
 void TimeProfilingPassesHandler::runAfterPass() { timeTraceProfilerEnd(); }
diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index 4d443e809d55b..4a59c16ba12fc 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -2653,7 +2653,6 @@ cc_library(
         ":CodeGen",
         ":Core",
         ":Coroutines",
-        ":Demangle",
         ":HipStdPar",
         ":IPO",
         ":IRPrinter",

From f3261a5a64d8ff2c76eb9c1ae2aa7d348c75e97c Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Fri, 9 Aug 2024 14:25:07 +0100
Subject: [PATCH 159/427] [AArch64] Add invalid 1 x vscale costs for reductions
 and reduction-operations. (#102105)

The code-generator is currently not able to handle scalable vectors of
<vscale x 1 x eltty>. The usual "fix" for this until it is supported is
to mark the costs of loads/stores with an invalid cost, preventing the
vectorizer from vectorizing at those factors. But on rare occasions
loops do not contain load/stores, only reductions.

So whilst this is still unsupported return an invalid cost to avoid
selecting vscale x 1 VFs. The cost of a reduction is not currently used
by the vectorizer so this adds the cost to the add/mul/and/or/xor or
min/max that should feed the reduction. It includes reduction costs
too, for completeness. This change will be removed when code-generation
for these types is sufficiently reliable.

Fixes #99760

(cherry picked from commit 0b745a10843fc85e579bbf459f78b3f43e7ab309)
---
 .../AArch64/AArch64TargetTransformInfo.cpp    | 32 +++++++++++++++++
 .../CostModel/AArch64/arith-fp-sve.ll         |  4 +++
 .../Analysis/CostModel/AArch64/cttz_elts.ll   |  2 ++
 .../Analysis/CostModel/AArch64/sve-arith.ll   | 21 +++++++++++
 .../CostModel/AArch64/sve-intrinsics.ll       | 36 +++++++++++++++++++
 .../Analysis/CostModel/AArch64/sve-min-max.ll | 12 +++++++
 6 files changed, 107 insertions(+)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 237ba67b668fc..39fba6a257bb0 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -541,7 +541,15 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
 InstructionCost
 AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                       TTI::TargetCostKind CostKind) {
+  // The code-generator is currently not able to handle scalable vectors
+  // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
+  // it. This change will be removed when code-generation for these types is
+  // sufficiently reliable.
   auto *RetTy = ICA.getReturnType();
+  if (auto *VTy = dyn_cast<ScalableVectorType>(RetTy))
+    if (VTy->getElementCount() == ElementCount::getScalable(1))
+      return InstructionCost::getInvalid();
+
   switch (ICA.getID()) {
   case Intrinsic::experimental_vector_histogram_add:
     if (!ST->hasSVE2())
@@ -3024,6 +3032,14 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
     ArrayRef<const Value *> Args,
     const Instruction *CxtI) {
 
+  // The code-generator is currently not able to handle scalable vectors
+  // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
+  // it. This change will be removed when code-generation for these types is
+  // sufficiently reliable.
+  if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
+    if (VTy->getElementCount() == ElementCount::getScalable(1))
+      return InstructionCost::getInvalid();
+
   // TODO: Handle more cost kinds.
   if (CostKind != TTI::TCK_RecipThroughput)
     return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
@@ -3798,6 +3814,14 @@ InstructionCost
 AArch64TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
                                        FastMathFlags FMF,
                                        TTI::TargetCostKind CostKind) {
+  // The code-generator is currently not able to handle scalable vectors
+  // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
+  // it. This change will be removed when code-generation for these types is
+  // sufficiently reliable.
+  if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
+    if (VTy->getElementCount() == ElementCount::getScalable(1))
+      return InstructionCost::getInvalid();
+
   std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
 
   if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
@@ -3842,6 +3866,14 @@ InstructionCost
 AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                                            std::optional<FastMathFlags> FMF,
                                            TTI::TargetCostKind CostKind) {
+  // The code-generator is currently not able to handle scalable vectors
+  // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
+  // it. This change will be removed when code-generation for these types is
+  // sufficiently reliable.
+  if (auto *VTy = dyn_cast<ScalableVectorType>(ValTy))
+    if (VTy->getElementCount() == ElementCount::getScalable(1))
+      return InstructionCost::getInvalid();
+
   if (TTI::requiresOrderedReduction(FMF)) {
     if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
       InstructionCost BaseCost =
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll
index 18a1c31c03f74..770d3087b0752 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll
@@ -8,6 +8,7 @@ define void @fadd() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fadd <vscale x 4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fadd <vscale x 8 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fadd <vscale x 16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V1F32 = fadd <vscale x 1 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fadd <vscale x 2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <vscale x 4 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <vscale x 8 x float> undef, undef
@@ -19,6 +20,7 @@ define void @fadd() {
   %V8F16 = fadd <vscale x 8 x half> undef, undef
   %V16F16 = fadd <vscale x 16 x half> undef, undef
 
+  %V1F32 = fadd <vscale x 1 x float> undef, undef
   %V2F32 = fadd <vscale x 2 x float> undef, undef
   %V4F32 = fadd <vscale x 4 x float> undef, undef
   %V8F32 = fadd <vscale x 8 x float> undef, undef
@@ -34,6 +36,7 @@ define void @fsub() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <vscale x 4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <vscale x 8 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fsub <vscale x 16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V1F32 = fsub <vscale x 1 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <vscale x 2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <vscale x 4 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <vscale x 8 x float> undef, undef
@@ -45,6 +48,7 @@ define void @fsub() {
   %V8F16 = fsub <vscale x 8 x half> undef, undef
   %V16F16 = fsub <vscale x 16 x half> undef, undef
 
+  %V1F32 = fsub <vscale x 1 x float> undef, undef
   %V2F32 = fsub <vscale x 2 x float> undef, undef
   %V4F32 = fsub <vscale x 4 x float> undef, undef
   %V8F32 = fsub <vscale x 8 x float> undef, undef
diff --git a/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll b/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll
index e1a9ee114d261..98d5bd5bd13f4 100644
--- a/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll
@@ -3,6 +3,7 @@
 
 define void @foo_no_vscale_range() {
 ; CHECK-LABEL: 'foo_no_vscale_range'
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %res.i64.nxv1i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv1i1(<vscale x 1 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 true)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 true)
@@ -45,6 +46,7 @@ define void @foo_no_vscale_range() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %res.i32.v32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v32i1(<32 x i1> undef, i1 false)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  %res.i64.nxv1i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv1i1(<vscale x 1 x i1> undef, i1 true)
   %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 true)
   %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 true)
   %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 true)
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-arith.ll b/llvm/test/Analysis/CostModel/AArch64/sve-arith.ll
index f4dfea4cce349..46450e68f40e2 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-arith.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-arith.ll
@@ -43,6 +43,7 @@ define void @scalable_mul() #0 {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv8i16 = mul <vscale x 8 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv4i32 = mul <vscale x 4 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv2i64 = mul <vscale x 2 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %mul_nxv1i64 = mul <vscale x 1 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 entry:
@@ -50,6 +51,26 @@ entry:
   %mul_nxv8i16 = mul <vscale x 8 x i16> undef, undef
   %mul_nxv4i32 = mul <vscale x 4 x i32> undef, undef
   %mul_nxv2i64 = mul <vscale x 2 x i64> undef, undef
+  %mul_nxv1i64 = mul <vscale x 1 x i64> undef, undef
+
+  ret void
+}
+
+define void @scalable_add() #0 {
+; CHECK-LABEL: 'scalable_add'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add_nxv16i8 = add <vscale x 16 x i8> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add_nxv8i16 = add <vscale x 8 x i16> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add_nxv4i32 = add <vscale x 4 x i32> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add_nxv2i64 = add <vscale x 2 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %add_nxv1i64 = add <vscale x 1 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+entry:
+  %add_nxv16i8 = add <vscale x 16 x i8> undef, undef
+  %add_nxv8i16 = add <vscale x 8 x i16> undef, undef
+  %add_nxv4i32 = add <vscale x 4 x i32> undef, undef
+  %add_nxv2i64 = add <vscale x 2 x i64> undef, undef
+  %add_nxv1i64 = add <vscale x 1 x i64> undef, undef
 
   ret void
 }
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index 1993023c91e26..a3e260d211c43 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -116,82 +116,118 @@ declare <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float>, i64
 
 define void @reductions(<vscale x 4 x i32> %v0, <vscale x 4 x i64> %v1, <vscale x 4 x float> %v2, <vscale x 4 x double> %v3) {
 ; CHECK-LABEL: 'reductions'
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v1)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v2)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'reductions'
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> undef)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> undef)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> undef)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> undef)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> undef)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> undef)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v1)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> undef)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v0)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> undef)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v2)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> undef)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Invalid cost for instruction: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> undef)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef)
   %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0)
   %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1)
+  %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> undef)
   %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0)
   %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1)
+  %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> undef)
   %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0)
   %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1)
+  %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> undef)
   %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0)
   %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1)
+  %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> undef)
   %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0)
   %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1)
+  %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> undef)
   %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0)
   %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1)
+  %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> undef)
   %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0)
   %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v1)
+  %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> undef)
   %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v0)
   %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1)
+  %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> undef)
   %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0)
   %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1)
 
+  %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, <vscale x 1 x float> undef)
   %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> %v2)
   %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> %v3)
+  %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> undef)
   %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
   %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
+  %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> undef)
   %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
   %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
 
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll b/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll
index 7a801db1f35fa..777f4a2c4a64e 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll
@@ -14,6 +14,7 @@ define void @umin() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <vscale x 4 x i16> @llvm.umin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <vscale x 8 x i16> @llvm.umin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <vscale x 16 x i16> @llvm.umin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V1i32 = call <vscale x 1 x i32> @llvm.umin.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <vscale x 4 x i32> @llvm.umin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <vscale x 8 x i32> @llvm.umin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
@@ -30,6 +31,7 @@ define void @umin() {
   %V4i16 = call <vscale x 4 x i16> @llvm.umin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
   %V8i16 = call <vscale x 8 x i16> @llvm.umin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
   %V16i16 = call <vscale x 16 x i16> @llvm.umin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+  %V1i32 = call <vscale x 1 x i32> @llvm.umin.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
   %V2i32 = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
   %V4i32 = call <vscale x 4 x i32> @llvm.umin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
   %V8i32 = call <vscale x 8 x i32> @llvm.umin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
@@ -49,6 +51,7 @@ define void @umax() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <vscale x 4 x i16> @llvm.umax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <vscale x 8 x i16> @llvm.umax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <vscale x 16 x i16> @llvm.umax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V1i32 = call <vscale x 1 x i32> @llvm.umax.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <vscale x 2 x i32> @llvm.umax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <vscale x 8 x i32> @llvm.umax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
@@ -65,6 +68,7 @@ define void @umax() {
   %V4i16 = call <vscale x 4 x i16> @llvm.umax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
   %V8i16 = call <vscale x 8 x i16> @llvm.umax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
   %V16i16 = call <vscale x 16 x i16> @llvm.umax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+  %V1i32 = call <vscale x 1 x i32> @llvm.umax.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
   %V2i32 = call <vscale x 2 x i32> @llvm.umax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
   %V4i32 = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
   %V8i32 = call <vscale x 8 x i32> @llvm.umax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
@@ -84,6 +88,7 @@ define void @smin() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <vscale x 4 x i16> @llvm.smin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <vscale x 8 x i16> @llvm.smin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <vscale x 16 x i16> @llvm.smin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V1i32 = call <vscale x 1 x i32> @llvm.smin.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <vscale x 2 x i32> @llvm.smin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <vscale x 4 x i32> @llvm.smin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <vscale x 8 x i32> @llvm.smin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
@@ -100,6 +105,7 @@ define void @smin() {
   %V4i16 = call <vscale x 4 x i16> @llvm.smin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
   %V8i16 = call <vscale x 8 x i16> @llvm.smin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
   %V16i16 = call <vscale x 16 x i16> @llvm.smin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+  %V1i32 = call <vscale x 1 x i32> @llvm.smin.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
   %V2i32 = call <vscale x 2 x i32> @llvm.smin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
   %V4i32 = call <vscale x 4 x i32> @llvm.smin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
   %V8i32 = call <vscale x 8 x i32> @llvm.smin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
@@ -119,6 +125,7 @@ define void @smax() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <vscale x 4 x i16> @llvm.smax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <vscale x 8 x i16> @llvm.smax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <vscale x 16 x i16> @llvm.smax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V1i32 = call <vscale x 1 x i32> @llvm.smax.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <vscale x 2 x i32> @llvm.smax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <vscale x 8 x i32> @llvm.smax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
@@ -135,6 +142,7 @@ define void @smax() {
   %V4i16 = call <vscale x 4 x i16> @llvm.smax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef)
   %V8i16 = call <vscale x 8 x i16> @llvm.smax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
   %V16i16 = call <vscale x 16 x i16> @llvm.smax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef)
+  %V1i32 = call <vscale x 1 x i32> @llvm.smax.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef)
   %V2i32 = call <vscale x 2 x i32> @llvm.smax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef)
   %V4i32 = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
   %V8i32 = call <vscale x 8 x i32> @llvm.smax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef)
@@ -145,6 +153,7 @@ define void @smax() {
 
 define void @minnum() {
 ; CHECK-LABEL: 'minnum'
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V1f32 = call <vscale x 1 x float> @llvm.minnum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f32 = call <vscale x 2 x float> @llvm.minnum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f32 = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8f32 = call <vscale x 8 x float> @llvm.minnum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
@@ -156,6 +165,7 @@ define void @minnum() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16f16 = call <vscale x 16 x half> @llvm.minnum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  %V1f32 = call <vscale x 1 x float> @llvm.minnum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
   %V2f32 = call <vscale x 2 x float> @llvm.minnum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
   %V4f32 = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
   %V8f32 = call <vscale x 8 x float> @llvm.minnum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
@@ -170,6 +180,7 @@ define void @minnum() {
 
 define void @maxnum() {
 ; CHECK-LABEL: 'maxnum'
+; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V1f32 = call <vscale x 1 x float> @llvm.maxnum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2f32 = call <vscale x 2 x float> @llvm.maxnum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4f32 = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8f32 = call <vscale x 8 x float> @llvm.maxnum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
@@ -181,6 +192,7 @@ define void @maxnum() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16f16 = call <vscale x 16 x half> @llvm.maxnum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  %V1f32 = call <vscale x 1 x float> @llvm.maxnum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
   %V2f32 = call <vscale x 2 x float> @llvm.maxnum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
   %V4f32 = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
   %V8f32 = call <vscale x 8 x float> @llvm.maxnum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)

From ccd8d0497a92c50e52933cd5d712a32a192ba6aa Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha <ahmed@bougacha.org>
Date: Fri, 9 Aug 2024 11:49:50 -0700
Subject: [PATCH 160/427] [clang] Wire -fptrauth-returns to "ptrauth-returns"
 fn attribute. (#102416)

We already ended up with -fptrauth-returns, the feature macro, the lang
opt, and the actual backend lowering.

The only part left is threading it all through PointerAuthOptions, to
drive the addition of the "ptrauth-returns" attribute to generated
functions.
While there, do minor cleanup on ptrauth-function-attributes.c.

This also adds ptrauth_key_return_address to ptrauth.h.

(cherry picked from commit 2eb6e30fe83ccce3cf01e596e73fa6385facd44b)
---
 clang/include/clang/Basic/PointerAuthOptions.h   | 3 +++
 clang/lib/CodeGen/CodeGenFunction.cpp            | 2 ++
 clang/lib/Frontend/CompilerInvocation.cpp        | 4 +++-
 clang/lib/Headers/ptrauth.h                      | 6 ++++++
 clang/test/CodeGen/ptrauth-function-attributes.c | 9 +++++++--
 5 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Basic/PointerAuthOptions.h b/clang/include/clang/Basic/PointerAuthOptions.h
index 417b4b00648c7..c0ab35bce5d84 100644
--- a/clang/include/clang/Basic/PointerAuthOptions.h
+++ b/clang/include/clang/Basic/PointerAuthOptions.h
@@ -159,6 +159,9 @@ class PointerAuthSchema {
 };
 
 struct PointerAuthOptions {
+  /// Should return addresses be authenticated?
+  bool ReturnAddresses = false;
+
   /// Do indirect goto label addresses need to be authenticated?
   bool IndirectGotos = false;
 
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index af201554898f3..4dc57d0ff5b26 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -880,6 +880,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
 
   // Add pointer authentication attributes.
   const CodeGenOptions &CodeGenOpts = CGM.getCodeGenOpts();
+  if (CodeGenOpts.PointerAuth.ReturnAddresses)
+    Fn->addFnAttr("ptrauth-returns");
   if (CodeGenOpts.PointerAuth.FunctionPointers)
     Fn->addFnAttr("ptrauth-calls");
   if (CodeGenOpts.PointerAuth.IndirectGotos)
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index f6b6c44a4cab6..fa5d076c202a3 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1505,13 +1505,15 @@ void CompilerInvocation::setDefaultPointerAuthOptions(
         PointerAuthSchema(Key::ASIA, false, Discrimination::Type);
   }
   Opts.IndirectGotos = LangOpts.PointerAuthIndirectGotos;
+  Opts.ReturnAddresses = LangOpts.PointerAuthReturns;
 }
 
 static void parsePointerAuthOptions(PointerAuthOptions &Opts,
                                     const LangOptions &LangOpts,
                                     const llvm::Triple &Triple,
                                     DiagnosticsEngine &Diags) {
-  if (!LangOpts.PointerAuthCalls && !LangOpts.PointerAuthIndirectGotos)
+  if (!LangOpts.PointerAuthCalls && !LangOpts.PointerAuthIndirectGotos &&
+      !LangOpts.PointerAuthReturns)
     return;
 
   CompilerInvocation::setDefaultPointerAuthOptions(Opts, LangOpts, Triple);
diff --git a/clang/lib/Headers/ptrauth.h b/clang/lib/Headers/ptrauth.h
index 4724155b0dc79..154b599862a8e 100644
--- a/clang/lib/Headers/ptrauth.h
+++ b/clang/lib/Headers/ptrauth.h
@@ -28,6 +28,12 @@ typedef enum {
   /* A process-specific key which can be used to sign data pointers. */
   ptrauth_key_process_dependent_data = ptrauth_key_asdb,
 
+  /* The key used to sign return addresses on the stack.
+     The extra data is based on the storage address of the return address.
+     On AArch64, that is always the storage address of the return address + 8
+     (or, in other words, the value of the stack pointer on function entry) */
+  ptrauth_key_return_address = ptrauth_key_process_dependent_code,
+
   /* The key used to sign C function pointers.
      The extra data is always 0. */
   ptrauth_key_function_pointer = ptrauth_key_process_independent_code,
diff --git a/clang/test/CodeGen/ptrauth-function-attributes.c b/clang/test/CodeGen/ptrauth-function-attributes.c
index 6a09cd37bf485..17ebf9d6e2e01 100644
--- a/clang/test/CodeGen/ptrauth-function-attributes.c
+++ b/clang/test/CodeGen/ptrauth-function-attributes.c
@@ -1,11 +1,14 @@
+// RUN: %clang_cc1 -triple arm64-apple-ios                    -emit-llvm %s  -o - | FileCheck %s --check-prefixes=ALL,OFF
 // RUN: %clang_cc1 -triple arm64e-apple-ios                   -emit-llvm %s  -o - | FileCheck %s --check-prefixes=ALL,OFF
 // RUN: %clang_cc1 -triple aarch64-linux-gnu                  -emit-llvm %s  -o - | FileCheck %s --check-prefixes=ALL,OFF
 
-// RUN: %clang_cc1 -triple arm64-apple-ios  -fptrauth-calls   -emit-llvm %s  -o - | FileCheck %s --check-prefixes=ALL,CALLS
+// RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-calls  -emit-llvm %s  -o - | FileCheck %s --check-prefixes=ALL,CALLS
 // RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls  -emit-llvm %s  -o - | FileCheck %s --check-prefixes=ALL,CALLS
 
+// RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-returns -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,RETS
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-returns -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,RETS
+
 // RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-indirect-gotos -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,GOTOS
-// RUN: %clang_cc1 -triple arm64e-apple-ios  -fptrauth-indirect-gotos -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,GOTOS
 // RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-indirect-gotos -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,GOTOS
 
 // ALL: define {{(dso_local )?}}void @test() #0
@@ -14,6 +17,8 @@ void test() {
 
 // CALLS: attributes #0 = {{{.*}} "ptrauth-calls" {{.*}}}
 
+// RETS: attributes #0 = {{{.*}} "ptrauth-returns" {{.*}}}
+
 // GOTOS: attributes #0 = {{{.*}} "ptrauth-indirect-gotos" {{.*}}}
 
 // OFF-NOT: attributes {{.*}} "ptrauth-

From e908a0099ce392d694eefbeda27aadfe54922ba5 Mon Sep 17 00:00:00 2001
From: Alex Langford <alangford@apple.com>
Date: Fri, 9 Aug 2024 12:50:42 -0700
Subject: [PATCH 161/427] [lldb] Move definition of SBSaveCoreOptions dtor out
 of header (#102539)

This class is technically not usable in its current state. When you use
it in a simple C++ project, your compiler will complain about an
incomplete definition of SaveCoreOptions. Normally this isn't a problem,
other classes in the SBAPI do this. The difference is that
SBSaveCoreOptions has a default destructor in the header, so the
compiler will attempt to generate the code for the destructor with an
incomplete definition of the impl type.

All methods for every class, including constructors and destructors,
must have a separate implementation not in a header.

(cherry picked from commit 101cf540e698529d3dd899d00111bcb654a3c12b)
---
 lldb/include/lldb/API/SBSaveCoreOptions.h | 2 +-
 lldb/source/API/SBSaveCoreOptions.cpp     | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/lldb/include/lldb/API/SBSaveCoreOptions.h b/lldb/include/lldb/API/SBSaveCoreOptions.h
index e77496bd3a4a0..75506fd752e76 100644
--- a/lldb/include/lldb/API/SBSaveCoreOptions.h
+++ b/lldb/include/lldb/API/SBSaveCoreOptions.h
@@ -17,7 +17,7 @@ class LLDB_API SBSaveCoreOptions {
 public:
   SBSaveCoreOptions();
   SBSaveCoreOptions(const lldb::SBSaveCoreOptions &rhs);
-  ~SBSaveCoreOptions() = default;
+  ~SBSaveCoreOptions();
 
   const SBSaveCoreOptions &operator=(const lldb::SBSaveCoreOptions &rhs);
 
diff --git a/lldb/source/API/SBSaveCoreOptions.cpp b/lldb/source/API/SBSaveCoreOptions.cpp
index 6c3f74596203d..19ca83f932bcf 100644
--- a/lldb/source/API/SBSaveCoreOptions.cpp
+++ b/lldb/source/API/SBSaveCoreOptions.cpp
@@ -29,6 +29,8 @@ SBSaveCoreOptions::SBSaveCoreOptions(const SBSaveCoreOptions &rhs) {
   m_opaque_up = clone(rhs.m_opaque_up);
 }
 
+SBSaveCoreOptions::~SBSaveCoreOptions() = default;
+
 const SBSaveCoreOptions &
 SBSaveCoreOptions::operator=(const SBSaveCoreOptions &rhs) {
   LLDB_INSTRUMENT_VA(this, rhs);

From a02ee2f6ff2e806c69851182fd6649338020dcae Mon Sep 17 00:00:00 2001
From: Sirraide <aeternalmail@gmail.com>
Date: Mon, 5 Aug 2024 14:02:15 +0200
Subject: [PATCH 162/427] [Clang] Define __cpp_pack_indexing (#101956)

Following the discussion on #101448 this defines
`__cpp_pack_indexing`. Since pack indexing is currently
supported in all language modes, the feature test macro
is also defined in all language modes.

(cherry picked from commit c65afad9c58474a784633314e945c874ed06584a)
---
 clang/docs/LanguageExtensions.rst       | 1 +
 clang/lib/Frontend/InitPreprocessor.cpp | 1 +
 clang/test/Lexer/cxx-features.cpp       | 4 ++++
 3 files changed, 6 insertions(+)

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 81784c75081ba..1c4a6ecca2142 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -1503,6 +1503,7 @@ Conditional ``explicit``                     __cpp_conditional_explicit       C+
 ``static operator()``                        __cpp_static_call_operator       C++23         C++03
 Attributes on Lambda-Expressions                                              C++23         C++11
 Attributes on Structured Bindings            __cpp_structured_bindings        C++26         C++03
+Pack Indexing                                __cpp_pack_indexing              C++26         C++03
 ``= delete ("should have a reason");``       __cpp_deleted_function           C++26         C++03
 -------------------------------------------- -------------------------------- ------------- -------------
 Designated initializers (N494)                                                C99           C89
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index 920ddf7e59913..3ed7243deba8a 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -763,6 +763,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
   Builder.defineMacro("__cpp_placeholder_variables", "202306L");
 
   // C++26 features supported in earlier language modes.
+  Builder.defineMacro("__cpp_pack_indexing", "202311L");
   Builder.defineMacro("__cpp_deleted_function", "202403L");
 
   if (LangOpts.Char8)
diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp
index 4c2aa3ae2c544..08b732132228b 100644
--- a/clang/test/Lexer/cxx-features.cpp
+++ b/clang/test/Lexer/cxx-features.cpp
@@ -38,6 +38,10 @@
 #error "wrong value for __cpp_deleted_function"
 #endif
 
+#if check(pack_indexing, 202311, 202311, 202311, 202311, 202311, 202311, 202311)
+#error "wrong value for __cpp_pack_indexing"
+#endif
+
 #if check(placeholder_variables, 202306, 202306, 202306, 202306, 202306, 202306, 202306)
 #error "wrong value for __cpp_placeholder_variables"
 #endif

From 5972d4ddf9a66eb2444c048aab1f5896f0a30d15 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Mon, 5 Aug 2024 14:40:46 -0700
Subject: [PATCH 163/427] workflows/release-binaries-all: Pass secrets on to
 release-binaries workflow (#101866)

A called workflow does not have access to secrets by default, so we need
to explicitly pass any secret that we want to use.

(cherry picked from commit 1fb1a5d8e2c5a0cbaeb39ead68352e5e55752a6d)
---
 .github/workflows/release-binaries-all.yml | 6 +++++-
 .github/workflows/release-binaries.yml     | 5 +++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/release-binaries-all.yml b/.github/workflows/release-binaries-all.yml
index 73c9d96946e33..394b0c74d24ed 100644
--- a/.github/workflows/release-binaries-all.yml
+++ b/.github/workflows/release-binaries-all.yml
@@ -91,4 +91,8 @@ jobs:
       release-version: "${{ needs.setup-variables.outputs.release-version }}"
       upload: ${{ needs.setup-variables.outputs.upload == 'true'}}
       runs-on: "${{ matrix.runs-on }}"
-
+    secrets:
+      # This will be empty for pull_request events, but that's fine, because
+      # the release-binaries workflow does not use this secret for the
+      # pull_request event.
+      RELEASE_TASKS_USER_TOKEN: ${{ secrets.RELEASE_TASKS_USER_TOKEN }}
diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml
index fae04e19b246b..509016e5b89c4 100644
--- a/.github/workflows/release-binaries.yml
+++ b/.github/workflows/release-binaries.yml
@@ -37,6 +37,11 @@ on:
         description: "Runner to use for the build"
         required: true
         type: string
+    secrets:
+      RELEASE_TASKS_USER_TOKEN:
+        description: "Secret used to check user permissions."
+        required: false
+
 
 permissions:
   contents: read # Default everything to read-only

From b2de37237c9ad0b3efafb101c23b075445a6c4ef Mon Sep 17 00:00:00 2001
From: Sharadh Rajaraman <r.sharadh@outlook.sg>
Date: Tue, 6 Aug 2024 16:05:55 +0100
Subject: [PATCH 164/427] [clang][driver][clang-cl] Support `--precompile` and
 `-fmodule-*` options in Clang-CL (#98761)

This PR is the first step in improving the situation for `clang-cl`
detailed in [this LLVM Discourse
thread](https://discourse.llvm.org/t/clang-cl-exe-support-for-c-modules/72257/28).
There has been some work done in #89772. I believe this is somewhat
orthogonal.

This is a work-in-progress; the functionality has only been tested with
the [basic 'Hello World'
example](https://clang.llvm.org/docs/StandardCPlusPlusModules.html#quick-start),
and proper test cases need to be written. I'd like some thoughts on
this, thanks!

Partially resolves #64118.

(cherry picked from commit bd576fe34285c4dcd04837bf07a89a9c00e3cd5e)
---
 clang/docs/StandardCPlusPlusModules.rst | 17 ++++++++++-------
 clang/docs/UsersManual.rst              |  6 ++++++
 clang/include/clang/Driver/Options.td   |  9 ++++++---
 clang/test/Driver/cl-cxx20-modules.cppm |  8 ++++++++
 4 files changed, 30 insertions(+), 10 deletions(-)
 create mode 100644 clang/test/Driver/cl-cxx20-modules.cppm

diff --git a/clang/docs/StandardCPlusPlusModules.rst b/clang/docs/StandardCPlusPlusModules.rst
index b87491910e222..2478a77e7640c 100644
--- a/clang/docs/StandardCPlusPlusModules.rst
+++ b/clang/docs/StandardCPlusPlusModules.rst
@@ -398,6 +398,16 @@ BMIs cannot be shipped in an archive to create a module library. Instead, the
 BMIs(``*.pcm``) are compiled into object files(``*.o``) and those object files
 are added to the archive instead.
 
+clang-cl
+~~~~~~~~
+
+``clang-cl`` supports the same options as ``clang++`` for modules as detailed above;
+there is no need to prefix these options with ``/clang:``. Note that ``cl.exe``
+`options to emit/consume IFC files <https://devblogs.microsoft.com/cppblog/using-cpp-modules-in-msvc-from-the-command-line-part-1/>` are *not* supported.
+The resultant precompiled modules are also not compatible for use with ``cl.exe``.
+
+We recommend that build system authors use the above-mentioned ``clang++`` options  with ``clang-cl`` to build modules.
+
 Consistency Requirements
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -1387,13 +1397,6 @@ have ``.cppm`` (or ``.ccm``, ``.cxxm``, ``.c++m``) as the file extension.
 However, the behavior is inconsistent with other compilers. This is tracked by
 `#57416 <https://github.com/llvm/llvm-project/issues/57416>`_.
 
-clang-cl is not compatible with standard C++ modules
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-``/clang:-fmodule-file`` and ``/clang:-fprebuilt-module-path`` cannot be used
-to specify the BMI with ``clang-cl.exe``. This is tracked by
-`#64118 <https://github.com/llvm/llvm-project/issues/64118>`_.
-
 Incorrect ODR violation diagnostics
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index e9b95739ea2ab..64e991451bf70 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -4745,6 +4745,12 @@ Execute ``clang-cl /?`` to see a list of supported options:
       -flto=<value>           Set LTO mode to either 'full' or 'thin'
       -flto                   Enable LTO in 'full' mode
       -fmerge-all-constants   Allow merging of constants
+      -fmodule-file=<module_name>=<module-file>
+                              Use the specified module file that provides the module <module_name>
+      -fmodule-header=<header>
+                              Build <header> as a C++20 header unit
+      -fmodule-output=<path>
+                              Save intermediate module file results when compiling a standard C++ module unit.
       -fms-compatibility-version=<value>
                               Dot-separated value representing the Microsoft compiler version
                               number to report in _MSC_VER (0 = don't define it; default is same value as installed cl.exe, or 1933)
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 014a2bd85fdc6..5e412cb5f5189 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3107,7 +3107,7 @@ def fmodules_user_build_path : Separate<["-"], "fmodules-user-build-path">, Grou
   HelpText<"Specify the module user build path">,
   MarshallingInfoString<HeaderSearchOpts<"ModuleUserBuildPath">>;
 def fprebuilt_module_path : Joined<["-"], "fprebuilt-module-path=">, Group<i_Group>,
-  Flags<[]>, Visibility<[ClangOption, CC1Option]>,
+  Flags<[]>, Visibility<[ClangOption, CLOption, CC1Option]>,
   MetaVarName<"<directory>">,
   HelpText<"Specify the prebuilt module path">;
 defm prebuilt_implicit_modules : BoolFOption<"prebuilt-implicit-modules",
@@ -3116,11 +3116,11 @@ defm prebuilt_implicit_modules : BoolFOption<"prebuilt-implicit-modules",
   NegFlag<SetFalse>, BothFlags<[], [ClangOption, CC1Option]>>;
 
 def fmodule_output_EQ : Joined<["-"], "fmodule-output=">,
-  Flags<[NoXarchOption]>, Visibility<[ClangOption, CC1Option]>,
+  Flags<[NoXarchOption]>, Visibility<[ClangOption, CLOption, CC1Option]>,
   MarshallingInfoString<FrontendOpts<"ModuleOutputPath">>,
   HelpText<"Save intermediate module file results when compiling a standard C++ module unit.">;
 def fmodule_output : Flag<["-"], "fmodule-output">, Flags<[NoXarchOption]>,
-  Visibility<[ClangOption, CC1Option]>,
+  Visibility<[ClangOption, CLOption, CC1Option]>,
   HelpText<"Save intermediate module file results when compiling a standard C++ module unit.">;
 
 defm skip_odr_check_in_gmf : BoolOption<"f", "skip-odr-check-in-gmf",
@@ -3300,8 +3300,10 @@ def fretain_comments_from_system_headers : Flag<["-"], "fretain-comments-from-sy
   Visibility<[ClangOption, CC1Option]>,
   MarshallingInfoFlag<LangOpts<"RetainCommentsFromSystemHeaders">>;
 def fmodule_header : Flag <["-"], "fmodule-header">, Group<f_Group>,
+  Visibility<[ClangOption, CLOption]>,
   HelpText<"Build a C++20 Header Unit from a header">;
 def fmodule_header_EQ : Joined<["-"], "fmodule-header=">, Group<f_Group>,
+  Visibility<[ClangOption, CLOption]>,
   MetaVarName<"<kind>">,
   HelpText<"Build a C++20 Header Unit from a header that should be found in the user (fmodule-header=user) or system (fmodule-header=system) search path.">;
 
@@ -5946,6 +5948,7 @@ def _output : Separate<["--"], "output">, Alias<o>;
 def _param : Separate<["--"], "param">, Group<CompileOnly_Group>;
 def _param_EQ : Joined<["--"], "param=">, Alias<_param>;
 def _precompile : Flag<["--"], "precompile">, Flags<[NoXarchOption]>,
+  Visibility<[ClangOption, CLOption]>,
   Group<Action_Group>, HelpText<"Only precompile the input">;
 def _prefix_EQ : Joined<["--"], "prefix=">, Alias<B>;
 def _prefix : Separate<["--"], "prefix">, Alias<B>;
diff --git a/clang/test/Driver/cl-cxx20-modules.cppm b/clang/test/Driver/cl-cxx20-modules.cppm
new file mode 100644
index 0000000000000..06df929c42342
--- /dev/null
+++ b/clang/test/Driver/cl-cxx20-modules.cppm
@@ -0,0 +1,8 @@
+// RUN: %clang_cl /std:c++20 --precompile -### -- %s 2>&1 | FileCheck --check-prefix=PRECOMPILE %s
+// PRECOMPILE: -emit-module-interface
+
+// RUN: %clang_cl /std:c++20 --fmodule-file=Foo=Foo.pcm -### -- %s 2>&1 | FileCheck --check-prefix=FMODULEFILE %s
+// FMODULEFILE: -fmodule-file=Foo=Foo.pcm
+
+// RUN: %clang_cl /std:c++20 --fprebuilt-module-path=. -### -- %s 2>&1 | FileCheck --check-prefix=FPREBUILT %s
+// FPREBUILT: -fprebuilt-module-path=.

From eea8e4ddd8206a5b04a7c7217e76a48ea2ff386b Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Wed, 7 Aug 2024 14:19:22 -0700
Subject: [PATCH 165/427] workflows: Fix permissions for release-sources job
 (#100750)

For reusable workflows, the called workflow cannot upgrade it's
permissions, and since the default permission is none, we need to
explicitly declare 'contents: read' when calling the release-sources
workflow.

Fixes the error:
The workflow is requesting 'contents: read', but is only allowed
'contents: none'.

(cherry picked from commit 82c2259aeb87f5cb418decfb6a1961287055e5d2)
---
 .github/workflows/release-tasks.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/release-tasks.yml b/.github/workflows/release-tasks.yml
index b7fea0785fed2..cf42730aaf817 100644
--- a/.github/workflows/release-tasks.yml
+++ b/.github/workflows/release-tasks.yml
@@ -101,6 +101,7 @@ jobs:
   release-sources:
     name: Package Release Sources
     permissions:
+      contents: read
       id-token: write
       attestations: write
     needs:

From 866686180a316aee091c82c924971a238fbbd817 Mon Sep 17 00:00:00 2001
From: Daniel Kiss <daniel.kiss@arm.com>
Date: Fri, 9 Aug 2024 17:51:38 +0200
Subject: [PATCH 166/427] [Arm][AArch64][Clang] Respect function's branch
 protection attributes. (#101978)

Default attributes assigned to all functions according to the command
line parameters. Some functions might have their own attributes and we
need to set or remove attributes accordingly.
Tests are updated to test this scenarios too.

(cherry picked from commit 9e9fa00dcb9522db3f78d921eda6a18b9ee568bb)
---
 clang/lib/CodeGen/CGCall.cpp                  |  2 +-
 clang/lib/CodeGen/TargetInfo.cpp              | 32 ++++++++++++++++---
 clang/lib/CodeGen/TargetInfo.h                |  7 ++--
 .../CodeGen/aarch64-branch-protection-attr.c  | 13 +++++++-
 .../CodeGen/arm-branch-protection-attr-1.c    |  6 ++++
 5 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 234a9c16e39df..6e69e84a2344c 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2032,7 +2032,7 @@ static void getTrivialDefaultFunctionAttributes(
   }
 
   TargetInfo::BranchProtectionInfo BPI(LangOpts);
-  TargetCodeGenInfo::setBranchProtectionFnAttributes(BPI, FuncAttrs);
+  TargetCodeGenInfo::initBranchProtectionFnAttributes(BPI, FuncAttrs);
 }
 
 /// Merges `target-features` from \TargetOpts and \F, and sets the result in
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 38faa50cf19cf..64a9a5554caf7 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -209,13 +209,37 @@ llvm::Value *TargetCodeGenInfo::createEnqueuedBlockKernel(
 
 void TargetCodeGenInfo::setBranchProtectionFnAttributes(
     const TargetInfo::BranchProtectionInfo &BPI, llvm::Function &F) {
-  llvm::AttrBuilder FuncAttrs(F.getContext());
-  setBranchProtectionFnAttributes(BPI, FuncAttrs);
-  F.addFnAttrs(FuncAttrs);
+  // Called on already created and initialized function where attributes already
+  // set from command line attributes but some might need to be removed as the
+  // actual BPI is different.
+  if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) {
+    F.addFnAttr("sign-return-address", BPI.getSignReturnAddrStr());
+    F.addFnAttr("sign-return-address-key", BPI.getSignKeyStr());
+  } else {
+    if (F.hasFnAttribute("sign-return-address"))
+      F.removeFnAttr("sign-return-address");
+    if (F.hasFnAttribute("sign-return-address-key"))
+      F.removeFnAttr("sign-return-address-key");
+  }
+
+  auto AddRemoveAttributeAsSet = [&](bool Set, const StringRef &ModAttr) {
+    if (Set)
+      F.addFnAttr(ModAttr);
+    else if (F.hasFnAttribute(ModAttr))
+      F.removeFnAttr(ModAttr);
+  };
+
+  AddRemoveAttributeAsSet(BPI.BranchTargetEnforcement,
+                          "branch-target-enforcement");
+  AddRemoveAttributeAsSet(BPI.BranchProtectionPAuthLR,
+                          "branch-protection-pauth-lr");
+  AddRemoveAttributeAsSet(BPI.GuardedControlStack, "guarded-control-stack");
 }
 
-void TargetCodeGenInfo::setBranchProtectionFnAttributes(
+void TargetCodeGenInfo::initBranchProtectionFnAttributes(
     const TargetInfo::BranchProtectionInfo &BPI, llvm::AttrBuilder &FuncAttrs) {
+  // Only used for initializing attributes in the AttrBuilder, which will not
+  // contain any of these attributes so no need to remove anything.
   if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) {
     FuncAttrs.addAttribute("sign-return-address", BPI.getSignReturnAddrStr());
     FuncAttrs.addAttribute("sign-return-address-key", BPI.getSignKeyStr());
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 2f2138582ba1e..156b4ff4353be 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -414,13 +414,16 @@ class TargetCodeGenInfo {
     return nullptr;
   }
 
+  // Set the Branch Protection Attributes of the Function accordingly to the
+  // BPI. Remove attributes that contradict with current BPI.
   static void
   setBranchProtectionFnAttributes(const TargetInfo::BranchProtectionInfo &BPI,
                                   llvm::Function &F);
 
+  // Add the Branch Protection Attributes of the FuncAttrs.
   static void
-  setBranchProtectionFnAttributes(const TargetInfo::BranchProtectionInfo &BPI,
-                                  llvm::AttrBuilder &FuncAttrs);
+  initBranchProtectionFnAttributes(const TargetInfo::BranchProtectionInfo &BPI,
+                                   llvm::AttrBuilder &FuncAttrs);
 
 protected:
   static std::string qualifyWindowsLibrary(StringRef Lib);
diff --git a/clang/test/CodeGen/aarch64-branch-protection-attr.c b/clang/test/CodeGen/aarch64-branch-protection-attr.c
index e7ae7fb1570c9..c66bce1bee6d3 100644
--- a/clang/test/CodeGen/aarch64-branch-protection-attr.c
+++ b/clang/test/CodeGen/aarch64-branch-protection-attr.c
@@ -1,6 +1,18 @@
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -emit-llvm  -target-cpu generic -target-feature +v8.5a %s -o - \
 // RUN:                               | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -triple aarch64 -emit-llvm  -target-cpu generic -target-feature +v8.5a -mbranch-target-enforce %s -o - \
+// RUN:                               | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -triple aarch64 -emit-llvm  -target-cpu generic -target-feature +v8.5a -mguarded-control-stack %s -o - \
+// RUN:                               | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -triple aarch64 -emit-llvm  -target-cpu generic -target-feature +v8.5a -msign-return-address=non-leaf -msign-return-address-key=a_key %s -o - \
+// RUN:                               | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -triple aarch64 -emit-llvm  -target-cpu generic -target-feature +v8.5a -msign-return-address=all -msign-return-address-key=b_key %s -o - \
+// RUN:                               | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -triple aarch64 -emit-llvm  -target-cpu generic -target-feature +v8.5a -mbranch-protection-pauth-lr -msign-return-address=all -msign-return-address-key=a_key %s -o - \
+// RUN:                               | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -triple aarch64 -emit-llvm  -target-cpu generic -target-feature +v8.5a -mguarded-control-stack -mbranch-target-enforce -mbranch-protection-pauth-lr -msign-return-address=all -msign-return-address-key=a_key %s -o - \
+// RUN:                               | FileCheck %s --check-prefix=CHECK
 
 __attribute__ ((target("branch-protection=none")))
 void none() {}
@@ -83,7 +95,6 @@ void gcs() {}
 
 // CHECK-DAG: attributes #[[#BTIPACLEAF]] = { {{.*}} "branch-target-enforcement" {{.*}}"sign-return-address"="all" "sign-return-address-key"="a_key"
 
-
 // CHECK-DAG: attributes #[[#PAUTHLR]] = { {{.*}} "branch-protection-pauth-lr" {{.*}}"sign-return-address"="non-leaf" "sign-return-address-key"="a_key"
 
 // CHECK-DAG: attributes #[[#PAUTHLR_BKEY]] = { {{.*}} "branch-protection-pauth-lr" {{.*}}"sign-return-address"="non-leaf" "sign-return-address-key"="b_key"
diff --git a/clang/test/CodeGen/arm-branch-protection-attr-1.c b/clang/test/CodeGen/arm-branch-protection-attr-1.c
index dd38cf347f04f..5ca6a1a4d13b4 100644
--- a/clang/test/CodeGen/arm-branch-protection-attr-1.c
+++ b/clang/test/CodeGen/arm-branch-protection-attr-1.c
@@ -1,6 +1,12 @@
 // REQUIRES: arm-registered-target
 // RUN: %clang_cc1 -triple thumbv7m-unknown-unknown-eabi -emit-llvm %s -o - \
 // RUN:                               | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -triple thumbv7m-unknown-unknown-eabi -mbranch-target-enforce -emit-llvm %s -o - \
+// RUN:                               | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -triple thumbv7m-unknown-unknown-eabi -msign-return-address=all -emit-llvm %s -o - \
+// RUN:                               | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -triple thumbv7m-unknown-unknown-eabi -mbranch-target-enforce -msign-return-address=all -emit-llvm %s -o - \
+// RUN:                               | FileCheck %s --check-prefix=CHECK
 
 __attribute__((target("branch-protection=none"))) void none() {}
 // CHECK: define{{.*}} void @none() #[[#NONE:]]

From f6381989f454d3f819847272b0d3bd84d6785aef Mon Sep 17 00:00:00 2001
From: Xing Xue <xingxue@outlook.com>
Date: Thu, 8 Aug 2024 09:15:51 -0400
Subject: [PATCH 167/427] [NFC][libc++][test][AIX] UnXFAIL LIT test
 transform.pass.cpp (#102338)

Remove `XFAIL: LIBCXX-AIX-FIXME` from lit test `transform.pass.cpp` now
that AIX system call `wcsxfrm`/`wcsxfrm_l` is fixed in AIX 7.2.5.8 and
7.3.2.2 and buildbot machines have been upgraded.

Backported from commit cb5912a71061c6558bd4293596dcacc1ce0ca2f6
---
 libcxx/test/std/re/re.traits/transform.pass.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libcxx/test/std/re/re.traits/transform.pass.cpp b/libcxx/test/std/re/re.traits/transform.pass.cpp
index 369dbdf7053ba..80cd3f01faff2 100644
--- a/libcxx/test/std/re/re.traits/transform.pass.cpp
+++ b/libcxx/test/std/re/re.traits/transform.pass.cpp
@@ -8,7 +8,6 @@
 //
 // NetBSD does not support LC_COLLATE at the moment
 // XFAIL: netbsd
-// XFAIL: LIBCXX-AIX-FIXME
 
 // REQUIRES: locale.cs_CZ.ISO8859-2
 

From 4fd6b324ea2d44418832e7f90b2e5ffc1972c900 Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Sat, 10 Aug 2024 22:54:07 +0200
Subject: [PATCH 168/427] [llvm-exegesis][unittests] Also disable
 SubprocessMemoryTest on SPARC (#102755)

Three `llvm-exegesis` tests
```
  LLVM-Unit :: tools/llvm-exegesis/./LLVMExegesisTests/SubprocessMemoryTest/DefinitionFillsCompletely
  LLVM-Unit :: tools/llvm-exegesis/./LLVMExegesisTests/SubprocessMemoryTest/MultipleDefinitions
  LLVM-Unit :: tools/llvm-exegesis/./LLVMExegesisTests/SubprocessMemoryTest/OneDefinition
```
`FAIL` on Linux/sparc64 like
```
llvm/unittests/tools/llvm-exegesis/X86/SubprocessMemoryTest.cpp:68: Failure
Expected equality of these values:
  SharedMemoryMapping[I]
    Which is: '\0'
  ExpectedValue[I]
    Which is: '\xAA' (170)
```
It seems like this test only works on little-endian hosts: three
sub-tests are already disabled on powerpc and s390x (both big-endian),
and the fourth is additionally guarded against big-endian hosts (making
the other guards unnecessary).

However, since it's not been analyzed if this is really an endianess
issue, this patch disables the whole test on powerpc and s390x as before
adding sparc to the mix.

Tested on `sparc64-unknown-linux-gnu` and `x86_64-pc-linux-gnu`.

(cherry picked from commit a417083e27b155dc92b7f7271c0093aee0d7231c)
---
 .../X86/SubprocessMemoryTest.cpp              | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SubprocessMemoryTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SubprocessMemoryTest.cpp
index 7c23e7b7e9c5a..f61254ac74e14 100644
--- a/llvm/unittests/tools/llvm-exegesis/X86/SubprocessMemoryTest.cpp
+++ b/llvm/unittests/tools/llvm-exegesis/X86/SubprocessMemoryTest.cpp
@@ -24,7 +24,8 @@
 namespace llvm {
 namespace exegesis {
 
-#if defined(__linux__) && !defined(__ANDROID__)
+#if defined(__linux__) && !defined(__ANDROID__) &&                             \
+    !(defined(__powerpc__) || defined(__s390x__) || defined(__sparc__))
 
 // This needs to be updated anytime a test is added or removed from the test
 // suite.
@@ -77,20 +78,12 @@ class SubprocessMemoryTest : public X86TestBase {
 // memory calls not working in some cases, so they have been disabled.
 // TODO(boomanaiden154): Investigate and fix this issue on PPC.
 
-#if defined(__powerpc__) || defined(__s390x__)
-TEST_F(SubprocessMemoryTest, DISABLED_OneDefinition) {
-#else
 TEST_F(SubprocessMemoryTest, OneDefinition) {
-#endif
   testCommon({{"test1", {APInt(8, 0xff), 4096, 0}}}, 0);
   checkSharedMemoryDefinition(getSharedMemoryName(0, 0), 4096, {0xff});
 }
 
-#if defined(__powerpc__) || defined(__s390x__)
-TEST_F(SubprocessMemoryTest, DISABLED_MultipleDefinitions) {
-#else
 TEST_F(SubprocessMemoryTest, MultipleDefinitions) {
-#endif
   testCommon({{"test1", {APInt(8, 0xaa), 4096, 0}},
               {"test2", {APInt(8, 0xbb), 4096, 1}},
               {"test3", {APInt(8, 0xcc), 4096, 2}}},
@@ -100,11 +93,7 @@ TEST_F(SubprocessMemoryTest, MultipleDefinitions) {
   checkSharedMemoryDefinition(getSharedMemoryName(1, 2), 4096, {0xcc});
 }
 
-#if defined(__powerpc__) || defined(__s390x__)
-TEST_F(SubprocessMemoryTest, DISABLED_DefinitionFillsCompletely) {
-#else
 TEST_F(SubprocessMemoryTest, DefinitionFillsCompletely) {
-#endif
   testCommon({{"test1", {APInt(8, 0xaa), 4096, 0}},
               {"test2", {APInt(16, 0xbbbb), 4096, 1}},
               {"test3", {APInt(24, 0xcccccc), 4096, 2}}},
@@ -118,7 +107,7 @@ TEST_F(SubprocessMemoryTest, DefinitionFillsCompletely) {
 }
 
 // The following test is only supported on little endian systems.
-#if defined(__powerpc__) || defined(__s390x__) || __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 TEST_F(SubprocessMemoryTest, DISABLED_DefinitionEndTruncation) {
 #else
 TEST_F(SubprocessMemoryTest, DefinitionEndTruncation) {
@@ -150,7 +139,7 @@ TEST_F(SubprocessMemoryTest, DefinitionEndTruncation) {
   checkSharedMemoryDefinition(getSharedMemoryName(3, 0), 4096, Test1Expected);
 }
 
-#endif // defined(__linux__) && !defined(__ANDROID__)
+#endif // __linux__ && !__ANDROID__ && !(__powerpc__ || __s390x__ || __sparc__)
 
 } // namespace exegesis
 } // namespace llvm

From efdd0e9fda292f9040367fcd6a9b2c2590fea739 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Sat, 10 Aug 2024 13:31:35 -0700
Subject: [PATCH 169/427] [clang-format] Fix a serious bug in `git clang-format
 -f` (#102629)

With the --force (or -f) option, git-clang-format wipes out input files
excluded by a .clang-format-ignore file if they have unstaged changes.

This patch adds a hidden clang-format option --list-ignored that lists
such excluded files for git-clang-format to filter out.

Fixes #102459.

(cherry picked from commit 986bc3d0719af653fecb77e8cfc59f39bec148fd)
---
 clang/test/Format/list-ignored.cpp        | 60 +++++++++++++++++++++++
 clang/tools/clang-format/ClangFormat.cpp  | 12 ++++-
 clang/tools/clang-format/git-clang-format | 15 +++++-
 3 files changed, 84 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/Format/list-ignored.cpp

diff --git a/clang/test/Format/list-ignored.cpp b/clang/test/Format/list-ignored.cpp
new file mode 100644
index 0000000000000..6e65a68a6f996
--- /dev/null
+++ b/clang/test/Format/list-ignored.cpp
@@ -0,0 +1,60 @@
+// RUN: rm -rf %t.dir
+// RUN: mkdir -p %t.dir/level1/level2
+
+// RUN: cd %t.dir
+// RUN: echo "*" > .clang-format-ignore
+// RUN: echo "level*/*.c*" >> .clang-format-ignore
+// RUN: echo "*/*2/foo.*" >> .clang-format-ignore
+
+// RUN: touch foo.cc
+// RUN: clang-format -list-ignored .clang-format-ignore foo.cc \
+// RUN:   | FileCheck %s
+// CHECK: .clang-format-ignore
+// CHECK-NEXT: foo.cc
+
+// RUN: cd level1
+// RUN: touch bar.cc baz.c
+// RUN: clang-format -list-ignored bar.cc baz.c \
+// RUN:   | FileCheck %s -check-prefix=CHECK2
+// CHECK2: bar.cc
+// CHECK2-NEXT: baz.c
+
+// RUN: cd level2
+// RUN: touch foo.c foo.js
+// RUN: clang-format -list-ignored foo.c foo.js \
+// RUN:   | FileCheck %s -check-prefix=CHECK3
+// CHECK3: foo.c
+// CHECK3-NEXT: foo.js
+
+// RUN: touch .clang-format-ignore
+// RUN: clang-format -list-ignored foo.c foo.js \
+// RUN:   | FileCheck %s -allow-empty -check-prefix=CHECK4
+// CHECK4-NOT: foo.c
+// CHECK4-NOT: foo.js
+
+// RUN: echo "*.js" > .clang-format-ignore
+// RUN: clang-format -list-ignored foo.c foo.js \
+// RUN:   | FileCheck %s -check-prefix=CHECK5
+// CHECK5-NOT: foo.c
+// CHECK5: foo.js
+
+// RUN: cd ../..
+// RUN: clang-format -list-ignored *.cc level1/*.c* level1/level2/foo.* \
+// RUN:   | FileCheck %s -check-prefix=CHECK6
+// CHECK6: foo.cc
+// CHECK6-NEXT: bar.cc
+// CHECK6-NEXT: baz.c
+// CHECK6-NOT: foo.c
+// CHECK6-NEXT: foo.js
+
+// RUN: rm .clang-format-ignore
+// RUN: clang-format -list-ignored *.cc level1/*.c* level1/level2/foo.* \
+// RUN:   | FileCheck %s -check-prefix=CHECK7
+// CHECK7-NOT: foo.cc
+// CHECK7-NOT: bar.cc
+// CHECK7-NOT: baz.c
+// CHECK7-NOT: foo.c
+// CHECK7: foo.js
+
+// RUN: cd ..
+// RUN: rm -r %t.dir
diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp
index 6cba1267f3b0d..c4b6209a71a88 100644
--- a/clang/tools/clang-format/ClangFormat.cpp
+++ b/clang/tools/clang-format/ClangFormat.cpp
@@ -210,6 +210,10 @@ static cl::opt<bool> FailOnIncompleteFormat(
     cl::desc("If set, fail with exit code 1 on incomplete format."),
     cl::init(false), cl::cat(ClangFormatCategory));
 
+static cl::opt<bool> ListIgnored("list-ignored",
+                                 cl::desc("List ignored files."),
+                                 cl::cat(ClangFormatCategory), cl::Hidden);
+
 namespace clang {
 namespace format {
 
@@ -715,7 +719,13 @@ int main(int argc, const char **argv) {
   unsigned FileNo = 1;
   bool Error = false;
   for (const auto &FileName : FileNames) {
-    if (isIgnored(FileName))
+    const bool Ignored = isIgnored(FileName);
+    if (ListIgnored) {
+      if (Ignored)
+        outs() << FileName << '\n';
+      continue;
+    }
+    if (Ignored)
       continue;
     if (Verbose) {
       errs() << "Formatting [" << FileNo++ << "/" << FileNames.size() << "] "
diff --git a/clang/tools/clang-format/git-clang-format b/clang/tools/clang-format/git-clang-format
index d33fd478d77fd..714ba8a6e77d5 100755
--- a/clang/tools/clang-format/git-clang-format
+++ b/clang/tools/clang-format/git-clang-format
@@ -173,11 +173,12 @@ def main():
   # those files.
   cd_to_toplevel()
   filter_symlinks(changed_lines)
+  filter_ignored_files(changed_lines, binary=opts.binary)
   if opts.verbose >= 1:
     ignored_files.difference_update(changed_lines)
     if ignored_files:
-      print(
-        'Ignoring changes in the following files (wrong extension or symlink):')
+      print('Ignoring the following files (wrong extension, symlink, or '
+            'ignored by clang-format):')
       for filename in ignored_files:
         print('    %s' % filename)
     if changed_lines:
@@ -399,6 +400,16 @@ def filter_symlinks(dictionary):
       del dictionary[filename]
 
 
+def filter_ignored_files(dictionary, binary):
+  """Delete every key in `dictionary` that is ignored by clang-format."""
+  ignored_files = run(binary, '-list-ignored', *dictionary.keys())
+  if not ignored_files:
+    return
+  ignored_files = ignored_files.split('\n')
+  for filename in ignored_files:
+    del dictionary[filename]
+
+
 def cd_to_toplevel():
   """Change to the top level of the git repository."""
   toplevel = run('git', 'rev-parse', '--show-toplevel')

From c261de724b01a1db029d896914adc8995cd2c35a Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Thu, 8 Aug 2024 10:53:15 +0200
Subject: [PATCH 170/427] [lldb] Fix crash when adding members to an
 "incomplete" type (#102116)

This fixes a regression caused by delayed type definition searching
(#96755 and friends): If we end up adding a member (e.g. a typedef) to a
type that we've already attempted to complete (and failed), the
resulting AST would end up inconsistent (we would start to "forcibly"
complete it, but never finish it), and importing it into an expression
AST would crash.

This patch fixes this by detecting the situation and finishing the
definition as well.

(cherry picked from commit 57cd1000c9c93fd0e64352cfbc9fbbe5b8a8fcef)
---
 .../SymbolFile/DWARF/DWARFASTParserClang.cpp  | 11 +++++++--
 .../DWARF/x86/typedef-in-incomplete-type.cpp  | 23 +++++++++++++++++++
 2 files changed, 32 insertions(+), 2 deletions(-)
 create mode 100644 lldb/test/Shell/SymbolFile/DWARF/x86/typedef-in-incomplete-type.cpp

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 85c59a605c675..ac769ad9fbd52 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -269,8 +269,15 @@ static void PrepareContextToReceiveMembers(TypeSystemClang &ast,
   }
 
   // We don't have a type definition and/or the import failed, but we need to
-  // add members to it. Start the definition to make that possible.
-  tag_decl_ctx->startDefinition();
+  // add members to it. Start the definition to make that possible. If the type
+  // has no external storage we also have to complete the definition. Otherwise,
+  // that will happen when we are asked to complete the type
+  // (CompleteTypeFromDWARF).
+  ast.StartTagDeclarationDefinition(type);
+  if (!tag_decl_ctx->hasExternalLexicalStorage()) {
+    ast.SetDeclIsForcefullyCompleted(tag_decl_ctx);
+    ast.CompleteTagDeclarationDefinition(type);
+  }
 }
 
 ParsedDWARFTypeAttributes::ParsedDWARFTypeAttributes(const DWARFDIE &die) {
diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/typedef-in-incomplete-type.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/typedef-in-incomplete-type.cpp
new file mode 100644
index 0000000000000..591607784b0a9
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/DWARF/x86/typedef-in-incomplete-type.cpp
@@ -0,0 +1,23 @@
+// RUN: %clangxx --target=x86_64-pc-linux -flimit-debug-info -o %t -c %s -g
+// RUN: %lldb %t -o "target var a" -o "expr -- var" -o exit | FileCheck %s
+
+// This forces lldb to attempt to complete the type A. Since it has no
+// definition it will fail.
+// CHECK: target var a
+// CHECK: (A) a = <incomplete type "A">
+
+// Now attempt to display the second variable, which will try to add a typedef
+// to the incomplete type. Make sure that succeeds. Use the expression command
+// to make sure the resulting AST can be imported correctly.
+// CHECK: expr -- var
+// CHECK: (A::X) $0 = 0
+
+struct A {
+  // Declare the constructor, but don't define it to avoid emitting the
+  // definition in the debug info.
+  A();
+  using X = int;
+};
+
+A a;
+A::X var;

From 955fe3f1ef193d26c73fb54ab6e01a818a6bde8e Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha <ahmed@bougacha.org>
Date: Fri, 9 Aug 2024 12:32:01 -0700
Subject: [PATCH 171/427] [clang] Implement -fptrauth-auth-traps. (#102417)

This provides -fptrauth-auth-traps, which at the frontend level only
controls the addition of the "ptrauth-auth-traps" function attribute.

The attribute in turn controls various aspects of backend codegen, by
providing the guarantee that every "auth" operation generated will trap
on failure.

This can either be delegated to the hardware (if AArch64 FPAC is known
to be available), in which case this attribute doesn't change codegen.
Otherwise, if FPAC isn't available, this asks the backend to emit
additional instructions to check and trap on auth failure.

(cherry picked from commit d179acd0484bac30c5ebbbed4d29a4734d92ac93)
---
 clang/include/clang/Basic/PointerAuthOptions.h   | 3 +++
 clang/lib/CodeGen/CodeGenFunction.cpp            | 2 ++
 clang/lib/Frontend/CompilerInvocation.cpp        | 7 ++++---
 clang/test/CodeGen/ptrauth-function-attributes.c | 5 +++++
 4 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Basic/PointerAuthOptions.h b/clang/include/clang/Basic/PointerAuthOptions.h
index c0ab35bce5d84..a26af69e1fa24 100644
--- a/clang/include/clang/Basic/PointerAuthOptions.h
+++ b/clang/include/clang/Basic/PointerAuthOptions.h
@@ -162,6 +162,9 @@ struct PointerAuthOptions {
   /// Should return addresses be authenticated?
   bool ReturnAddresses = false;
 
+  /// Do authentication failures cause a trap?
+  bool AuthTraps = false;
+
   /// Do indirect goto label addresses need to be authenticated?
   bool IndirectGotos = false;
 
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 4dc57d0ff5b26..2b2e23f1e5d7f 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -884,6 +884,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
     Fn->addFnAttr("ptrauth-returns");
   if (CodeGenOpts.PointerAuth.FunctionPointers)
     Fn->addFnAttr("ptrauth-calls");
+  if (CodeGenOpts.PointerAuth.AuthTraps)
+    Fn->addFnAttr("ptrauth-auth-traps");
   if (CodeGenOpts.PointerAuth.IndirectGotos)
     Fn->addFnAttr("ptrauth-indirect-gotos");
 
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index fa5d076c202a3..028fdb2cc6b9d 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1504,16 +1504,17 @@ void CompilerInvocation::setDefaultPointerAuthOptions(
     Opts.CXXMemberFunctionPointers =
         PointerAuthSchema(Key::ASIA, false, Discrimination::Type);
   }
-  Opts.IndirectGotos = LangOpts.PointerAuthIndirectGotos;
   Opts.ReturnAddresses = LangOpts.PointerAuthReturns;
+  Opts.AuthTraps = LangOpts.PointerAuthAuthTraps;
+  Opts.IndirectGotos = LangOpts.PointerAuthIndirectGotos;
 }
 
 static void parsePointerAuthOptions(PointerAuthOptions &Opts,
                                     const LangOptions &LangOpts,
                                     const llvm::Triple &Triple,
                                     DiagnosticsEngine &Diags) {
-  if (!LangOpts.PointerAuthCalls && !LangOpts.PointerAuthIndirectGotos &&
-      !LangOpts.PointerAuthReturns)
+  if (!LangOpts.PointerAuthCalls && !LangOpts.PointerAuthReturns &&
+      !LangOpts.PointerAuthAuthTraps && !LangOpts.PointerAuthIndirectGotos)
     return;
 
   CompilerInvocation::setDefaultPointerAuthOptions(Opts, LangOpts, Triple);
diff --git a/clang/test/CodeGen/ptrauth-function-attributes.c b/clang/test/CodeGen/ptrauth-function-attributes.c
index 17ebf9d6e2e01..e7081f00b4f68 100644
--- a/clang/test/CodeGen/ptrauth-function-attributes.c
+++ b/clang/test/CodeGen/ptrauth-function-attributes.c
@@ -8,6 +8,9 @@
 // RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-returns -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,RETS
 // RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-returns -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,RETS
 
+// RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-auth-traps -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,TRAPS
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-auth-traps -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,TRAPS
+
 // RUN: %clang_cc1 -triple arm64-apple-ios   -fptrauth-indirect-gotos -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,GOTOS
 // RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-indirect-gotos -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,GOTOS
 
@@ -19,6 +22,8 @@ void test() {
 
 // RETS: attributes #0 = {{{.*}} "ptrauth-returns" {{.*}}}
 
+// TRAPS: attributes #0 = {{{.*}} "ptrauth-auth-traps" {{.*}}}
+
 // GOTOS: attributes #0 = {{{.*}} "ptrauth-indirect-gotos" {{.*}}}
 
 // OFF-NOT: attributes {{.*}} "ptrauth-

From 5230e327a10023b4349474bd89d67f8a1764c648 Mon Sep 17 00:00:00 2001
From: Mitch Phillips <mitchp@google.com>
Date: Wed, 24 Jul 2024 12:58:24 +0200
Subject: [PATCH 172/427] Revert "[libc++][math] Fix undue overflowing of
 `std::hypot(x,y,z)` (#93350)"

This reverts commit 9628777479a970db5d0c2d0b456dac6633864760.

More details in https://github.com/llvm/llvm-project/pull/93350, but
this broke the PowerPC sanitizer bots.

(cherry picked from commit 1031335f2ee1879737576fde3a3425ce0046e773)
---
 libcxx/include/__math/hypot.h                 | 89 ------------------
 libcxx/include/cmath                          | 25 ++++-
 .../test/libcxx/transitive_includes/cxx17.csv |  3 -
 .../test/libcxx/transitive_includes/cxx20.csv |  3 -
 .../test/libcxx/transitive_includes/cxx23.csv |  3 -
 .../test/libcxx/transitive_includes/cxx26.csv |  3 -
 .../test/std/numerics/c.math/cmath.pass.cpp   | 91 ++++---------------
 libcxx/test/support/fp_compare.h              | 45 +++++----
 8 files changed, 65 insertions(+), 197 deletions(-)

diff --git a/libcxx/include/__math/hypot.h b/libcxx/include/__math/hypot.h
index 61fd260c59409..1bf193a9ab7ee 100644
--- a/libcxx/include/__math/hypot.h
+++ b/libcxx/include/__math/hypot.h
@@ -15,21 +15,10 @@
 #include <__type_traits/is_same.h>
 #include <__type_traits/promote.h>
 
-#if _LIBCPP_STD_VER >= 17
-#  include <__algorithm/max.h>
-#  include <__math/abs.h>
-#  include <__math/roots.h>
-#  include <__utility/pair.h>
-#  include <limits>
-#endif
-
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
 
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 namespace __math {
@@ -52,86 +41,8 @@ inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type hypot(_A1 __x, _
   return __math::hypot((__result_type)__x, (__result_type)__y);
 }
 
-#if _LIBCPP_STD_VER >= 17
-// Factors needed to determine if over-/underflow might happen for `std::hypot(x,y,z)`.
-// returns [overflow_threshold, overflow_scale]
-template <class _Real>
-_LIBCPP_HIDE_FROM_ABI std::pair<_Real, _Real> __hypot_factors() {
-  static_assert(std::numeric_limits<_Real>::is_iec559);
-
-  if constexpr (std::is_same_v<_Real, float>) {
-    static_assert(-125 == std::numeric_limits<_Real>::min_exponent);
-    static_assert(+128 == std::numeric_limits<_Real>::max_exponent);
-    return {0x1.0p+62f, 0x1.0p-70f};
-  } else if constexpr (std::is_same_v<_Real, double>) {
-    static_assert(-1021 == std::numeric_limits<_Real>::min_exponent);
-    static_assert(+1024 == std::numeric_limits<_Real>::max_exponent);
-    return {0x1.0p+510, 0x1.0p-600};
-  } else { // long double
-    static_assert(std::is_same_v<_Real, long double>);
-
-    // preprocessor guard necessary, otherwise literals (e.g. `0x1.0p+8'190l`) throw warnings even when shielded by `if
-    // constexpr`
-#  if __DBL_MAX_EXP__ == __LDBL_MAX_EXP__
-    static_assert(sizeof(_Real) == sizeof(double));
-    return static_cast<std::pair<_Real, _Real>>(__math::__hypot_factors<double>());
-#  else
-    static_assert(sizeof(_Real) > sizeof(double));
-    static_assert(-16381 == std::numeric_limits<_Real>::min_exponent);
-    static_assert(+16384 == std::numeric_limits<_Real>::max_exponent);
-    return {0x1.0p+8190l, 0x1.0p-9000l};
-#  endif
-  }
-}
-
-// Computes the three-dimensional hypotenuse: `std::hypot(x,y,z)`.
-// The naive implementation might over-/underflow which is why this implementation is more involved:
-//    If the square of an argument might run into issues, we scale the arguments appropriately.
-// See https://github.com/llvm/llvm-project/issues/92782 for a detailed discussion and summary.
-template <class _Real>
-_LIBCPP_HIDE_FROM_ABI _Real __hypot(_Real __x, _Real __y, _Real __z) {
-  const _Real __max_abs = std::max(__math::fabs(__x), std::max(__math::fabs(__y), __math::fabs(__z)));
-  const auto [__overflow_threshold, __overflow_scale] = __math::__hypot_factors<_Real>();
-  _Real __scale;
-  if (__max_abs > __overflow_threshold) { // x*x + y*y + z*z might overflow
-    __scale = __overflow_scale;
-    __x *= __scale;
-    __y *= __scale;
-    __z *= __scale;
-  } else if (__max_abs < 1 / __overflow_threshold) { // x*x + y*y + z*z might underflow
-    __scale = 1 / __overflow_scale;
-    __x *= __scale;
-    __y *= __scale;
-    __z *= __scale;
-  } else
-    __scale = 1;
-  return __math::sqrt(__x * __x + __y * __y + __z * __z) / __scale;
-}
-
-inline _LIBCPP_HIDE_FROM_ABI float hypot(float __x, float __y, float __z) { return __math::__hypot(__x, __y, __z); }
-
-inline _LIBCPP_HIDE_FROM_ABI double hypot(double __x, double __y, double __z) { return __math::__hypot(__x, __y, __z); }
-
-inline _LIBCPP_HIDE_FROM_ABI long double hypot(long double __x, long double __y, long double __z) {
-  return __math::__hypot(__x, __y, __z);
-}
-
-template <class _A1,
-          class _A2,
-          class _A3,
-          std::enable_if_t< is_arithmetic_v<_A1> && is_arithmetic_v<_A2> && is_arithmetic_v<_A3>, int> = 0 >
-_LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2, _A3>::type hypot(_A1 __x, _A2 __y, _A3 __z) _NOEXCEPT {
-  using __result_type = typename __promote<_A1, _A2, _A3>::type;
-  static_assert(!(
-      std::is_same_v<_A1, __result_type> && std::is_same_v<_A2, __result_type> && std::is_same_v<_A3, __result_type>));
-  return __math::__hypot(
-      static_cast<__result_type>(__x), static_cast<__result_type>(__y), static_cast<__result_type>(__z));
-}
-#endif
-
 } // namespace __math
 
 _LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
 
 #endif // _LIBCPP___MATH_HYPOT_H
diff --git a/libcxx/include/cmath b/libcxx/include/cmath
index 6480c4678ce33..3c22604a683c3 100644
--- a/libcxx/include/cmath
+++ b/libcxx/include/cmath
@@ -313,7 +313,6 @@ constexpr long double lerp(long double a, long double b, long double t) noexcept
 */
 
 #include <__config>
-#include <__math/hypot.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/is_arithmetic.h>
 #include <__type_traits/is_constant_evaluated.h>
@@ -554,6 +553,30 @@ using ::scalbnl _LIBCPP_USING_IF_EXISTS;
 using ::tgammal _LIBCPP_USING_IF_EXISTS;
 using ::truncl _LIBCPP_USING_IF_EXISTS;
 
+#if _LIBCPP_STD_VER >= 17
+inline _LIBCPP_HIDE_FROM_ABI float hypot(float __x, float __y, float __z) {
+  return sqrt(__x * __x + __y * __y + __z * __z);
+}
+inline _LIBCPP_HIDE_FROM_ABI double hypot(double __x, double __y, double __z) {
+  return sqrt(__x * __x + __y * __y + __z * __z);
+}
+inline _LIBCPP_HIDE_FROM_ABI long double hypot(long double __x, long double __y, long double __z) {
+  return sqrt(__x * __x + __y * __y + __z * __z);
+}
+
+template <class _A1, class _A2, class _A3>
+inline _LIBCPP_HIDE_FROM_ABI
+typename enable_if_t< is_arithmetic<_A1>::value && is_arithmetic<_A2>::value && is_arithmetic<_A3>::value,
+                      __promote<_A1, _A2, _A3> >::type
+hypot(_A1 __lcpp_x, _A2 __lcpp_y, _A3 __lcpp_z) _NOEXCEPT {
+  typedef typename __promote<_A1, _A2, _A3>::type __result_type;
+  static_assert(
+      !(is_same<_A1, __result_type>::value && is_same<_A2, __result_type>::value && is_same<_A3, __result_type>::value),
+      "");
+  return std::hypot((__result_type)__lcpp_x, (__result_type)__lcpp_y, (__result_type)__lcpp_z);
+}
+#endif
+
 template <class _A1, __enable_if_t<is_floating_point<_A1>::value, int> = 0>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool __constexpr_isnan(_A1 __lcpp_x) _NOEXCEPT {
 #if __has_builtin(__builtin_isnan)
diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv
index 8099d2b79c4be..2c028462144ee 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx17.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv
@@ -130,9 +130,6 @@ chrono type_traits
 chrono vector
 chrono version
 cinttypes cstdint
-cmath cstddef
-cmath cstdint
-cmath initializer_list
 cmath limits
 cmath type_traits
 cmath version
diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv
index 384e51b101f31..982c2013e3417 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx20.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv
@@ -135,9 +135,6 @@ chrono type_traits
 chrono vector
 chrono version
 cinttypes cstdint
-cmath cstddef
-cmath cstdint
-cmath initializer_list
 cmath limits
 cmath type_traits
 cmath version
diff --git a/libcxx/test/libcxx/transitive_includes/cxx23.csv b/libcxx/test/libcxx/transitive_includes/cxx23.csv
index 46b833d143f39..8ffb71d8b566b 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx23.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx23.csv
@@ -83,9 +83,6 @@ chrono string_view
 chrono vector
 chrono version
 cinttypes cstdint
-cmath cstddef
-cmath cstdint
-cmath initializer_list
 cmath limits
 cmath version
 codecvt cctype
diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv
index 46b833d143f39..8ffb71d8b566b 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx26.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv
@@ -83,9 +83,6 @@ chrono string_view
 chrono vector
 chrono version
 cinttypes cstdint
-cmath cstddef
-cmath cstdint
-cmath initializer_list
 cmath limits
 cmath version
 codecvt cctype
diff --git a/libcxx/test/std/numerics/c.math/cmath.pass.cpp b/libcxx/test/std/numerics/c.math/cmath.pass.cpp
index 19b5fd0cf8996..9379084499792 100644
--- a/libcxx/test/std/numerics/c.math/cmath.pass.cpp
+++ b/libcxx/test/std/numerics/c.math/cmath.pass.cpp
@@ -12,17 +12,14 @@
 
 // <cmath>
 
-#include <array>
 #include <cmath>
 #include <limits>
 #include <type_traits>
 #include <cassert>
 
-#include "fp_compare.h"
 #include "test_macros.h"
 #include "hexfloat.h"
 #include "truncate_fp.h"
-#include "type_algorithms.h"
 
 // convertible to int/float/double/etc
 template <class T, int N=0>
@@ -1116,56 +1113,6 @@ void test_fmin()
     assert(std::fmin(1,0) == 0);
 }
 
-#if TEST_STD_VER >= 17
-struct TestHypot3 {
-  template <class Real>
-  void operator()() const {
-    const auto check = [](Real elem, Real abs_tol) {
-      assert(std::isfinite(std::hypot(elem, Real(0), Real(0))));
-      assert(fptest_close(std::hypot(elem, Real(0), Real(0)), elem, abs_tol));
-      assert(std::isfinite(std::hypot(elem, elem, Real(0))));
-      assert(fptest_close(std::hypot(elem, elem, Real(0)), std::sqrt(Real(2)) * elem, abs_tol));
-      assert(std::isfinite(std::hypot(elem, elem, elem)));
-      assert(fptest_close(std::hypot(elem, elem, elem), std::sqrt(Real(3)) * elem, abs_tol));
-    };
-
-    { // check for overflow
-      const auto [elem, abs_tol] = []() -> std::array<Real, 2> {
-        if constexpr (std::is_same_v<Real, float>)
-          return {1e20f, 1e16f};
-        else if constexpr (std::is_same_v<Real, double>)
-          return {1e300, 1e287};
-        else { // long double
-#  if __DBL_MAX_EXP__ == __LDBL_MAX_EXP__
-          return {1e300l, 1e287l}; // 64-bit
-#  else
-          return {1e4000l, 1e3985l}; // 80- or 128-bit
-#  endif
-        }
-      }();
-      check(elem, abs_tol);
-    }
-
-    { // check for underflow
-      const auto [elem, abs_tol] = []() -> std::array<Real, 2> {
-        if constexpr (std::is_same_v<Real, float>)
-          return {1e-20f, 1e-24f};
-        else if constexpr (std::is_same_v<Real, double>)
-          return {1e-287, 1e-300};
-        else { // long double
-#  if __DBL_MAX_EXP__ == __LDBL_MAX_EXP__
-          return {1e-287l, 1e-300l}; // 64-bit
-#  else
-          return {1e-3985l, 1e-4000l}; // 80- or 128-bit
-#  endif
-        }
-      }();
-      check(elem, abs_tol);
-    }
-  }
-};
-#endif
-
 void test_hypot()
 {
     static_assert((std::is_same<decltype(std::hypot((float)0, (float)0)), float>::value), "");
@@ -1188,31 +1135,25 @@ void test_hypot()
     static_assert((std::is_same<decltype(hypot(Ambiguous(), Ambiguous())), Ambiguous>::value), "");
     assert(std::hypot(3,4) == 5);
 
-#if TEST_STD_VER >= 17
-    // clang-format off
-    static_assert((std::is_same_v<decltype(std::hypot((float)0, (float)0,          (float)0)),              float>));
-    static_assert((std::is_same_v<decltype(std::hypot((float)0, (bool)0,           (float)0)),              double>));
-    static_assert((std::is_same_v<decltype(std::hypot((float)0, (unsigned short)0, (double)0)),             double>));
-    static_assert((std::is_same_v<decltype(std::hypot((float)0, (int)0,            (long double)0)),        long double>));
-    static_assert((std::is_same_v<decltype(std::hypot((float)0, (double)0,         (long)0)),               double>));
-    static_assert((std::is_same_v<decltype(std::hypot((float)0, (long double)0,    (unsigned long)0)),      long double>));
-    static_assert((std::is_same_v<decltype(std::hypot((float)0, (int)0,            (long long)0)),          double>));
-    static_assert((std::is_same_v<decltype(std::hypot((float)0, (int)0,            (unsigned long long)0)), double>));
-    static_assert((std::is_same_v<decltype(std::hypot((float)0, (double)0,         (double)0)),             double>));
-    static_assert((std::is_same_v<decltype(std::hypot((float)0, (long double)0,    (long double)0)),        long double>));
-    static_assert((std::is_same_v<decltype(std::hypot((float)0, (float)0,          (double)0)),             double>));
-    static_assert((std::is_same_v<decltype(std::hypot((float)0, (float)0,          (long double)0)),        long double>));
-    static_assert((std::is_same_v<decltype(std::hypot((float)0, (double)0,         (long double)0)),        long double>));
-    static_assert((std::is_same_v<decltype(std::hypot((int)0,   (int)0,            (int)0)),                double>));
-    static_assert((std::is_same_v<decltype(hypot(Ambiguous(), Ambiguous(), Ambiguous())), Ambiguous>));
-    // clang-format on
+#if TEST_STD_VER > 14
+    static_assert((std::is_same<decltype(std::hypot((float)0, (float)0, (float)0)), float>::value), "");
+    static_assert((std::is_same<decltype(std::hypot((float)0, (bool)0, (float)0)), double>::value), "");
+    static_assert((std::is_same<decltype(std::hypot((float)0, (unsigned short)0, (double)0)), double>::value), "");
+    static_assert((std::is_same<decltype(std::hypot((float)0, (int)0, (long double)0)), long double>::value), "");
+    static_assert((std::is_same<decltype(std::hypot((float)0, (double)0, (long)0)), double>::value), "");
+    static_assert((std::is_same<decltype(std::hypot((float)0, (long double)0, (unsigned long)0)), long double>::value), "");
+    static_assert((std::is_same<decltype(std::hypot((float)0, (int)0, (long long)0)), double>::value), "");
+    static_assert((std::is_same<decltype(std::hypot((float)0, (int)0, (unsigned long long)0)), double>::value), "");
+    static_assert((std::is_same<decltype(std::hypot((float)0, (double)0, (double)0)), double>::value), "");
+    static_assert((std::is_same<decltype(std::hypot((float)0, (long double)0, (long double)0)), long double>::value), "");
+    static_assert((std::is_same<decltype(std::hypot((float)0, (float)0, (double)0)), double>::value), "");
+    static_assert((std::is_same<decltype(std::hypot((float)0, (float)0, (long double)0)), long double>::value), "");
+    static_assert((std::is_same<decltype(std::hypot((float)0, (double)0, (long double)0)), long double>::value), "");
+    static_assert((std::is_same<decltype(std::hypot((int)0, (int)0, (int)0)), double>::value), "");
+    static_assert((std::is_same<decltype(hypot(Ambiguous(), Ambiguous(), Ambiguous())), Ambiguous>::value), "");
 
     assert(std::hypot(2,3,6) == 7);
     assert(std::hypot(1,4,8) == 9);
-
-    // Check for undue over-/underflows of intermediate results.
-    // See discussion at https://github.com/llvm/llvm-project/issues/92782.
-    types::for_each(types::floating_point_types(), TestHypot3());
 #endif
 }
 
diff --git a/libcxx/test/support/fp_compare.h b/libcxx/test/support/fp_compare.h
index 3088a211dadc3..1d1933b0bcd81 100644
--- a/libcxx/test/support/fp_compare.h
+++ b/libcxx/test/support/fp_compare.h
@@ -9,34 +9,39 @@
 #ifndef SUPPORT_FP_COMPARE_H
 #define SUPPORT_FP_COMPARE_H
 
-#include <cmath>     // for std::abs
-#include <algorithm> // for std::max
+#include <cmath>      // for std::abs
+#include <algorithm>  // for std::max
 #include <cassert>
-#include <__config>
 
 // See https://www.boost.org/doc/libs/1_70_0/libs/test/doc/html/boost_test/testing_tools/extended_comparison/floating_point/floating_points_comparison_theory.html
 
-template <typename T>
-bool fptest_close(T val, T expected, T eps) {
-  _LIBCPP_CONSTEXPR T zero = T(0);
-  assert(eps >= zero);
+template<typename T>
+bool fptest_close(T val, T expected, T eps)
+{
+    constexpr T zero = T(0);
+    assert(eps >= zero);
 
-  // Handle the zero cases
-  if (eps == zero)
-    return val == expected;
-  if (val == zero)
-    return std::abs(expected) <= eps;
-  if (expected == zero)
-    return std::abs(val) <= eps;
+    // Handle the zero cases
+    if (eps      == zero) return val == expected;
+    if (val      == zero) return std::abs(expected) <= eps;
+    if (expected == zero) return std::abs(val)      <= eps;
 
-  return std::abs(val - expected) < eps && std::abs(val - expected) / std::abs(val) < eps;
+    return std::abs(val - expected) < eps
+        && std::abs(val - expected)/std::abs(val) < eps;
 }
 
-template <typename T>
-bool fptest_close_pct(T val, T expected, T percent) {
-  assert(percent >= T(0));
-  T eps = (percent / T(100)) * std::max(std::abs(val), std::abs(expected));
-  return fptest_close(val, expected, eps);
+template<typename T>
+bool fptest_close_pct(T val, T expected, T percent)
+{
+    constexpr T zero = T(0);
+    assert(percent >= zero);
+
+    // Handle the zero cases
+    if (percent == zero) return val == expected;
+    T eps = (percent / T(100)) * std::max(std::abs(val), std::abs(expected));
+
+    return fptest_close(val, expected, eps);
 }
 
+
 #endif // SUPPORT_FP_COMPARE_H

From 2c29bd3d4cdf8b81ebeb8e562fbc91f63a90a1ad Mon Sep 17 00:00:00 2001
From: PaulXiCao <paulxicao7@gmail.com>
Date: Mon, 5 Aug 2024 20:08:47 +0000
Subject: [PATCH 173/427] [libc++][math] Fix undue overflowing of
 `std::hypot(x,y,z)` (#100820)

This is in relation to mr #93350. It was merged to main, but reverted
because of failing sanitizer builds on PowerPC.

The fix includes replacing the hard-coded threshold constants (e.g.
`__overflow_threshold`) for different floating-point sizes by a general
computation using `std::ldexp`. Thus, it should now work for all architectures.
This has the drawback of not being `constexpr` anymore as `std::ldexp`
is not implemented as `constexpr` (even though the standard mandates it
for C++23).

Closes #92782

(cherry picked from commit 72825fde03aab3ce9eba2635b872144d1fb6b6b2)
---
 libcxx/include/__math/hypot.h                 | 61 +++++++++++++
 libcxx/include/cmath                          | 25 +----
 .../test/libcxx/transitive_includes/cxx03.csv |  3 +
 .../test/libcxx/transitive_includes/cxx11.csv |  3 +
 .../test/libcxx/transitive_includes/cxx14.csv |  3 +
 .../test/libcxx/transitive_includes/cxx17.csv |  3 +
 .../test/libcxx/transitive_includes/cxx20.csv |  3 +
 .../test/libcxx/transitive_includes/cxx23.csv |  3 +
 .../test/libcxx/transitive_includes/cxx26.csv |  3 +
 .../test/std/numerics/c.math/cmath.pass.cpp   | 91 +++++++++++++++----
 libcxx/test/support/fp_compare.h              | 45 ++++-----
 11 files changed, 178 insertions(+), 65 deletions(-)

diff --git a/libcxx/include/__math/hypot.h b/libcxx/include/__math/hypot.h
index 1bf193a9ab7ee..b992163711010 100644
--- a/libcxx/include/__math/hypot.h
+++ b/libcxx/include/__math/hypot.h
@@ -9,16 +9,25 @@
 #ifndef _LIBCPP___MATH_HYPOT_H
 #define _LIBCPP___MATH_HYPOT_H
 
+#include <__algorithm/max.h>
 #include <__config>
+#include <__math/abs.h>
+#include <__math/exponential_functions.h>
+#include <__math/roots.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/is_arithmetic.h>
 #include <__type_traits/is_same.h>
 #include <__type_traits/promote.h>
+#include <__utility/pair.h>
+#include <limits>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
 
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 namespace __math {
@@ -41,8 +50,60 @@ inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type hypot(_A1 __x, _
   return __math::hypot((__result_type)__x, (__result_type)__y);
 }
 
+#if _LIBCPP_STD_VER >= 17
+// Computes the three-dimensional hypotenuse: `std::hypot(x,y,z)`.
+// The naive implementation might over-/underflow which is why this implementation is more involved:
+//    If the square of an argument might run into issues, we scale the arguments appropriately.
+// See https://github.com/llvm/llvm-project/issues/92782 for a detailed discussion and summary.
+template <class _Real>
+_LIBCPP_HIDE_FROM_ABI _Real __hypot(_Real __x, _Real __y, _Real __z) {
+  // Factors needed to determine if over-/underflow might happen
+  constexpr int __exp              = std::numeric_limits<_Real>::max_exponent / 2;
+  const _Real __overflow_threshold = __math::ldexp(_Real(1), __exp);
+  const _Real __overflow_scale     = __math::ldexp(_Real(1), -(__exp + 20));
+
+  // Scale arguments depending on their size
+  const _Real __max_abs = std::max(__math::fabs(__x), std::max(__math::fabs(__y), __math::fabs(__z)));
+  _Real __scale;
+  if (__max_abs > __overflow_threshold) { // x*x + y*y + z*z might overflow
+    __scale = __overflow_scale;
+  } else if (__max_abs < 1 / __overflow_threshold) { // x*x + y*y + z*z might underflow
+    __scale = 1 / __overflow_scale;
+  } else {
+    __scale = 1;
+  }
+  __x *= __scale;
+  __y *= __scale;
+  __z *= __scale;
+
+  // Compute hypot of scaled arguments and undo scaling
+  return __math::sqrt(__x * __x + __y * __y + __z * __z) / __scale;
+}
+
+inline _LIBCPP_HIDE_FROM_ABI float hypot(float __x, float __y, float __z) { return __math::__hypot(__x, __y, __z); }
+
+inline _LIBCPP_HIDE_FROM_ABI double hypot(double __x, double __y, double __z) { return __math::__hypot(__x, __y, __z); }
+
+inline _LIBCPP_HIDE_FROM_ABI long double hypot(long double __x, long double __y, long double __z) {
+  return __math::__hypot(__x, __y, __z);
+}
+
+template <class _A1,
+          class _A2,
+          class _A3,
+          std::enable_if_t< is_arithmetic_v<_A1> && is_arithmetic_v<_A2> && is_arithmetic_v<_A3>, int> = 0 >
+_LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2, _A3>::type hypot(_A1 __x, _A2 __y, _A3 __z) _NOEXCEPT {
+  using __result_type = typename __promote<_A1, _A2, _A3>::type;
+  static_assert(!(
+      std::is_same_v<_A1, __result_type> && std::is_same_v<_A2, __result_type> && std::is_same_v<_A3, __result_type>));
+  return __math::__hypot(
+      static_cast<__result_type>(__x), static_cast<__result_type>(__y), static_cast<__result_type>(__z));
+}
+#endif
+
 } // namespace __math
 
 _LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
 
 #endif // _LIBCPP___MATH_HYPOT_H
diff --git a/libcxx/include/cmath b/libcxx/include/cmath
index 3c22604a683c3..6480c4678ce33 100644
--- a/libcxx/include/cmath
+++ b/libcxx/include/cmath
@@ -313,6 +313,7 @@ constexpr long double lerp(long double a, long double b, long double t) noexcept
 */
 
 #include <__config>
+#include <__math/hypot.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/is_arithmetic.h>
 #include <__type_traits/is_constant_evaluated.h>
@@ -553,30 +554,6 @@ using ::scalbnl _LIBCPP_USING_IF_EXISTS;
 using ::tgammal _LIBCPP_USING_IF_EXISTS;
 using ::truncl _LIBCPP_USING_IF_EXISTS;
 
-#if _LIBCPP_STD_VER >= 17
-inline _LIBCPP_HIDE_FROM_ABI float hypot(float __x, float __y, float __z) {
-  return sqrt(__x * __x + __y * __y + __z * __z);
-}
-inline _LIBCPP_HIDE_FROM_ABI double hypot(double __x, double __y, double __z) {
-  return sqrt(__x * __x + __y * __y + __z * __z);
-}
-inline _LIBCPP_HIDE_FROM_ABI long double hypot(long double __x, long double __y, long double __z) {
-  return sqrt(__x * __x + __y * __y + __z * __z);
-}
-
-template <class _A1, class _A2, class _A3>
-inline _LIBCPP_HIDE_FROM_ABI
-typename enable_if_t< is_arithmetic<_A1>::value && is_arithmetic<_A2>::value && is_arithmetic<_A3>::value,
-                      __promote<_A1, _A2, _A3> >::type
-hypot(_A1 __lcpp_x, _A2 __lcpp_y, _A3 __lcpp_z) _NOEXCEPT {
-  typedef typename __promote<_A1, _A2, _A3>::type __result_type;
-  static_assert(
-      !(is_same<_A1, __result_type>::value && is_same<_A2, __result_type>::value && is_same<_A3, __result_type>::value),
-      "");
-  return std::hypot((__result_type)__lcpp_x, (__result_type)__lcpp_y, (__result_type)__lcpp_z);
-}
-#endif
-
 template <class _A1, __enable_if_t<is_floating_point<_A1>::value, int> = 0>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool __constexpr_isnan(_A1 __lcpp_x) _NOEXCEPT {
 #if __has_builtin(__builtin_isnan)
diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv
index 51e659f52000b..6db523002d5d7 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx03.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv
@@ -129,6 +129,9 @@ chrono type_traits
 chrono vector
 chrono version
 cinttypes cstdint
+cmath cstddef
+cmath cstdint
+cmath initializer_list
 cmath limits
 cmath type_traits
 cmath version
diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv
index 17e85e982729c..056eea9e5f3d2 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx11.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv
@@ -129,6 +129,9 @@ chrono type_traits
 chrono vector
 chrono version
 cinttypes cstdint
+cmath cstddef
+cmath cstdint
+cmath initializer_list
 cmath limits
 cmath type_traits
 cmath version
diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv
index 8aed93da9e6cc..5d79ee3cddf6e 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx14.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv
@@ -130,6 +130,9 @@ chrono type_traits
 chrono vector
 chrono version
 cinttypes cstdint
+cmath cstddef
+cmath cstdint
+cmath initializer_list
 cmath limits
 cmath type_traits
 cmath version
diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv
index 2c028462144ee..8099d2b79c4be 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx17.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv
@@ -130,6 +130,9 @@ chrono type_traits
 chrono vector
 chrono version
 cinttypes cstdint
+cmath cstddef
+cmath cstdint
+cmath initializer_list
 cmath limits
 cmath type_traits
 cmath version
diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv
index 982c2013e3417..384e51b101f31 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx20.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv
@@ -135,6 +135,9 @@ chrono type_traits
 chrono vector
 chrono version
 cinttypes cstdint
+cmath cstddef
+cmath cstdint
+cmath initializer_list
 cmath limits
 cmath type_traits
 cmath version
diff --git a/libcxx/test/libcxx/transitive_includes/cxx23.csv b/libcxx/test/libcxx/transitive_includes/cxx23.csv
index 8ffb71d8b566b..46b833d143f39 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx23.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx23.csv
@@ -83,6 +83,9 @@ chrono string_view
 chrono vector
 chrono version
 cinttypes cstdint
+cmath cstddef
+cmath cstdint
+cmath initializer_list
 cmath limits
 cmath version
 codecvt cctype
diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv
index 8ffb71d8b566b..46b833d143f39 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx26.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv
@@ -83,6 +83,9 @@ chrono string_view
 chrono vector
 chrono version
 cinttypes cstdint
+cmath cstddef
+cmath cstdint
+cmath initializer_list
 cmath limits
 cmath version
 codecvt cctype
diff --git a/libcxx/test/std/numerics/c.math/cmath.pass.cpp b/libcxx/test/std/numerics/c.math/cmath.pass.cpp
index 9379084499792..19b5fd0cf8996 100644
--- a/libcxx/test/std/numerics/c.math/cmath.pass.cpp
+++ b/libcxx/test/std/numerics/c.math/cmath.pass.cpp
@@ -12,14 +12,17 @@
 
 // <cmath>
 
+#include <array>
 #include <cmath>
 #include <limits>
 #include <type_traits>
 #include <cassert>
 
+#include "fp_compare.h"
 #include "test_macros.h"
 #include "hexfloat.h"
 #include "truncate_fp.h"
+#include "type_algorithms.h"
 
 // convertible to int/float/double/etc
 template <class T, int N=0>
@@ -1113,6 +1116,56 @@ void test_fmin()
     assert(std::fmin(1,0) == 0);
 }
 
+#if TEST_STD_VER >= 17
+struct TestHypot3 {
+  template <class Real>
+  void operator()() const {
+    const auto check = [](Real elem, Real abs_tol) {
+      assert(std::isfinite(std::hypot(elem, Real(0), Real(0))));
+      assert(fptest_close(std::hypot(elem, Real(0), Real(0)), elem, abs_tol));
+      assert(std::isfinite(std::hypot(elem, elem, Real(0))));
+      assert(fptest_close(std::hypot(elem, elem, Real(0)), std::sqrt(Real(2)) * elem, abs_tol));
+      assert(std::isfinite(std::hypot(elem, elem, elem)));
+      assert(fptest_close(std::hypot(elem, elem, elem), std::sqrt(Real(3)) * elem, abs_tol));
+    };
+
+    { // check for overflow
+      const auto [elem, abs_tol] = []() -> std::array<Real, 2> {
+        if constexpr (std::is_same_v<Real, float>)
+          return {1e20f, 1e16f};
+        else if constexpr (std::is_same_v<Real, double>)
+          return {1e300, 1e287};
+        else { // long double
+#  if __DBL_MAX_EXP__ == __LDBL_MAX_EXP__
+          return {1e300l, 1e287l}; // 64-bit
+#  else
+          return {1e4000l, 1e3985l}; // 80- or 128-bit
+#  endif
+        }
+      }();
+      check(elem, abs_tol);
+    }
+
+    { // check for underflow
+      const auto [elem, abs_tol] = []() -> std::array<Real, 2> {
+        if constexpr (std::is_same_v<Real, float>)
+          return {1e-20f, 1e-24f};
+        else if constexpr (std::is_same_v<Real, double>)
+          return {1e-287, 1e-300};
+        else { // long double
+#  if __DBL_MAX_EXP__ == __LDBL_MAX_EXP__
+          return {1e-287l, 1e-300l}; // 64-bit
+#  else
+          return {1e-3985l, 1e-4000l}; // 80- or 128-bit
+#  endif
+        }
+      }();
+      check(elem, abs_tol);
+    }
+  }
+};
+#endif
+
 void test_hypot()
 {
     static_assert((std::is_same<decltype(std::hypot((float)0, (float)0)), float>::value), "");
@@ -1135,25 +1188,31 @@ void test_hypot()
     static_assert((std::is_same<decltype(hypot(Ambiguous(), Ambiguous())), Ambiguous>::value), "");
     assert(std::hypot(3,4) == 5);
 
-#if TEST_STD_VER > 14
-    static_assert((std::is_same<decltype(std::hypot((float)0, (float)0, (float)0)), float>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (bool)0, (float)0)), double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (unsigned short)0, (double)0)), double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (int)0, (long double)0)), long double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (double)0, (long)0)), double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (long double)0, (unsigned long)0)), long double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (int)0, (long long)0)), double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (int)0, (unsigned long long)0)), double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (double)0, (double)0)), double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (long double)0, (long double)0)), long double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (float)0, (double)0)), double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (float)0, (long double)0)), long double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((float)0, (double)0, (long double)0)), long double>::value), "");
-    static_assert((std::is_same<decltype(std::hypot((int)0, (int)0, (int)0)), double>::value), "");
-    static_assert((std::is_same<decltype(hypot(Ambiguous(), Ambiguous(), Ambiguous())), Ambiguous>::value), "");
+#if TEST_STD_VER >= 17
+    // clang-format off
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (float)0,          (float)0)),              float>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (bool)0,           (float)0)),              double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (unsigned short)0, (double)0)),             double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (int)0,            (long double)0)),        long double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (double)0,         (long)0)),               double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (long double)0,    (unsigned long)0)),      long double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (int)0,            (long long)0)),          double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (int)0,            (unsigned long long)0)), double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (double)0,         (double)0)),             double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (long double)0,    (long double)0)),        long double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (float)0,          (double)0)),             double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (float)0,          (long double)0)),        long double>));
+    static_assert((std::is_same_v<decltype(std::hypot((float)0, (double)0,         (long double)0)),        long double>));
+    static_assert((std::is_same_v<decltype(std::hypot((int)0,   (int)0,            (int)0)),                double>));
+    static_assert((std::is_same_v<decltype(hypot(Ambiguous(), Ambiguous(), Ambiguous())), Ambiguous>));
+    // clang-format on
 
     assert(std::hypot(2,3,6) == 7);
     assert(std::hypot(1,4,8) == 9);
+
+    // Check for undue over-/underflows of intermediate results.
+    // See discussion at https://github.com/llvm/llvm-project/issues/92782.
+    types::for_each(types::floating_point_types(), TestHypot3());
 #endif
 }
 
diff --git a/libcxx/test/support/fp_compare.h b/libcxx/test/support/fp_compare.h
index 1d1933b0bcd81..3088a211dadc3 100644
--- a/libcxx/test/support/fp_compare.h
+++ b/libcxx/test/support/fp_compare.h
@@ -9,39 +9,34 @@
 #ifndef SUPPORT_FP_COMPARE_H
 #define SUPPORT_FP_COMPARE_H
 
-#include <cmath>      // for std::abs
-#include <algorithm>  // for std::max
+#include <cmath>     // for std::abs
+#include <algorithm> // for std::max
 #include <cassert>
+#include <__config>
 
 // See https://www.boost.org/doc/libs/1_70_0/libs/test/doc/html/boost_test/testing_tools/extended_comparison/floating_point/floating_points_comparison_theory.html
 
-template<typename T>
-bool fptest_close(T val, T expected, T eps)
-{
-    constexpr T zero = T(0);
-    assert(eps >= zero);
+template <typename T>
+bool fptest_close(T val, T expected, T eps) {
+  _LIBCPP_CONSTEXPR T zero = T(0);
+  assert(eps >= zero);
 
-    // Handle the zero cases
-    if (eps      == zero) return val == expected;
-    if (val      == zero) return std::abs(expected) <= eps;
-    if (expected == zero) return std::abs(val)      <= eps;
+  // Handle the zero cases
+  if (eps == zero)
+    return val == expected;
+  if (val == zero)
+    return std::abs(expected) <= eps;
+  if (expected == zero)
+    return std::abs(val) <= eps;
 
-    return std::abs(val - expected) < eps
-        && std::abs(val - expected)/std::abs(val) < eps;
+  return std::abs(val - expected) < eps && std::abs(val - expected) / std::abs(val) < eps;
 }
 
-template<typename T>
-bool fptest_close_pct(T val, T expected, T percent)
-{
-    constexpr T zero = T(0);
-    assert(percent >= zero);
-
-    // Handle the zero cases
-    if (percent == zero) return val == expected;
-    T eps = (percent / T(100)) * std::max(std::abs(val), std::abs(expected));
-
-    return fptest_close(val, expected, eps);
+template <typename T>
+bool fptest_close_pct(T val, T expected, T percent) {
+  assert(percent >= T(0));
+  T eps = (percent / T(100)) * std::max(std::abs(val), std::abs(expected));
+  return fptest_close(val, expected, eps);
 }
 
-
 #endif // SUPPORT_FP_COMPARE_H

From 3f193bcc5a72f05a4b2d5f74a0ee16a76b3b32f2 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Thu, 8 Aug 2024 13:29:59 +0800
Subject: [PATCH 174/427] [C++20] [Modules] Don't diagnose duplicated implicit
 decl in multiple named modules (#102423)

Close https://github.com/llvm/llvm-project/issues/102360
Close https://github.com/llvm/llvm-project/issues/102349

http://eel.is/c++draft/basic.def.odr#15.3 makes it clear that the
duplicated deinition are not allowed to be attached to named modules.

But we need to filter the implicit declarations as user can do nothing
about it and the diagnostic message is annoying.

(cherry picked from commit e72d956b99e920b0fe2a7946eb3a51b9e889c73c)
---
 clang/lib/Serialization/ASTReaderDecl.cpp | 65 +++++++++++++++++------
 clang/test/Modules/pr102349.cppm          | 52 ++++++++++++++++++
 clang/test/Modules/pr102360.cppm          | 53 ++++++++++++++++++
 3 files changed, 154 insertions(+), 16 deletions(-)
 create mode 100644 clang/test/Modules/pr102349.cppm
 create mode 100644 clang/test/Modules/pr102360.cppm

diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index 31ab6c651d59f..ccc97f65526d6 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -3684,6 +3684,54 @@ static void inheritDefaultTemplateArguments(ASTContext &Context,
   }
 }
 
+// [basic.link]/p10:
+//    If two declarations of an entity are attached to different modules,
+//    the program is ill-formed;
+static void checkMultipleDefinitionInNamedModules(ASTReader &Reader, Decl *D,
+                                                  Decl *Previous) {
+  Module *M = Previous->getOwningModule();
+
+  // We only care about the case in named modules.
+  if (!M || !M->isNamedModule())
+    return;
+
+  // If it is previous implcitly introduced, it is not meaningful to
+  // diagnose it.
+  if (Previous->isImplicit())
+    return;
+
+  // FIXME: Get rid of the enumeration of decl types once we have an appropriate
+  // abstract for decls of an entity. e.g., the namespace decl and using decl
+  // doesn't introduce an entity.
+  if (!isa<VarDecl, FunctionDecl, TagDecl, RedeclarableTemplateDecl>(Previous))
+    return;
+
+  // Skip implicit instantiations since it may give false positive diagnostic
+  // messages.
+  // FIXME: Maybe this shows the implicit instantiations may have incorrect
+  // module owner ships. But given we've finished the compilation of a module,
+  // how can we add new entities to that module?
+  if (auto *VTSD = dyn_cast<VarTemplateSpecializationDecl>(Previous);
+      VTSD && !VTSD->isExplicitSpecialization())
+    return;
+  if (auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(Previous);
+      CTSD && !CTSD->isExplicitSpecialization())
+    return;
+  if (auto *Func = dyn_cast<FunctionDecl>(Previous))
+    if (auto *FTSI = Func->getTemplateSpecializationInfo();
+        FTSI && !FTSI->isExplicitSpecialization())
+      return;
+
+  // It is fine if they are in the same module.
+  if (Reader.getContext().isInSameModule(M, D->getOwningModule()))
+    return;
+
+  Reader.Diag(Previous->getLocation(),
+              diag::err_multiple_decl_in_different_modules)
+      << cast<NamedDecl>(Previous) << M->Name;
+  Reader.Diag(D->getLocation(), diag::note_also_found);
+}
+
 void ASTDeclReader::attachPreviousDecl(ASTReader &Reader, Decl *D,
                                        Decl *Previous, Decl *Canon) {
   assert(D && Previous);
@@ -3697,22 +3745,7 @@ void ASTDeclReader::attachPreviousDecl(ASTReader &Reader, Decl *D,
 #include "clang/AST/DeclNodes.inc"
   }
 
-  // [basic.link]/p10:
-  //    If two declarations of an entity are attached to different modules,
-  //    the program is ill-formed;
-  //
-  // FIXME: Get rid of the enumeration of decl types once we have an appropriate
-  // abstract for decls of an entity. e.g., the namespace decl and using decl
-  // doesn't introduce an entity.
-  if (Module *M = Previous->getOwningModule();
-      M && M->isNamedModule() &&
-      isa<VarDecl, FunctionDecl, TagDecl, RedeclarableTemplateDecl>(Previous) &&
-      !Reader.getContext().isInSameModule(M, D->getOwningModule())) {
-    Reader.Diag(Previous->getLocation(),
-                diag::err_multiple_decl_in_different_modules)
-        << cast<NamedDecl>(Previous) << M->Name;
-    Reader.Diag(D->getLocation(), diag::note_also_found);
-  }
+  checkMultipleDefinitionInNamedModules(Reader, D, Previous);
 
   // If the declaration was visible in one module, a redeclaration of it in
   // another module remains visible even if it wouldn't be visible by itself.
diff --git a/clang/test/Modules/pr102349.cppm b/clang/test/Modules/pr102349.cppm
new file mode 100644
index 0000000000000..2d166c9e93fcf
--- /dev/null
+++ b/clang/test/Modules/pr102349.cppm
@@ -0,0 +1,52 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm
+// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-module-interface -o %t/b.pcm \
+// RUN:   -fprebuilt-module-path=%t
+// RUN: %clang_cc1 -std=c++20 %t/c.cppm -emit-module-interface -o %t/c.pcm \
+// RUN:   -fprebuilt-module-path=%t
+// RUN: %clang_cc1 -std=c++20 %t/d.cpp -fsyntax-only -verify \
+// RUN:   -fprebuilt-module-path=%t
+
+//--- a.cppm
+export module a;
+
+export template<typename>
+struct a;
+
+template<typename>
+struct a {
+};
+
+export template<typename T>
+constexpr auto aa = a<T>();
+
+//--- b.cppm
+export module b;
+
+import a;
+
+static void b() {
+	static_cast<void>(a<void>());
+}
+
+//--- c.cppm
+export module c;
+
+import a;
+
+static void c() {
+	static_cast<void>(aa<void>);
+}
+
+//--- d.cpp
+// expected-no-diagnostics
+import a;
+import b;
+import c;
+
+static void d() {
+	static_cast<void>(a<void>());
+}
diff --git a/clang/test/Modules/pr102360.cppm b/clang/test/Modules/pr102360.cppm
new file mode 100644
index 0000000000000..e0dab1a031801
--- /dev/null
+++ b/clang/test/Modules/pr102360.cppm
@@ -0,0 +1,53 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm
+// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-module-interface -o %t/b.pcm \
+// RUN:   -fprebuilt-module-path=%t
+// RUN: %clang_cc1 -std=c++20 %t/c.cppm -emit-module-interface -o %t/c.pcm \
+// RUN:   -fprebuilt-module-path=%t
+// RUN: %clang_cc1 -std=c++20 %t/d.cpp -fsyntax-only -verify \
+// RUN:   -fprebuilt-module-path=%t
+
+//--- a.cppm
+export module a;
+
+template<typename>
+constexpr auto impl = true;
+
+export template<typename T>
+void a() {
+}
+
+export template<typename T> requires impl<T>
+void a() {
+}
+
+//--- b.cppm
+export module b;
+
+import a;
+
+static void b() {
+	a<void>();
+}
+
+//--- c.cppm
+export module c;
+
+import a;
+
+static void c() {
+	a<void>();
+}
+
+//--- d.cpp
+// expected-no-diagnostics
+import a;
+import b;
+import c;
+
+static void d() {
+	a<void>();
+}

From 28f2d04b3ca36faffe997fa86833e5ed83699272 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <274595+qiongsiwu@users.noreply.github.com>
Date: Mon, 12 Aug 2024 16:00:25 -0400
Subject: [PATCH 175/427] [AIX] Revert `#pragma mc_func` check (#102919)

https://github.com/llvm/llvm-project/pull/99888 added a specific
diagnostic for `#pragma mc_func` on AIX. There are some disagreements
on:

1. If the check should be on by default. Leaving the check off by
default is dangerous, since it is difficult to be aware of such a check.
Turning it on by default at the moment causes build failures on AIX. See
https://github.com/llvm/llvm-project/pull/101336 for more details.
2. If the check can be made more general. See
https://github.com/llvm/llvm-project/pull/101336#issuecomment-2269283906.

This PR reverts this check from `main` so we can flush out these
disagreements.

(cherry picked from commit 123b6fcc70af17d81c903b839ffb55afc9a9728f)
---
 .../clang/Basic/DiagnosticParseKinds.td       |  3 ---
 clang/include/clang/Driver/Options.td         |  7 ------
 clang/include/clang/Lex/PreprocessorOptions.h |  5 ----
 clang/include/clang/Parse/Parser.h            |  1 -
 clang/lib/Driver/ToolChains/AIX.cpp           |  6 -----
 clang/lib/Parse/ParsePragma.cpp               | 25 -------------------
 clang/test/Preprocessor/pragma_mc_func.c      | 23 -----------------
 7 files changed, 70 deletions(-)
 delete mode 100644 clang/test/Preprocessor/pragma_mc_func.c

diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index f8d50d12bb935..12aab09f28556 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1260,9 +1260,6 @@ def warn_pragma_intrinsic_builtin : Warning<
 def warn_pragma_unused_expected_var : Warning<
   "expected '#pragma unused' argument to be a variable name">,
   InGroup<IgnoredPragmas>;
-// - #pragma mc_func
-def err_pragma_mc_func_not_supported :
-   Error<"#pragma mc_func is not supported">;
 // - #pragma init_seg
 def warn_pragma_init_seg_unsupported_target : Warning<
   "'#pragma init_seg' is only supported when targeting a "
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 5e412cb5f5189..15f9ee75492e3 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -8090,13 +8090,6 @@ def source_date_epoch : Separate<["-"], "source-date-epoch">,
 
 } // let Visibility = [CC1Option]
 
-defm err_pragma_mc_func_aix : BoolFOption<"err-pragma-mc-func-aix",
-  PreprocessorOpts<"ErrorOnPragmaMcfuncOnAIX">, DefaultFalse,
-  PosFlag<SetTrue, [], [ClangOption, CC1Option],
-          "Treat uses of #pragma mc_func as errors">,
-  NegFlag<SetFalse,[], [ClangOption, CC1Option],
-          "Ignore uses of #pragma mc_func">>;
-
 //===----------------------------------------------------------------------===//
 // CUDA Options
 //===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h
index 3f7dd9db18ba7..c2e3d68333024 100644
--- a/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/clang/include/clang/Lex/PreprocessorOptions.h
@@ -211,10 +211,6 @@ class PreprocessorOptions {
   /// If set, the UNIX timestamp specified by SOURCE_DATE_EPOCH.
   std::optional<uint64_t> SourceDateEpoch;
 
-  /// If set, the preprocessor reports an error when processing #pragma mc_func
-  /// on AIX.
-  bool ErrorOnPragmaMcfuncOnAIX = false;
-
 public:
   PreprocessorOptions() : PrecompiledPreambleBytes(0, false) {}
 
@@ -252,7 +248,6 @@ class PreprocessorOptions {
     PrecompiledPreambleBytes.first = 0;
     PrecompiledPreambleBytes.second = false;
     RetainExcludedConditionalBlocks = false;
-    ErrorOnPragmaMcfuncOnAIX = false;
   }
 };
 
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 35bb1a19d40f0..f256d603ae626 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -221,7 +221,6 @@ class Parser : public CodeCompletionHandler {
   std::unique_ptr<PragmaHandler> MaxTokensHerePragmaHandler;
   std::unique_ptr<PragmaHandler> MaxTokensTotalPragmaHandler;
   std::unique_ptr<PragmaHandler> RISCVPragmaHandler;
-  std::unique_ptr<PragmaHandler> MCFuncPragmaHandler;
 
   std::unique_ptr<CommentHandler> CommentSemaHandler;
 
diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp
index fb780fb75651d..b04502a57a9f7 100644
--- a/clang/lib/Driver/ToolChains/AIX.cpp
+++ b/clang/lib/Driver/ToolChains/AIX.cpp
@@ -557,12 +557,6 @@ void AIX::addClangTargetOptions(
   if (!Args.getLastArgNoClaim(options::OPT_fsized_deallocation,
                               options::OPT_fno_sized_deallocation))
     CC1Args.push_back("-fno-sized-deallocation");
-
-  if (Args.hasFlag(options::OPT_ferr_pragma_mc_func_aix,
-                   options::OPT_fno_err_pragma_mc_func_aix, false))
-    CC1Args.push_back("-ferr-pragma-mc-func-aix");
-  else
-    CC1Args.push_back("-fno-err-pragma-mc-func-aix");
 }
 
 void AIX::addProfileRTLibs(const llvm::opt::ArgList &Args,
diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp
index aef4ddb758816..cc6f18b5b319f 100644
--- a/clang/lib/Parse/ParsePragma.cpp
+++ b/clang/lib/Parse/ParsePragma.cpp
@@ -14,7 +14,6 @@
 #include "clang/Basic/PragmaKinds.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/PreprocessorOptions.h"
 #include "clang/Lex/Token.h"
 #include "clang/Parse/LoopHint.h"
 #include "clang/Parse/ParseDiagnostic.h"
@@ -412,19 +411,6 @@ struct PragmaRISCVHandler : public PragmaHandler {
   Sema &Actions;
 };
 
-struct PragmaMCFuncHandler : public PragmaHandler {
-  PragmaMCFuncHandler(bool ReportError)
-      : PragmaHandler("mc_func"), ReportError(ReportError) {}
-  void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
-                    Token &Tok) override {
-    if (ReportError)
-      PP.Diag(Tok, diag::err_pragma_mc_func_not_supported);
-  }
-
-private:
-  bool ReportError = false;
-};
-
 void markAsReinjectedForRelexing(llvm::MutableArrayRef<clang::Token> Toks) {
   for (auto &T : Toks)
     T.setFlag(clang::Token::IsReinjected);
@@ -582,12 +568,6 @@ void Parser::initializePragmaHandlers() {
     RISCVPragmaHandler = std::make_unique<PragmaRISCVHandler>(Actions);
     PP.AddPragmaHandler("clang", RISCVPragmaHandler.get());
   }
-
-  if (getTargetInfo().getTriple().isOSAIX()) {
-    MCFuncPragmaHandler = std::make_unique<PragmaMCFuncHandler>(
-        PP.getPreprocessorOpts().ErrorOnPragmaMcfuncOnAIX);
-    PP.AddPragmaHandler(MCFuncPragmaHandler.get());
-  }
 }
 
 void Parser::resetPragmaHandlers() {
@@ -722,11 +702,6 @@ void Parser::resetPragmaHandlers() {
     PP.RemovePragmaHandler("clang", RISCVPragmaHandler.get());
     RISCVPragmaHandler.reset();
   }
-
-  if (getTargetInfo().getTriple().isOSAIX()) {
-    PP.RemovePragmaHandler(MCFuncPragmaHandler.get());
-    MCFuncPragmaHandler.reset();
-  }
 }
 
 /// Handle the annotation token produced for #pragma unused(...)
diff --git a/clang/test/Preprocessor/pragma_mc_func.c b/clang/test/Preprocessor/pragma_mc_func.c
deleted file mode 100644
index f0d3e49e5dddc..0000000000000
--- a/clang/test/Preprocessor/pragma_mc_func.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// RUN: not %clang --target=powerpc64-ibm-aix -ferr-pragma-mc-func-aix -fsyntax-only \
-// RUN:   %s 2>&1 | FileCheck %s
-#pragma mc_func asm_barrier {"60000000"}
-
-// CHECK:  error: #pragma mc_func is not supported
-
-// Cases where no errors occur.
-// RUN: %clang --target=powerpc64-ibm-aix -fno-err-pragma-mc-func-aix -fsyntax-only %s
-// RUN: %clang --target=powerpc64-ibm-aix -ferr-pragma-mc-func-aix -fsyntax-only \
-// RUN:    -fno-err-pragma-mc-func-aix %s
-// RUN: %clang --target=powerpc64-ibm-aix -fsyntax-only %s
-// RUN: %clang --target=powerpc64-ibm-aix -Werror=unknown-pragmas \
-// RUN:   -fno-err-pragma-mc-func-aix -fsyntax-only %s
-
-// Cases where we have errors or warnings.
-// RUN: not %clang --target=powerpc64le-unknown-linux-gnu \
-// RUN:   -Werror=unknown-pragmas -fno-err-pragma-mc-func-aix -fsyntax-only %s 2>&1 | \
-// RUN:   FileCheck --check-prefix=UNUSED %s
-// RUN: %clang --target=powerpc64le-unknown-linux-gnu \
-// RUN:   -fno-err-pragma-mc-func-aix -fsyntax-only %s 2>&1 | \
-// RUN:   FileCheck --check-prefix=UNUSED %s
-
-// UNUSED: clang: warning: argument unused during compilation: '-fno-err-pragma-mc-func-aix' [-Wunused-command-line-argument]

From dec94b04c02fe18296e29a10f2db7e8da054be0f Mon Sep 17 00:00:00 2001
From: Krystian Stasiowski <sdkrystian@gmail.com>
Date: Tue, 6 Aug 2024 12:40:44 -0400
Subject: [PATCH 176/427] [Clang][Sema] Make UnresolvedLookupExprs in class
 scope explicit specializations instantiation dependent (#100392)

A class member named by an expression in a member function that may instantiate to a static _or_ non-static member is represented by a `UnresolvedLookupExpr` in order to defer the implicit transformation to a class member access expression until instantiation. Since `ASTContext::getDecltypeType` only creates a `DecltypeType` that has a `DependentDecltypeType` as its canonical type when the operand is instantiation dependent, and since we do not transform types unless they are instantiation dependent, we need to mark the `UnresolvedLookupExpr` as instantiation dependent in order to correctly build a `DecltypeType` using the expression as its operand with a `DependentDecltypeType` canonical type. Fixes #99873.

(cherry picked from commit 55ea36002bd364518c20b3ce282640c920697bf7)
---
 clang/include/clang/AST/ExprCXX.h |  7 ++++---
 clang/lib/AST/ASTImporter.cpp     |  6 ++++--
 clang/lib/AST/ExprCXX.cpp         | 19 +++++++++++--------
 clang/lib/Sema/SemaCoroutine.cpp  |  3 ++-
 clang/lib/Sema/SemaDecl.cpp       |  2 +-
 clang/lib/Sema/SemaDeclCXX.cpp    |  2 +-
 clang/lib/Sema/SemaExpr.cpp       |  2 +-
 clang/lib/Sema/SemaExprMember.cpp |  3 ++-
 clang/lib/Sema/SemaOpenMP.cpp     |  6 ++++--
 clang/lib/Sema/SemaOverload.cpp   |  6 +++---
 clang/lib/Sema/SemaTemplate.cpp   |  3 ++-
 clang/lib/Sema/TreeTransform.h    |  8 ++++----
 clang/test/SemaCXX/decltype.cpp   | 25 +++++++++++++++++++++++++
 13 files changed, 64 insertions(+), 28 deletions(-)

diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h
index c2feac525c1ea..45cfd7bfb7f92 100644
--- a/clang/include/clang/AST/ExprCXX.h
+++ b/clang/include/clang/AST/ExprCXX.h
@@ -3229,7 +3229,7 @@ class UnresolvedLookupExpr final
                        const DeclarationNameInfo &NameInfo, bool RequiresADL,
                        const TemplateArgumentListInfo *TemplateArgs,
                        UnresolvedSetIterator Begin, UnresolvedSetIterator End,
-                       bool KnownDependent);
+                       bool KnownDependent, bool KnownInstantiationDependent);
 
   UnresolvedLookupExpr(EmptyShell Empty, unsigned NumResults,
                        bool HasTemplateKWAndArgsInfo);
@@ -3248,7 +3248,7 @@ class UnresolvedLookupExpr final
          NestedNameSpecifierLoc QualifierLoc,
          const DeclarationNameInfo &NameInfo, bool RequiresADL,
          UnresolvedSetIterator Begin, UnresolvedSetIterator End,
-         bool KnownDependent);
+         bool KnownDependent, bool KnownInstantiationDependent);
 
   // After canonicalization, there may be dependent template arguments in
   // CanonicalConverted But none of Args is dependent. When any of
@@ -3258,7 +3258,8 @@ class UnresolvedLookupExpr final
          NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc,
          const DeclarationNameInfo &NameInfo, bool RequiresADL,
          const TemplateArgumentListInfo *Args, UnresolvedSetIterator Begin,
-         UnresolvedSetIterator End, bool KnownDependent);
+         UnresolvedSetIterator End, bool KnownDependent,
+         bool KnownInstantiationDependent);
 
   static UnresolvedLookupExpr *CreateEmpty(const ASTContext &Context,
                                            unsigned NumResults,
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 08ef09d353afc..e95992b99f7e9 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -8578,13 +8578,15 @@ ASTNodeImporter::VisitUnresolvedLookupExpr(UnresolvedLookupExpr *E) {
     return UnresolvedLookupExpr::Create(
         Importer.getToContext(), *ToNamingClassOrErr, *ToQualifierLocOrErr,
         *ToTemplateKeywordLocOrErr, ToNameInfo, E->requiresADL(), &ToTAInfo,
-        ToDecls.begin(), ToDecls.end(), KnownDependent);
+        ToDecls.begin(), ToDecls.end(), KnownDependent,
+        /*KnownInstantiationDependent=*/E->isInstantiationDependent());
   }
 
   return UnresolvedLookupExpr::Create(
       Importer.getToContext(), *ToNamingClassOrErr, *ToQualifierLocOrErr,
       ToNameInfo, E->requiresADL(), ToDecls.begin(), ToDecls.end(),
-      /*KnownDependent=*/E->isTypeDependent());
+      /*KnownDependent=*/E->isTypeDependent(),
+      /*KnownInstantiationDependent=*/E->isInstantiationDependent());
 }
 
 ExpectedStmt
diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp
index 8d2a1b5611ccc..45e2badf2ddd4 100644
--- a/clang/lib/AST/ExprCXX.cpp
+++ b/clang/lib/AST/ExprCXX.cpp
@@ -402,10 +402,11 @@ UnresolvedLookupExpr::UnresolvedLookupExpr(
     NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc,
     const DeclarationNameInfo &NameInfo, bool RequiresADL,
     const TemplateArgumentListInfo *TemplateArgs, UnresolvedSetIterator Begin,
-    UnresolvedSetIterator End, bool KnownDependent)
+    UnresolvedSetIterator End, bool KnownDependent,
+    bool KnownInstantiationDependent)
     : OverloadExpr(UnresolvedLookupExprClass, Context, QualifierLoc,
                    TemplateKWLoc, NameInfo, TemplateArgs, Begin, End,
-                   KnownDependent, false, false),
+                   KnownDependent, KnownInstantiationDependent, false),
       NamingClass(NamingClass) {
   UnresolvedLookupExprBits.RequiresADL = RequiresADL;
 }
@@ -420,7 +421,7 @@ UnresolvedLookupExpr *UnresolvedLookupExpr::Create(
     const ASTContext &Context, CXXRecordDecl *NamingClass,
     NestedNameSpecifierLoc QualifierLoc, const DeclarationNameInfo &NameInfo,
     bool RequiresADL, UnresolvedSetIterator Begin, UnresolvedSetIterator End,
-    bool KnownDependent) {
+    bool KnownDependent, bool KnownInstantiationDependent) {
   unsigned NumResults = End - Begin;
   unsigned Size = totalSizeToAlloc<DeclAccessPair, ASTTemplateKWAndArgsInfo,
                                    TemplateArgumentLoc>(NumResults, 0, 0);
@@ -428,7 +429,8 @@ UnresolvedLookupExpr *UnresolvedLookupExpr::Create(
   return new (Mem) UnresolvedLookupExpr(
       Context, NamingClass, QualifierLoc,
       /*TemplateKWLoc=*/SourceLocation(), NameInfo, RequiresADL,
-      /*TemplateArgs=*/nullptr, Begin, End, KnownDependent);
+      /*TemplateArgs=*/nullptr, Begin, End, KnownDependent,
+      KnownInstantiationDependent);
 }
 
 UnresolvedLookupExpr *UnresolvedLookupExpr::Create(
@@ -436,7 +438,8 @@ UnresolvedLookupExpr *UnresolvedLookupExpr::Create(
     NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc,
     const DeclarationNameInfo &NameInfo, bool RequiresADL,
     const TemplateArgumentListInfo *Args, UnresolvedSetIterator Begin,
-    UnresolvedSetIterator End, bool KnownDependent) {
+    UnresolvedSetIterator End, bool KnownDependent,
+    bool KnownInstantiationDependent) {
   unsigned NumResults = End - Begin;
   bool HasTemplateKWAndArgsInfo = Args || TemplateKWLoc.isValid();
   unsigned NumTemplateArgs = Args ? Args->size() : 0;
@@ -444,9 +447,9 @@ UnresolvedLookupExpr *UnresolvedLookupExpr::Create(
                                    TemplateArgumentLoc>(
       NumResults, HasTemplateKWAndArgsInfo, NumTemplateArgs);
   void *Mem = Context.Allocate(Size, alignof(UnresolvedLookupExpr));
-  return new (Mem) UnresolvedLookupExpr(Context, NamingClass, QualifierLoc,
-                                        TemplateKWLoc, NameInfo, RequiresADL,
-                                        Args, Begin, End, KnownDependent);
+  return new (Mem) UnresolvedLookupExpr(
+      Context, NamingClass, QualifierLoc, TemplateKWLoc, NameInfo, RequiresADL,
+      Args, Begin, End, KnownDependent, KnownInstantiationDependent);
 }
 
 UnresolvedLookupExpr *UnresolvedLookupExpr::CreateEmpty(
diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp
index 81334c817b2af..4e180d648cd82 100644
--- a/clang/lib/Sema/SemaCoroutine.cpp
+++ b/clang/lib/Sema/SemaCoroutine.cpp
@@ -820,7 +820,8 @@ ExprResult Sema::BuildOperatorCoawaitLookupExpr(Scope *S, SourceLocation Loc) {
   Expr *CoawaitOp = UnresolvedLookupExpr::Create(
       Context, /*NamingClass*/ nullptr, NestedNameSpecifierLoc(),
       DeclarationNameInfo(OpName, Loc), /*RequiresADL*/ true, Functions.begin(),
-      Functions.end(), /*KnownDependent=*/false);
+      Functions.end(), /*KnownDependent=*/false,
+      /*KnownInstantiationDependent=*/false);
   assert(CoawaitOp);
   return CoawaitOp;
 }
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 01231f8e385ef..fa4bfe13053d8 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -1219,7 +1219,7 @@ Sema::NameClassification Sema::ClassifyName(Scope *S, CXXScopeSpec &SS,
   return NameClassification::OverloadSet(UnresolvedLookupExpr::Create(
       Context, Result.getNamingClass(), SS.getWithLocInContext(Context),
       Result.getLookupNameInfo(), ADL, Result.begin(), Result.end(),
-      /*KnownDependent=*/false));
+      /*KnownDependent=*/false, /*KnownInstantiationDependent=*/false));
 }
 
 ExprResult
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 04b8d88cae217..66ca62f5d7c4c 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -1289,7 +1289,7 @@ static bool checkTupleLikeDecomposition(Sema &S,
           S.Context, nullptr, NestedNameSpecifierLoc(), SourceLocation(),
           DeclarationNameInfo(GetDN, Loc), /*RequiresADL=*/true, &Args,
           UnresolvedSetIterator(), UnresolvedSetIterator(),
-          /*KnownDependent=*/false);
+          /*KnownDependent=*/false, /*KnownInstantiationDependent=*/false);
 
       Expr *Arg = E.get();
       E = S.BuildCallExpr(nullptr, Get, Loc, Arg, Loc);
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 74c0e01705905..edb8b79a2220b 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -3188,7 +3188,7 @@ ExprResult Sema::BuildDeclarationNameExpr(const CXXScopeSpec &SS,
   UnresolvedLookupExpr *ULE = UnresolvedLookupExpr::Create(
       Context, R.getNamingClass(), SS.getWithLocInContext(Context),
       R.getLookupNameInfo(), NeedsADL, R.begin(), R.end(),
-      /*KnownDependent=*/false);
+      /*KnownDependent=*/false, /*KnownInstantiationDependent=*/false);
 
   return ULE;
 }
diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp
index 2070f3b7bb3a2..f1ba26f38520a 100644
--- a/clang/lib/Sema/SemaExprMember.cpp
+++ b/clang/lib/Sema/SemaExprMember.cpp
@@ -331,7 +331,8 @@ ExprResult Sema::BuildPossibleImplicitMemberExpr(
     return UnresolvedLookupExpr::Create(
         Context, R.getNamingClass(), SS.getWithLocInContext(Context),
         TemplateKWLoc, R.getLookupNameInfo(), /*RequiresADL=*/false,
-        TemplateArgs, R.begin(), R.end(), /*KnownDependent=*/true);
+        TemplateArgs, R.begin(), R.end(), /*KnownDependent=*/true,
+        /*KnownInstantiationDependent=*/true);
 
   case IMA_Error_StaticOrExplicitContext:
   case IMA_Error_Unrelated:
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 67e3c1d9067f3..6cbc075302eb5 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -17968,7 +17968,8 @@ buildDeclareReductionRef(Sema &SemaRef, SourceLocation Loc, SourceRange Range,
     return UnresolvedLookupExpr::Create(
         SemaRef.Context, /*NamingClass=*/nullptr,
         ReductionIdScopeSpec.getWithLocInContext(SemaRef.Context), ReductionId,
-        /*ADL=*/true, ResSet.begin(), ResSet.end(), /*KnownDependent=*/false);
+        /*ADL=*/true, ResSet.begin(), ResSet.end(), /*KnownDependent=*/false,
+        /*KnownInstantiationDependent=*/false);
   }
   // Lookup inside the classes.
   // C++ [over.match.oper]p3:
@@ -20834,7 +20835,8 @@ static ExprResult buildUserDefinedMapperRef(Sema &SemaRef, Scope *S,
     return UnresolvedLookupExpr::Create(
         SemaRef.Context, /*NamingClass=*/nullptr,
         MapperIdScopeSpec.getWithLocInContext(SemaRef.Context), MapperId,
-        /*ADL=*/false, URS.begin(), URS.end(), /*KnownDependent=*/false);
+        /*ADL=*/false, URS.begin(), URS.end(), /*KnownDependent=*/false,
+        /*KnownInstantiationDependent=*/false);
   }
   SourceLocation Loc = MapperId.getLoc();
   // [OpenMP 5.0], 2.19.7.3 declare mapper Directive, Restrictions
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 554a2df14bea6..28fd3b06156b9 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -14083,9 +14083,9 @@ ExprResult Sema::CreateUnresolvedLookupExpr(CXXRecordDecl *NamingClass,
                                             DeclarationNameInfo DNI,
                                             const UnresolvedSetImpl &Fns,
                                             bool PerformADL) {
-  return UnresolvedLookupExpr::Create(Context, NamingClass, NNSLoc, DNI,
-                                      PerformADL, Fns.begin(), Fns.end(),
-                                      /*KnownDependent=*/false);
+  return UnresolvedLookupExpr::Create(
+      Context, NamingClass, NNSLoc, DNI, PerformADL, Fns.begin(), Fns.end(),
+      /*KnownDependent=*/false, /*KnownInstantiationDependent=*/false);
 }
 
 ExprResult Sema::BuildCXXMemberCallExpr(Expr *E, NamedDecl *FoundDecl,
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 87b1f98bbe5ac..ca71542d886fa 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -4436,7 +4436,8 @@ ExprResult Sema::BuildTemplateIdExpr(const CXXScopeSpec &SS,
   UnresolvedLookupExpr *ULE = UnresolvedLookupExpr::Create(
       Context, R.getNamingClass(), SS.getWithLocInContext(Context),
       TemplateKWLoc, R.getLookupNameInfo(), RequiresADL, TemplateArgs,
-      R.begin(), R.end(), KnownDependent);
+      R.begin(), R.end(), KnownDependent,
+      /*KnownInstantiationDependent=*/false);
 
   // Model the templates with UnresolvedTemplateTy. The expression should then
   // either be transformed in an instantiation or be diagnosed in
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 84e846356e437..51e6a4845bf6f 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -10541,7 +10541,7 @@ TreeTransform<Derived>::TransformOMPReductionClause(OMPReductionClause *C) {
           SemaRef.Context, /*NamingClass=*/nullptr,
           ReductionIdScopeSpec.getWithLocInContext(SemaRef.Context), NameInfo,
           /*ADL=*/true, Decls.begin(), Decls.end(),
-          /*KnownDependent=*/false));
+          /*KnownDependent=*/false, /*KnownInstantiationDependent=*/false));
     } else
       UnresolvedReductions.push_back(nullptr);
   }
@@ -10588,7 +10588,7 @@ OMPClause *TreeTransform<Derived>::TransformOMPTaskReductionClause(
           SemaRef.Context, /*NamingClass=*/nullptr,
           ReductionIdScopeSpec.getWithLocInContext(SemaRef.Context), NameInfo,
           /*ADL=*/true, Decls.begin(), Decls.end(),
-          /*KnownDependent=*/false));
+          /*KnownDependent=*/false, /*KnownInstantiationDependent=*/false));
     } else
       UnresolvedReductions.push_back(nullptr);
   }
@@ -10634,7 +10634,7 @@ TreeTransform<Derived>::TransformOMPInReductionClause(OMPInReductionClause *C) {
           SemaRef.Context, /*NamingClass=*/nullptr,
           ReductionIdScopeSpec.getWithLocInContext(SemaRef.Context), NameInfo,
           /*ADL=*/true, Decls.begin(), Decls.end(),
-          /*KnownDependent=*/false));
+          /*KnownDependent=*/false, /*KnownInstantiationDependent=*/false));
     } else
       UnresolvedReductions.push_back(nullptr);
   }
@@ -10816,7 +10816,7 @@ bool transformOMPMappableExprListClause(
           TT.getSema().Context, /*NamingClass=*/nullptr,
           MapperIdScopeSpec.getWithLocInContext(TT.getSema().Context),
           MapperIdInfo, /*ADL=*/true, Decls.begin(), Decls.end(),
-          /*KnownDependent=*/false));
+          /*KnownDependent=*/false, /*KnownInstantiationDependent=*/false));
     } else {
       UnresolvedMappers.push_back(nullptr);
     }
diff --git a/clang/test/SemaCXX/decltype.cpp b/clang/test/SemaCXX/decltype.cpp
index 96abb60836e40..a37b89939565d 100644
--- a/clang/test/SemaCXX/decltype.cpp
+++ b/clang/test/SemaCXX/decltype.cpp
@@ -139,6 +139,31 @@ namespace GH58674 {
   }
 }
 
+namespace GH99873 {
+struct B {
+  int x;
+};
+
+template<typename T>
+struct A {
+  template<typename U>
+  constexpr int f() const {
+    return 1;
+  }
+
+  template<>
+  constexpr int f<int>() const {
+    return decltype(B::x)();
+  }
+};
+
+// This shouldn't crash.
+static_assert(A<int>().f<int>() == 0, "");
+// The result should not be dependent.
+static_assert(A<int>().f<int>() != 0, ""); // expected-error {{static assertion failed due to requirement 'GH99873::A<int>().f<int>() != 0'}}
+                                           // expected-note@-1 {{expression evaluates to '0 != 0'}}
+}
+
 template<typename>
 class conditional {
 };

From a3b18fcd241a6ce23653f69df37b59166ea0484d Mon Sep 17 00:00:00 2001
From: Brian Cain <bcain@quicinc.com>
Date: Mon, 12 Aug 2024 20:07:08 -0500
Subject: [PATCH 177/427] [libc++] Use a different smart ptr type alias
 (#102089)

The `_SP` type is used by some C libraries and this alias could conflict
with it.

(cherry picked from commit 7951673d408ee64744d0b924a49db78e8243d876)
---
 libcxx/include/__memory/inout_ptr.h            | 10 +++++-----
 libcxx/include/__memory/out_ptr.h              |  8 ++++----
 .../test/libcxx/system_reserved_names.gen.py   | 18 ++++++++++++++++--
 3 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/libcxx/include/__memory/inout_ptr.h b/libcxx/include/__memory/inout_ptr.h
index 72e1a21ad6867..e5f3ac5d027e8 100644
--- a/libcxx/include/__memory/inout_ptr.h
+++ b/libcxx/include/__memory/inout_ptr.h
@@ -63,17 +63,17 @@ class _LIBCPP_TEMPLATE_VIS inout_ptr_t {
       }
     }
 
-    using _SP = __pointer_of_or_t<_Smart, _Pointer>;
+    using _SmartPtr = __pointer_of_or_t<_Smart, _Pointer>;
     if constexpr (is_pointer_v<_Smart>) {
-      std::apply([&](auto&&... __args) { __s_ = _Smart(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); },
+      std::apply([&](auto&&... __args) { __s_ = _Smart(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); },
                  std::move(__a_));
     } else if constexpr (__resettable_smart_pointer_with_args<_Smart, _Pointer, _Args...>) {
-      std::apply([&](auto&&... __args) { __s_.reset(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); },
+      std::apply([&](auto&&... __args) { __s_.reset(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); },
                  std::move(__a_));
     } else {
-      static_assert(is_constructible_v<_Smart, _SP, _Args...>,
+      static_assert(is_constructible_v<_Smart, _SmartPtr, _Args...>,
                     "The smart pointer must be constructible from arguments of types _Smart, _Pointer, _Args...");
-      std::apply([&](auto&&... __args) { __s_ = _Smart(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); },
+      std::apply([&](auto&&... __args) { __s_ = _Smart(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); },
                  std::move(__a_));
     }
   }
diff --git a/libcxx/include/__memory/out_ptr.h b/libcxx/include/__memory/out_ptr.h
index 95aa2029c9231..fd99110790cc8 100644
--- a/libcxx/include/__memory/out_ptr.h
+++ b/libcxx/include/__memory/out_ptr.h
@@ -58,14 +58,14 @@ class _LIBCPP_TEMPLATE_VIS out_ptr_t {
       return;
     }
 
-    using _SP = __pointer_of_or_t<_Smart, _Pointer>;
+    using _SmartPtr = __pointer_of_or_t<_Smart, _Pointer>;
     if constexpr (__resettable_smart_pointer_with_args<_Smart, _Pointer, _Args...>) {
-      std::apply([&](auto&&... __args) { __s_.reset(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); },
+      std::apply([&](auto&&... __args) { __s_.reset(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); },
                  std::move(__a_));
     } else {
-      static_assert(is_constructible_v<_Smart, _SP, _Args...>,
+      static_assert(is_constructible_v<_Smart, _SmartPtr, _Args...>,
                     "The smart pointer must be constructible from arguments of types _Smart, _Pointer, _Args...");
-      std::apply([&](auto&&... __args) { __s_ = _Smart(static_cast<_SP>(__p_), std::forward<_Args>(__args)...); },
+      std::apply([&](auto&&... __args) { __s_ = _Smart(static_cast<_SmartPtr>(__p_), std::forward<_Args>(__args)...); },
                  std::move(__a_));
     }
   }
diff --git a/libcxx/test/libcxx/system_reserved_names.gen.py b/libcxx/test/libcxx/system_reserved_names.gen.py
index 0d935a18addee..956a8d1abe3c3 100644
--- a/libcxx/test/libcxx/system_reserved_names.gen.py
+++ b/libcxx/test/libcxx/system_reserved_names.gen.py
@@ -17,7 +17,8 @@
 from libcxx.header_information import lit_header_restrictions, public_headers
 
 for header in public_headers:
-  print(f"""\
+    print(
+        f"""\
 //--- {header}.compile.pass.cpp
 {lit_header_restrictions.get(header, '')}
 
@@ -162,6 +163,18 @@
 #define erase SYSTEM_RESERVED_NAME
 #define refresh SYSTEM_RESERVED_NAME
 
+// Dinkumware libc ctype.h uses these definitions
+#define _XA SYSTEM_RESERVED_NAME
+#define _XS SYSTEM_RESERVED_NAME
+#define _BB SYSTEM_RESERVED_NAME
+#define _CN SYSTEM_RESERVED_NAME
+#define _DI SYSTEM_RESERVED_NAME
+#define _LO SYSTEM_RESERVED_NAME
+#define _PU SYSTEM_RESERVED_NAME
+#define _SP SYSTEM_RESERVED_NAME
+#define _UP SYSTEM_RESERVED_NAME
+#define _XD SYSTEM_RESERVED_NAME
+
 #include <{header}>
 
 // Make sure we don't swallow the definition of the macros we push/pop
@@ -172,4 +185,5 @@
 static_assert(__builtin_strcmp(STRINGIFY(move), STRINGIFY(SYSTEM_RESERVED_NAME)) == 0, "");
 static_assert(__builtin_strcmp(STRINGIFY(erase), STRINGIFY(SYSTEM_RESERVED_NAME)) == 0, "");
 static_assert(__builtin_strcmp(STRINGIFY(refresh), STRINGIFY(SYSTEM_RESERVED_NAME)) == 0, "");
-""")
+"""
+    )

From 028cef89ecd71787f704d70cc99689b75425659c Mon Sep 17 00:00:00 2001
From: Jacek Caban <jacek@codeweavers.com>
Date: Tue, 13 Aug 2024 13:39:42 +0200
Subject: [PATCH 178/427] [CodeGen][ARM64EC] Define hybrid_patchable EXP thunk
 symbol as a function. (#102898)

This is needed for MSVC link.exe to generate redirection metadata for hybrid patchable thunks.

(cherry picked from commit d550ada5ab6cd6e49de71ac4c9aa27ced4c11de0)
---
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp |  7 ++++++
 .../AArch64/arm64ec-hybrid-patchable.ll       | 24 +++++++++++++++----
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 3c9b07ad45bf2..c64454cc253c3 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -1292,6 +1292,13 @@ void AArch64AsmPrinter::emitGlobalAlias(const Module &M,
       StringRef ExpStr = cast<MDString>(Node->getOperand(0))->getString();
       MCSymbol *ExpSym = MMI->getContext().getOrCreateSymbol(ExpStr);
       MCSymbol *Sym = MMI->getContext().getOrCreateSymbol(GA.getName());
+
+      OutStreamer->beginCOFFSymbolDef(ExpSym);
+      OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL);
+      OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+                                      << COFF::SCT_COMPLEX_TYPE_SHIFT);
+      OutStreamer->endCOFFSymbolDef();
+
       OutStreamer->beginCOFFSymbolDef(Sym);
       OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL);
       OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll b/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll
index 64fb5b36b2c62..1ed6a273338ab 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll
@@ -240,6 +240,10 @@ define dso_local void @caller() nounwind {
 ; CHECK-NEXT:      .section        .drectve,"yni"
 ; CHECK-NEXT:      .ascii  " /EXPORT:exp"
 
+; CHECK-NEXT:      .def    "EXP+#func";
+; CHECK-NEXT:      .scl    2;
+; CHECK-NEXT:      .type   32;
+; CHECK-NEXT:      .endef
 ; CHECK-NEXT:      .def    func;
 ; CHECK-NEXT:      .scl    2;
 ; CHECK-NEXT:      .type   32;
@@ -252,6 +256,10 @@ define dso_local void @caller() nounwind {
 ; CHECK-NEXT:      .type   32;
 ; CHECK-NEXT:      .endef
 ; CHECK-NEXT:  .set "#func", "#func$hybpatch_thunk"{{$}}
+; CHECK-NEXT:      .def    "EXP+#has_varargs";
+; CHECK-NEXT:      .scl    2;
+; CHECK-NEXT:      .type   32;
+; CHECK-NEXT:      .endef
 ; CHECK-NEXT:      .def    has_varargs;
 ; CHECK-NEXT:      .scl    2;
 ; CHECK-NEXT:      .type   32;
@@ -264,6 +272,10 @@ define dso_local void @caller() nounwind {
 ; CHECK-NEXT:      .type   32;
 ; CHECK-NEXT:      .endef
 ; CHECK-NEXT:  .set "#has_varargs", "#has_varargs$hybpatch_thunk"
+; CHECK-NEXT:      .def    "EXP+#has_sret";
+; CHECK-NEXT:      .scl    2;
+; CHECK-NEXT:      .type   32;
+; CHECK-NEXT:      .endef
 ; CHECK-NEXT:      .def    has_sret;
 ; CHECK-NEXT:      .scl    2;
 ; CHECK-NEXT:      .type   32;
@@ -276,6 +288,10 @@ define dso_local void @caller() nounwind {
 ; CHECK-NEXT:      .type   32;
 ; CHECK-NEXT:      .endef
 ; CHECK-NEXT:  .set "#has_sret", "#has_sret$hybpatch_thunk"
+; CHECK-NEXT:      .def    "EXP+#exp";
+; CHECK-NEXT:      .scl    2;
+; CHECK-NEXT:      .type   32;
+; CHECK-NEXT:      .endef
 ; CHECK-NEXT:      .def    exp;
 ; CHECK-NEXT:      .scl    2;
 ; CHECK-NEXT:      .type   32;
@@ -295,18 +311,18 @@ define dso_local void @caller() nounwind {
 ; SYM:      [78](sec 20)(fl 0x00)(ty  20)(scl   2) (nx 0) 0x00000000 #exp$hybpatch_thunk
 ; SYM:      [110](sec  0)(fl 0x00)(ty   0)(scl  69) (nx 1) 0x00000000 func
 ; SYM-NEXT: AUX indx 112 srch 3
-; SYM-NEXT: [112](sec  0)(fl 0x00)(ty   0)(scl   2) (nx 0) 0x00000000 EXP+#func
+; SYM-NEXT: [112](sec  0)(fl 0x00)(ty  20)(scl   2) (nx 0) 0x00000000 EXP+#func
 ; SYM:      [116](sec  0)(fl 0x00)(ty   0)(scl  69) (nx 1) 0x00000000 #func
 ; SYM-NEXT: AUX indx 53 srch 3
 ; SYM:      [122](sec  0)(fl 0x00)(ty   0)(scl  69) (nx 1) 0x00000000 has_varargs
 ; SYM-NEXT: AUX indx 124 srch 3
-; SYM-NEXT: [124](sec  0)(fl 0x00)(ty   0)(scl   2) (nx 0) 0x00000000 EXP+#has_varargs
+; SYM-NEXT: [124](sec  0)(fl 0x00)(ty  20)(scl   2) (nx 0) 0x00000000 EXP+#has_varargs
 ; SYM-NEXT: [125](sec  0)(fl 0x00)(ty   0)(scl  69) (nx 1) 0x00000000 has_sret
 ; SYM-NEXT: AUX indx 127 srch 3
-; SYM-NEXT: [127](sec  0)(fl 0x00)(ty   0)(scl   2) (nx 0) 0x00000000 EXP+#has_sret
+; SYM-NEXT: [127](sec  0)(fl 0x00)(ty  20)(scl   2) (nx 0) 0x00000000 EXP+#has_sret
 ; SYM-NEXT: [128](sec  0)(fl 0x00)(ty   0)(scl  69) (nx 1) 0x00000000 exp
 ; SYM-NEXT: AUX indx 130 srch 3
-; SYM-NEXT: [130](sec  0)(fl 0x00)(ty   0)(scl   2) (nx 0) 0x00000000 EXP+#exp
+; SYM-NEXT: [130](sec  0)(fl 0x00)(ty  20)(scl   2) (nx 0) 0x00000000 EXP+#exp
 ; SYM-NEXT: [131](sec  0)(fl 0x00)(ty   0)(scl  69) (nx 1) 0x00000000 #has_varargs
 ; SYM-NEXT: AUX indx 58 srch 3
 ; SYM-NEXT: [133](sec  0)(fl 0x00)(ty   0)(scl  69) (nx 1) 0x00000000 #has_sret

From 9c3d4f344685f7695bb8e05677d8684fa3aa1f72 Mon Sep 17 00:00:00 2001
From: Zaara Syeda <syzaara@ca.ibm.com>
Date: Wed, 7 Aug 2024 09:59:45 -0400
Subject: [PATCH 179/427] [PPC][AIX] Save/restore r31 when using base pointer
 (#100182)

When the base pointer r30 is used to hold the stack pointer, r30 is
spilled in the prologue. On AIX registers are saved from highest to
lowest, so r31 also needs to be saved.

Fixes https://github.com/llvm/llvm-project/issues/96411

(cherry picked from commit d07f106e512c08455b76cc1889ee48318e73c810)
---
 llvm/lib/Target/PowerPC/PPCFrameLowering.cpp  | 14 ++++++++++++--
 llvm/test/CodeGen/PowerPC/aix-base-pointer.ll |  5 +++++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 1963582ce6863..a57ed33bda9c7 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1007,7 +1007,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
         // R0 cannot be used as a base register, but it can be used as an
         // index in a store-indexed.
         int LastOffset = 0;
-        if (HasFP)  {
+        if (HasFP) {
           // R0 += (FPOffset-LastOffset).
           // Need addic, since addi treats R0 as 0.
           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
@@ -2025,8 +2025,18 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
   // code. Same goes for the base pointer and the PIC base register.
   if (needsFP(MF))
     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
-  if (RegInfo->hasBasePointer(MF))
+  if (RegInfo->hasBasePointer(MF)) {
     SavedRegs.reset(RegInfo->getBaseRegister(MF));
+    // On AIX, when BaseRegister(R30) is used, need to spill r31 too to match
+    // AIX trackback table requirement.
+    if (!needsFP(MF) && !SavedRegs.test(isPPC64 ? PPC::X31 : PPC::R31) &&
+        Subtarget.isAIXABI()) {
+      assert(
+          (RegInfo->getBaseRegister(MF) == (isPPC64 ? PPC::X30 : PPC::R30)) &&
+          "Invalid base register on AIX!");
+      SavedRegs.set(isPPC64 ? PPC::X31 : PPC::R31);
+    }
+  }
   if (FI->usesPICBase())
     SavedRegs.reset(PPC::R30);
 
diff --git a/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll b/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll
index ab222d770360c..5e66e5ec27638 100644
--- a/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll
@@ -6,6 +6,7 @@
 
 ; Use an overaligned buffer to force base-pointer usage. Test verifies:
 ; - base pointer register (r30) is saved/defined/restored.
+; - frame pointer register (r31) is saved/defined/restored.
 ; - stack frame is allocated with correct alignment.
 ; - Address of %AlignedBuffer is calculated based off offset from the stack
 ;   pointer.
@@ -25,7 +26,9 @@ declare void @callee(ptr)
 ; 32BIT:         subfic 0, 0, -224
 ; 32BIT:         stwux 1, 1, 0
 ; 32BIT:         addi 3, 1, 64
+; 32BIT:         stw 31, -12(30)
 ; 32BIT:         bl .callee
+; 32BIT:         lwz 31, -12(30)
 ; 32BIT:         mr 1, 30
 ; 32BIT:         lwz 30, -16(1)
 
@@ -36,6 +39,8 @@ declare void @callee(ptr)
 ; 64BIT:         subfic 0, 0, -288
 ; 64BIT:         stdux 1, 1, 0
 ; 64BIT:         addi 3, 1, 128
+; 64BIT:         std 31, -16(30)
 ; 64BIT:         bl .callee
+; 64BIT:         ld 31, -16(30)
 ; 64BIT:         mr 1, 30
 ; 64BIT:         ld 30, -24(1)

From ca3f3f63275a683c170251be30430a05428113a9 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Tue, 13 Aug 2024 12:39:33 -0700
Subject: [PATCH 180/427] [clang-format] Fix annotation of braces enclosing
 stringification (#102998)

Fixes #102937.

(cherry picked from commit ee2359968fa307ef45254c816e14df33374168cd)
---
 clang/lib/Format/UnwrappedLineParser.cpp      |  3 +++
 clang/unittests/Format/TokenAnnotatorTest.cpp | 11 +++++++++++
 2 files changed, 14 insertions(+)

diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index d406a531a5c0c..688c7c5b1e977 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -507,6 +507,9 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
     if (!Line->InMacroBody && !Style.isTableGen()) {
       // Skip PPDirective lines and comments.
       while (NextTok->is(tok::hash)) {
+        NextTok = Tokens->getNextToken();
+        if (NextTok->is(tok::pp_not_keyword))
+          break;
         do {
           NextTok = Tokens->getNextToken();
         } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof));
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 0b5475ea95989..c20b50d14b80b 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -3178,6 +3178,17 @@ TEST_F(TokenAnnotatorTest, BraceKind) {
   EXPECT_BRACE_KIND(Tokens[17], BK_Block);
   EXPECT_BRACE_KIND(Tokens[22], BK_Block);
   EXPECT_BRACE_KIND(Tokens[26], BK_Block);
+
+  Tokens = annotate("{\n"
+                    "#define M(x) \\\n"
+                    "  return {#x};\n"
+                    "}");
+  ASSERT_EQ(Tokens.size(), 15u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::l_brace, TT_BlockLBrace);
+  EXPECT_BRACE_KIND(Tokens[0], BK_Block);
+  EXPECT_BRACE_KIND(Tokens[8], BK_BracedInit);
+  EXPECT_BRACE_KIND(Tokens[11], BK_BracedInit);
+  EXPECT_BRACE_KIND(Tokens[13], BK_Block);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandsElaboratedTypeSpecifier) {

From 67b06b42973c0a207a44314524376551f4d19a4a Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs@apple.com>
Date: Wed, 14 Aug 2024 07:38:14 -0700
Subject: [PATCH 181/427] [clang][AArch64] Point the nofp ABI check diagnostics
 at the callee (#103392)

... whereever we have the Decl for it, and even when we don't keep the
SourceLocation of it aimed at the call site.

Fixes: #102983
(cherry picked from commit 019ef522756886caa258daf68d877f84abc1b878)
---
 clang/lib/CodeGen/Targets/AArch64.cpp              | 14 ++++++++------
 clang/test/CodeGen/aarch64-soft-float-abi-errors.c | 10 +++++-----
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 1dec3cd40ebd2..97381f673c284 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -840,12 +840,13 @@ static bool isStreamingCompatible(const FunctionDecl *F) {
 static void diagnoseIfNeedsFPReg(DiagnosticsEngine &Diags,
                                  const StringRef ABIName,
                                  const AArch64ABIInfo &ABIInfo,
-                                 const QualType &Ty, const NamedDecl *D) {
+                                 const QualType &Ty, const NamedDecl *D,
+                                 SourceLocation loc) {
   const Type *HABase = nullptr;
   uint64_t HAMembers = 0;
   if (Ty->isFloatingType() || Ty->isVectorType() ||
       ABIInfo.isHomogeneousAggregate(Ty, HABase, HAMembers)) {
-    Diags.Report(D->getLocation(), diag::err_target_unsupported_type_for_abi)
+    Diags.Report(loc, diag::err_target_unsupported_type_for_abi)
         << D->getDeclName() << Ty << ABIName;
   }
 }
@@ -860,10 +861,11 @@ void AArch64TargetCodeGenInfo::checkFunctionABI(
 
   if (!TI.hasFeature("fp") && !ABIInfo.isSoftFloat()) {
     diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo,
-                         FuncDecl->getReturnType(), FuncDecl);
+                         FuncDecl->getReturnType(), FuncDecl,
+                         FuncDecl->getLocation());
     for (ParmVarDecl *PVD : FuncDecl->parameters()) {
       diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, PVD->getType(),
-                           PVD);
+                           PVD, FuncDecl->getLocation());
     }
   }
 }
@@ -908,11 +910,11 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABISoftFloat(
     return;
 
   diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, ReturnType,
-                       Caller);
+                       Callee ? Callee : Caller, CallLoc);
 
   for (const CallArg &Arg : Args)
     diagnoseIfNeedsFPReg(CGM.getDiags(), TI.getABI(), ABIInfo, Arg.getType(),
-                         Caller);
+                         Callee ? Callee : Caller, CallLoc);
 }
 
 void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
diff --git a/clang/test/CodeGen/aarch64-soft-float-abi-errors.c b/clang/test/CodeGen/aarch64-soft-float-abi-errors.c
index 95b7668aca1b0..6961ee4b88886 100644
--- a/clang/test/CodeGen/aarch64-soft-float-abi-errors.c
+++ b/clang/test/CodeGen/aarch64-soft-float-abi-errors.c
@@ -69,7 +69,7 @@ inline void test_float_arg_inline(float a) {}
 inline void test_float_arg_inline_used(float a) {}
 // nofp-hard-opt-error@-1 {{'a' requires 'float' type support, but ABI 'aapcs' does not support it}}
 void use_inline() { test_float_arg_inline_used(1.0f); }
-// nofp-hard-error@-1 {{'use_inline' requires 'float' type support, but ABI 'aapcs' does not support it}}
+// nofp-hard-error@-1 {{'test_float_arg_inline_used' requires 'float' type support, but ABI 'aapcs' does not support it}}
 
 // The always_inline attribute causes an inline function to always be
 // code-genned, even at -O0, so we always emit the error.
@@ -77,7 +77,7 @@ __attribute((always_inline))
 inline void test_float_arg_always_inline_used(float a) {}
 // nofp-hard-error@-1 {{'a' requires 'float' type support, but ABI 'aapcs' does not support it}}
 void use_always_inline() { test_float_arg_always_inline_used(1.0f); }
-// nofp-hard-error@-1 {{'use_always_inline' requires 'float' type support, but ABI 'aapcs' does not support it}}
+// nofp-hard-error@-1 {{'test_float_arg_always_inline_used' requires 'float' type support, but ABI 'aapcs' does not support it}}
 
 // Floating-point expressions, global variables and local variables do not
 // affect the ABI, so are allowed. GCC does reject some uses of floating point
@@ -103,9 +103,9 @@ int test_var_double(int a) {
 extern void extern_float_arg(float);
 extern float extern_float_ret(void);
 void call_extern_float_arg() { extern_float_arg(1.0f); }
-// nofp-hard-error@-1 {{'call_extern_float_arg' requires 'float' type support, but ABI 'aapcs' does not support it}}
+// nofp-hard-error@-1 {{'extern_float_arg' requires 'float' type support, but ABI 'aapcs' does not support it}}
 void call_extern_float_ret() { extern_float_ret(); }
-// nofp-hard-error@-1 {{'call_extern_float_ret' requires 'float' type support, but ABI 'aapcs' does not support it}}
+// nofp-hard-error@-1 {{'extern_float_ret' requires 'float' type support, but ABI 'aapcs' does not support it}}
 
 // Definitions of variadic functions, and calls to them which only use integer
 // argument registers, are both fine.
@@ -115,7 +115,7 @@ void call_variadic_int() { variadic(0, 1); }
 // Calls to variadic functions with floating-point arguments are an error,
 // since this would require floating-point registers.
 void call_variadic_double() { variadic(0, 1.0); }
-// nofp-hard-error@-1 {{'call_variadic_double' requires 'double' type support, but ABI 'aapcs' does not support it}}
+// nofp-hard-error@-1 {{'variadic' requires 'double' type support, but ABI 'aapcs' does not support it}}
 
 // Calls through function pointers are also diagnosed.
 void (*fptr)(float);

From 4d4a4100f68dfc50bd3b67de40101761be8ffdb7 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Wed, 14 Aug 2024 14:04:22 -0400
Subject: [PATCH 182/427] [libc++] Fix ambiguous constructors for std::complex
 and std::optional (#103409)

Fixes #101960

(cherry picked from commit 4d08bb11eea5907fa9cdfe4c7bc9d5c91e79c6a7)
---
 libcxx/include/complex                        |  9 +++--
 libcxx/include/optional                       |  9 +++--
 .../gh_101960_ambiguous_ctor.pass.cpp         | 38 +++++++++++++++++++
 .../gh_101960_internal_ctor.compile.pass.cpp  | 28 ++++++++++++++
 4 files changed, 78 insertions(+), 6 deletions(-)
 create mode 100644 libcxx/test/std/numerics/complex.number/complex.special/gh_101960_ambiguous_ctor.pass.cpp
 create mode 100644 libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/gh_101960_internal_ctor.compile.pass.cpp

diff --git a/libcxx/include/complex b/libcxx/include/complex
index 22271acaf7358..e6534025de57e 100644
--- a/libcxx/include/complex
+++ b/libcxx/include/complex
@@ -421,7 +421,8 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(float __re = 0.0f, float __im = 0.0f) : __re_(__re), __im_(__im) {}
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(__from_builtin_tag, _Complex float __v)
+  template <class _Tag, __enable_if_t<_IsSame<_Tag, __from_builtin_tag>::value, int> = 0>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit complex(_Tag, _Complex float __v)
       : __re_(__real__ __v), __im_(__imag__ __v) {}
 
   _LIBCPP_HIDE_FROM_ABI explicit _LIBCPP_CONSTEXPR complex(const complex<double>& __c);
@@ -517,7 +518,8 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(double __re = 0.0, double __im = 0.0) : __re_(__re), __im_(__im) {}
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(__from_builtin_tag, _Complex double __v)
+  template <class _Tag, __enable_if_t<_IsSame<_Tag, __from_builtin_tag>::value, int> = 0>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit complex(_Tag, _Complex double __v)
       : __re_(__real__ __v), __im_(__imag__ __v) {}
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(const complex<float>& __c);
@@ -617,7 +619,8 @@ public:
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(long double __re = 0.0L, long double __im = 0.0L)
       : __re_(__re), __im_(__im) {}
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(__from_builtin_tag, _Complex long double __v)
+  template <class _Tag, __enable_if_t<_IsSame<_Tag, __from_builtin_tag>::value, int> = 0>
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit complex(_Tag, _Complex long double __v)
       : __re_(__real__ __v), __im_(__imag__ __v) {}
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR complex(const complex<float>& __c);
diff --git a/libcxx/include/optional b/libcxx/include/optional
index f9cbcbfa595d1..41d7515a2b689 100644
--- a/libcxx/include/optional
+++ b/libcxx/include/optional
@@ -301,7 +301,7 @@ struct __optional_destruct_base<_Tp, false> {
 
 #  if _LIBCPP_STD_VER >= 23
   template <class _Fp, class... _Args>
-  _LIBCPP_HIDE_FROM_ABI constexpr __optional_destruct_base(
+  _LIBCPP_HIDE_FROM_ABI constexpr explicit __optional_destruct_base(
       __optional_construct_from_invoke_tag, _Fp&& __f, _Args&&... __args)
       : __val_(std::invoke(std::forward<_Fp>(__f), std::forward<_Args>(__args)...)), __engaged_(true) {}
 #  endif
@@ -707,8 +707,11 @@ public:
   }
 
 #  if _LIBCPP_STD_VER >= 23
-  template <class _Fp, class... _Args>
-  _LIBCPP_HIDE_FROM_ABI constexpr explicit optional(__optional_construct_from_invoke_tag, _Fp&& __f, _Args&&... __args)
+  template <class _Tag,
+            class _Fp,
+            class... _Args,
+            __enable_if_t<_IsSame<_Tag, __optional_construct_from_invoke_tag>::value, int> = 0>
+  _LIBCPP_HIDE_FROM_ABI constexpr explicit optional(_Tag, _Fp&& __f, _Args&&... __args)
       : __base(__optional_construct_from_invoke_tag{}, std::forward<_Fp>(__f), std::forward<_Args>(__args)...) {}
 #  endif
 
diff --git a/libcxx/test/std/numerics/complex.number/complex.special/gh_101960_ambiguous_ctor.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.special/gh_101960_ambiguous_ctor.pass.cpp
new file mode 100644
index 0000000000000..bffe8764386a7
--- /dev/null
+++ b/libcxx/test/std/numerics/complex.number/complex.special/gh_101960_ambiguous_ctor.pass.cpp
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <complex>
+
+// Regression test for https://github.com/llvm/llvm-project/issues/101960 where we used to
+// trigger an ambiguous constructor.
+
+#include <complex>
+#include <cassert>
+
+struct NastyConvertible {
+  template <class T>
+  operator T() const {
+    return T(0);
+  }
+};
+
+template <class T>
+void test() {
+  NastyConvertible nasty;
+  std::complex<T> x(nasty, nasty);
+  assert(x.real() == T(0));
+  assert(x.imag() == T(0));
+}
+
+int main(int, char**) {
+  test<float>();
+  test<double>();
+  test<long double>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/gh_101960_internal_ctor.compile.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/gh_101960_internal_ctor.compile.pass.cpp
new file mode 100644
index 0000000000000..1a1d6f52a5fec
--- /dev/null
+++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/gh_101960_internal_ctor.compile.pass.cpp
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// <optional>
+
+// Regression test for https://github.com/llvm/llvm-project/issues/101960 where a constructor
+// of std::optional that should have been private was instead publicly available.
+
+#include <optional>
+#include <type_traits>
+
+struct NastyConvertible {
+  template <class T>
+  operator T() {
+    return 0;
+  }
+};
+
+using F = int(int);
+
+static_assert(!std::is_constructible<std::optional<int>, NastyConvertible, int(int), int>::value);

From 1a5c4e5c4fded8293985dc1875a971f7783cfc45 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn@outlook.com>
Date: Tue, 23 Jul 2024 14:41:57 -0500
Subject: [PATCH 183/427] [Clang] Correctly forward `--cuda-path` to the nvlink
 wrapper (#100170)

Summary:
This was not forwarded properly as it would try to pass it to `nvlink`.

Fixes https://github.com/llvm/llvm-project/issues/100168

(cherry picked from commit 7e1fcf5dd657d465c3fc846f56c6f9d3a4560b43)
---
 clang/lib/Driver/ToolChains/Cuda.cpp           | 4 ++++
 clang/test/Driver/linker-wrapper-passes.c      | 1 -
 clang/test/Driver/nvlink-wrapper.c             | 7 +++++++
 clang/tools/clang-nvlink-wrapper/NVLinkOpts.td | 4 ++--
 4 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 59453c484ae4f..61d12b10dfb62 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -609,6 +609,10 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back(Args.MakeArgString(
         "--pxtas-path=" + Args.getLastArgValue(options::OPT_ptxas_path_EQ)));
 
+  if (Args.hasArg(options::OPT_cuda_path_EQ))
+    CmdArgs.push_back(Args.MakeArgString(
+        "--cuda-path=" + Args.getLastArgValue(options::OPT_cuda_path_EQ)));
+
   // Add paths specified in LIBRARY_PATH environment variable as -L options.
   addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
 
diff --git a/clang/test/Driver/linker-wrapper-passes.c b/clang/test/Driver/linker-wrapper-passes.c
index aadcf472e9b63..b257c942afa07 100644
--- a/clang/test/Driver/linker-wrapper-passes.c
+++ b/clang/test/Driver/linker-wrapper-passes.c
@@ -3,7 +3,6 @@
 // REQUIRES: llvm-plugins, llvm-examples
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
-
 // Setup.
 // RUN: mkdir -p %t
 // RUN: %clang -cc1 -emit-llvm-bc -o %t/host-x86_64-unknown-linux-gnu.bc \
diff --git a/clang/test/Driver/nvlink-wrapper.c b/clang/test/Driver/nvlink-wrapper.c
index fdda93f1f9cdc..318315ddaca34 100644
--- a/clang/test/Driver/nvlink-wrapper.c
+++ b/clang/test/Driver/nvlink-wrapper.c
@@ -63,3 +63,10 @@ int baz() { return y + x; }
 // RUN:   -arch sm_52 -o a.out 2>&1 | FileCheck %s --check-prefix=LTO
 // LTO: ptxas{{.*}} -m64 -c [[PTX:.+]].s -O3 -arch sm_52 -o [[CUBIN:.+]].cubin
 // LTO: nvlink{{.*}} -arch sm_52 -o a.out [[CUBIN]].cubin {{.*}}-u-{{.*}}.cubin {{.*}}-y-{{.*}}.cubin
+
+//
+// Check that we don't forward some arguments.
+//
+// RUN: clang-nvlink-wrapper --dry-run %t.o %t-u.o %t-y.a \
+// RUN:   -arch sm_52 --cuda-path/opt/cuda -o a.out 2>&1 | FileCheck %s --check-prefix=PATH
+// PATH-NOT: --cuda-path=/opt/cuda
diff --git a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td
index e84b530f2787d..8c80a51b12a44 100644
--- a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td
+++ b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td
@@ -12,9 +12,9 @@ def verbose : Flag<["-"], "v">, HelpText<"Print verbose information">;
 def version : Flag<["--"], "version">,
   HelpText<"Display the version number and exit">;
 
-def cuda_path_EQ : Joined<["--"], "cuda-path=">,
+def cuda_path_EQ : Joined<["--"], "cuda-path=">, Flags<[WrapperOnlyOption]>,
   MetaVarName<"<dir>">, HelpText<"Set the system CUDA path">;
-def ptxas_path_EQ : Joined<["--"], "ptxas-path=">,
+def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, Flags<[WrapperOnlyOption]>,
   MetaVarName<"<dir>">, HelpText<"Set the 'ptxas' path">;
 
 def o : JoinedOrSeparate<["-"], "o">, MetaVarName<"<path>">,

From c45fc691a2a39318fc146ba1665a2fe2d9f43b2b Mon Sep 17 00:00:00 2001
From: Yeting Kuo <46629943+yetingk@users.noreply.github.com>
Date: Thu, 1 Aug 2024 09:37:42 +0800
Subject: [PATCH 184/427] [RISCV] Use experimental.vp.splat to splat specific
 vector length elements. (#101329)

Previously, llvm IR is hard to create a scalable vector splat with a
specific vector length, so we use riscv.vmv.v.x and riscv.vmv.v.f to do
this work. But the two rvv intrinsics needs strict type constraint which
can not support fixed vector types and illegal vector types. Using
vp.splat could preserve old functionality and also generate more
optimized code for vector types and illegal vectors.
This patch also fixes crash for getEVT not serving ptr types.

(cherry picked from commit 87af9ee870ad7ca93abced0b09459c3760dec891)
---
 llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp | 19 +----
 .../RISCV/rvv/fixed-vectors-strided-vpload.ll |  8 +-
 llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll | 80 +++++++++++++++++--
 3 files changed, 79 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index 0a66a38f6d5ab..be2e880ecd3a9 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -187,25 +187,10 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
   auto *VTy = cast<VectorType>(II.getType());
 
   IRBuilder<> Builder(&II);
-
-  // Extend VL from i32 to XLen if needed.
-  if (ST->is64Bit())
-    VL = Builder.CreateZExt(VL, Builder.getInt64Ty());
-
   Type *STy = VTy->getElementType();
   Value *Val = Builder.CreateLoad(STy, BasePtr);
-  const auto &TLI = *ST->getTargetLowering();
-  Value *Res;
-
-  // TODO: Also support fixed/illegal vector types to splat with evl = vl.
-  if (isa<ScalableVectorType>(VTy) && TLI.isTypeLegal(EVT::getEVT(VTy))) {
-    unsigned VMVOp = STy->isFloatingPointTy() ? Intrinsic::riscv_vfmv_v_f
-                                              : Intrinsic::riscv_vmv_v_x;
-    Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()},
-                                  {PoisonValue::get(VTy), Val, VL});
-  } else {
-    Res = Builder.CreateVectorSplat(VTy->getElementCount(), Val);
-  }
+  Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy},
+                                       {Val, II.getOperand(2), VL});
 
   II.replaceAllUsesWith(Res);
   II.eraseFromParent();
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index b8c7037580c46..849f98c26f459 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -638,14 +638,14 @@ declare <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64,
 define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
 ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
 ; CHECK-OPT:       # %bb.0:
-; CHECK-OPT-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-OPT-NEXT:    vsetivli zero, 3, e8, mf4, ta, ma
 ; CHECK-OPT-NEXT:    vlse8.v v8, (a0), zero
 ; CHECK-OPT-NEXT:    ret
 ;
 ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
 ; CHECK-NO-OPT:       # %bb.0:
 ; CHECK-NO-OPT-NEXT:    lbu a0, 0(a0)
-; CHECK-NO-OPT-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NO-OPT-NEXT:    vsetivli zero, 3, e8, mf4, ta, ma
 ; CHECK-NO-OPT-NEXT:    vmv.v.x v8, a0
 ; CHECK-NO-OPT-NEXT:    ret
   %load = call <4 x i8> @llvm.experimental.vp.strided.load.4i8.p0.i8(ptr %ptr, i8 0, <4 x i1> splat (i1 true), i32 3)
@@ -657,14 +657,14 @@ define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
 define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
 ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
 ; CHECK-OPT:       # %bb.0:
-; CHECK-OPT-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-OPT-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
 ; CHECK-OPT-NEXT:    vlse16.v v8, (a0), zero
 ; CHECK-OPT-NEXT:    ret
 ;
 ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
 ; CHECK-NO-OPT:       # %bb.0:
 ; CHECK-NO-OPT-NEXT:    flh fa5, 0(a0)
-; CHECK-NO-OPT-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NO-OPT-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
 ; CHECK-NO-OPT-NEXT:    vfmv.v.f v8, fa5
 ; CHECK-NO-OPT-NEXT:    ret
   %load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 3)
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index 0010f64a93fd6..14976f21b7dbb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -1,16 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+optimized-zero-stride-load \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT,CHECK-OPT-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+optimized-zero-stride-load \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT,CHECK-OPT-RV64
 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-RV64
 
 declare <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr, i8, <vscale x 1 x i1>, i32)
 
@@ -823,15 +823,15 @@ define <vscale x 1 x half> @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) {
   ret <vscale x 1 x half> %load
 }
 
-define <vscale x 1 x i64> @zero_strided_vadd.vx(<vscale x 1 x i64> %v, ptr %ptr) {
-; CHECK-RV32-LABEL: zero_strided_vadd.vx:
+define <vscale x 1 x i64> @zero_strided_vadd_nxv1i64(<vscale x 1 x i64> %v, ptr %ptr) {
+; CHECK-RV32-LABEL: zero_strided_vadd_nxv1i64:
 ; CHECK-RV32:       # %bb.0:
 ; CHECK-RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
 ; CHECK-RV32-NEXT:    vlse64.v v9, (a0), zero
 ; CHECK-RV32-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-RV32-NEXT:    ret
 ;
-; CHECK-RV64-LABEL: zero_strided_vadd.vx:
+; CHECK-RV64-LABEL: zero_strided_vadd_nxv1i64:
 ; CHECK-RV64:       # %bb.0:
 ; CHECK-RV64-NEXT:    ld a0, 0(a0)
 ; CHECK-RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
@@ -842,3 +842,69 @@ define <vscale x 1 x i64> @zero_strided_vadd.vx(<vscale x 1 x i64> %v, ptr %ptr)
   %w = add <vscale x 1 x i64> %v, %load
   ret <vscale x 1 x i64> %w
 }
+
+define <vscale x 16 x i64> @zero_strided_vadd_nxv16i64(<vscale x 16 x i64> %v, ptr %ptr) {
+; CHECK-RV32-LABEL: zero_strided_vadd_nxv16i64:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    csrr a1, vlenb
+; CHECK-RV32-NEXT:    srli a2, a1, 3
+; CHECK-RV32-NEXT:    sub a3, a2, a1
+; CHECK-RV32-NEXT:    sltu a4, a2, a3
+; CHECK-RV32-NEXT:    addi a4, a4, -1
+; CHECK-RV32-NEXT:    and a3, a4, a3
+; CHECK-RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32-NEXT:    vlse64.v v24, (a0), zero
+; CHECK-RV32-NEXT:    bltu a2, a1, .LBB55_2
+; CHECK-RV32-NEXT:  # %bb.1:
+; CHECK-RV32-NEXT:    mv a2, a1
+; CHECK-RV32-NEXT:  .LBB55_2:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV32-NEXT:    vlse64.v v0, (a0), zero
+; CHECK-RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-RV32-NEXT:    vadd.vv v16, v16, v24
+; CHECK-RV32-NEXT:    vadd.vv v8, v8, v0
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: zero_strided_vadd_nxv16i64:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    ld a0, 0(a0)
+; CHECK-RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-RV64-NEXT:    vadd.vx v8, v8, a0
+; CHECK-RV64-NEXT:    vadd.vx v16, v16, a0
+; CHECK-RV64-NEXT:    ret
+  %vscale = call i32 @llvm.vscale()
+  %load = call <vscale x 16 x i64> @llvm.experimental.vp.strided.load.nxv16i64.p0.i32(ptr %ptr, i32 0, <vscale x 16 x i1> splat (i1 true), i32 %vscale)
+  %w = add <vscale x 16 x i64> %v, %load
+  ret <vscale x 16 x i64> %w
+}
+
+define <vscale x 1 x ptr> @zero_strided_vadd_nxv1p0(<vscale x 1 x ptr> %v, ptr %ptr) {
+; CHECK-OPT-RV32-LABEL: zero_strided_vadd_nxv1p0:
+; CHECK-OPT-RV32:       # %bb.0:
+; CHECK-OPT-RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-OPT-RV32-NEXT:    vlse32.v v8, (a0), zero
+; CHECK-OPT-RV32-NEXT:    ret
+;
+; CHECK-OPT-RV64-LABEL: zero_strided_vadd_nxv1p0:
+; CHECK-OPT-RV64:       # %bb.0:
+; CHECK-OPT-RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-OPT-RV64-NEXT:    vlse64.v v8, (a0), zero
+; CHECK-OPT-RV64-NEXT:    ret
+;
+; CHECK-NO-OPT-RV32-LABEL: zero_strided_vadd_nxv1p0:
+; CHECK-NO-OPT-RV32:       # %bb.0:
+; CHECK-NO-OPT-RV32-NEXT:    lw a0, 0(a0)
+; CHECK-NO-OPT-RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NO-OPT-RV32-NEXT:    vmv.v.x v8, a0
+; CHECK-NO-OPT-RV32-NEXT:    ret
+;
+; CHECK-NO-OPT-RV64-LABEL: zero_strided_vadd_nxv1p0:
+; CHECK-NO-OPT-RV64:       # %bb.0:
+; CHECK-NO-OPT-RV64-NEXT:    ld a0, 0(a0)
+; CHECK-NO-OPT-RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NO-OPT-RV64-NEXT:    vmv.v.x v8, a0
+; CHECK-NO-OPT-RV64-NEXT:    ret
+  %vscale = call i32 @llvm.vscale()
+  %load = call <vscale x 1 x ptr> @llvm.experimental.vp.strided.load.nxv1p0.p0.i32(ptr %ptr, i32 0, <vscale x 1 x i1> splat (i1 true), i32 %vscale)
+  ret <vscale x 1 x ptr> %load
+}

From 2ab8d93061581edad3501561722ebd5632d73892 Mon Sep 17 00:00:00 2001
From: yandalur <quic_yandalur@quicinc.com>
Date: Thu, 1 Aug 2024 21:37:23 +0530
Subject: [PATCH 185/427] [Hexagon] Do not optimize address of another
 function's block (#101209)

When the constant extender optimization pass encounters an instruction
that uses an extended address pointing to another function's block,
avoid adding the instruction to the extender list for the current
machine function.

Fixes https://github.com/llvm/llvm-project/issues/99714

(cherry picked from commit 68df06a0b2998765cb0a41353fcf0919bbf57ddb)
---
 .../Target/Hexagon/HexagonConstExtenders.cpp  |   4 +
 .../CodeGen/Hexagon/cext-opt-block-addr.mir   | 173 ++++++++++++++++++
 2 files changed, 177 insertions(+)
 create mode 100644 llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir

diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index f0933765bbcbd..86ce6b4e05ed2 100644
--- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -1223,6 +1223,10 @@ void HCE::recordExtender(MachineInstr &MI, unsigned OpNum) {
   if (ER.Kind == MachineOperand::MO_GlobalAddress)
     if (ER.V.GV->getName().empty())
       return;
+  // Ignore block address that points to block in another function
+  if (ER.Kind == MachineOperand::MO_BlockAddress)
+    if (ER.V.BA->getFunction() != &(MI.getMF()->getFunction()))
+      return;
   Extenders.push_back(ED);
 }
 
diff --git a/llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir b/llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir
new file mode 100644
index 0000000000000..9f140132dcd6c
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/cext-opt-block-addr.mir
@@ -0,0 +1,173 @@
+# REQUIRES: asserts
+# RUN: llc -march=hexagon -run-pass hexagon-cext-opt %s -o - | FileCheck %s
+
+# Check that the HexagonConstantExtenders pass does not assert when block
+# addresses from different functions are used
+# CHECK-LABEL: name: wibble
+# CHECK: A2_tfrsi blockaddress(@baz
+# CHECK: A2_tfrsi blockaddress(@wibble
+
+--- |
+  target triple = "hexagon"
+
+  define dso_local void @baz() {
+  bb:
+    br label %bb1
+
+  bb1:                                              ; preds = %bb
+    %call = tail call fastcc i32 @wibble(i32 poison)
+    ret void
+  }
+
+  define internal fastcc i32 @wibble(i32 %arg) {
+  bb:
+    %call = tail call i32 @eggs(i32 noundef ptrtoint (ptr blockaddress(@baz, %bb1) to i32))
+    br label %bb1
+
+  bb1:                                              ; preds = %bb
+    tail call void @baz.1(i32 noundef ptrtoint (ptr blockaddress(@wibble, %bb1) to i32))
+    ret i32 %call
+  }
+
+  declare i32 @eggs(i32 noundef) local_unnamed_addr
+
+  declare void @baz.1(i32 noundef) local_unnamed_addr
+
+...
+---
+name:            baz
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: intregs, preferred-register: '' }
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     true
+  isCalleeSavedInfoValid: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.bb:
+    successors: %bb.1(0x80000000)
+
+  bb.1.bb1 (ir-block-address-taken %ir-block.bb1):
+    %0:intregs = IMPLICIT_DEF
+    $r0 = COPY %0
+    PS_tailcall_i @wibble, hexagoncsr, implicit $r0
+
+...
+---
+name:            wibble
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: intregs, preferred-register: '' }
+  - { id: 1, class: intregs, preferred-register: '' }
+  - { id: 2, class: intregs, preferred-register: '' }
+  - { id: 3, class: intregs, preferred-register: '' }
+  - { id: 4, class: intregs, preferred-register: '' }
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  isCalleeSavedInfoValid: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.bb:
+    successors: %bb.1(0x80000000)
+
+    %2:intregs = A2_tfrsi blockaddress(@baz, %ir-block.bb1)
+    ADJCALLSTACKDOWN 0, 0, implicit-def $r29, implicit-def dead $r30, implicit $r31, implicit $r30, implicit $r29
+    $r0 = COPY %2
+    J2_call @eggs, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit-def $r29, implicit-def $r0
+    ADJCALLSTACKUP 0, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit $r29
+    %3:intregs = COPY $r0
+
+  bb.1.bb1 (ir-block-address-taken %ir-block.bb1):
+    %4:intregs = A2_tfrsi blockaddress(@wibble, %ir-block.bb1)
+    ADJCALLSTACKDOWN 0, 0, implicit-def $r29, implicit-def dead $r30, implicit $r31, implicit $r30, implicit $r29
+    $r0 = COPY %4
+    J2_call @baz.1, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit-def $r29
+    ADJCALLSTACKUP 0, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit $r29
+    $r0 = COPY %3
+    PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+
+...

From 1cfd6754454ba62fd0ad306c09a7b6d526f835dc Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn@arm.com>
Date: Wed, 14 Aug 2024 12:22:51 +0100
Subject: [PATCH 186/427] [AArch64] Add GCS release notes

---
 clang/docs/ReleaseNotes.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5cd398c22c946..b56e7177846d9 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1207,6 +1207,11 @@ Arm and AArch64 Support
     * Arm Neoverse-N3 (neoverse-n3).
     * Arm Neoverse-V3 (neoverse-v3).
     * Arm Neoverse-V3AE (neoverse-v3ae).
+- ``-mbranch-protection=gcs`` has been added which enables support for the
+  Guarded Control Stack extension, and ``-mbranch-protection=standard`` also
+  enables this. Enabling GCS causes the GCS GNU property bit to be set on output
+  objects. It doesn't cause any code generation changes, as the code generated
+  by clang is already compatible with GCS.
 
 Android Support
 ^^^^^^^^^^^^^^^

From 9e90c40564e21dc5f1a12e08cfdf29305aaf9f50 Mon Sep 17 00:00:00 2001
From: Gulfem Savrun Yeniceri <gulfem@google.com>
Date: Tue, 23 Jul 2024 11:06:30 +0000
Subject: [PATCH 187/427] Revert "[CGData] llvm-cgdata (#89884)"

This reverts commit d3fb41dddc11b0ebc338a3b9e6a5ab7288ff7d1d
and forward fix patches because of the issue explained in:
https://github.com/llvm/llvm-project/pull/89884#issuecomment-2244348117.

Revert "Fix tests for https://github.com/llvm/llvm-project/pull/89884
(#100061)"

This reverts commit 67937a3f969aaf97a745a45281a0d22273bff713.

Revert "Fix build break for https://github.com/llvm/llvm-project/pull/89884 (#100050)"

This reverts commit c33878c5787c128234d533ad19d672dc3eea19a8.

Revert "[CGData] Fix -Wpessimizing-move in CodeGenDataReader.cpp (NFC)"

This reverts commit 1f8b2b146141f3563085a1acb77deb50857a636d.

(cherry picked from commit 73d78973fe072438f0f73088f889c66845b2b51a)
---
 llvm/include/llvm/CodeGenData/CodeGenData.h   | 204 -------------
 llvm/include/llvm/CodeGenData/CodeGenData.inc |  46 ---
 .../llvm/CodeGenData/CodeGenDataReader.h      | 154 ----------
 .../llvm/CodeGenData/CodeGenDataWriter.h      |  68 -----
 llvm/lib/CodeGenData/CMakeLists.txt           |   3 -
 llvm/lib/CodeGenData/CodeGenData.cpp          | 196 -------------
 llvm/lib/CodeGenData/CodeGenDataReader.cpp    | 175 ------------
 llvm/lib/CodeGenData/CodeGenDataWriter.cpp    | 162 -----------
 llvm/test/CMakeLists.txt                      |   1 -
 llvm/test/lit.cfg.py                          |   1 -
 llvm/test/tools/llvm-cgdata/dump.test         |  32 ---
 llvm/test/tools/llvm-cgdata/empty.test        |  35 ---
 llvm/test/tools/llvm-cgdata/error.test        |  38 ---
 .../test/tools/llvm-cgdata/merge-archive.test |  90 ------
 llvm/test/tools/llvm-cgdata/merge-concat.test |  83 ------
 llvm/test/tools/llvm-cgdata/merge-double.test |  87 ------
 llvm/test/tools/llvm-cgdata/merge-single.test |  49 ----
 llvm/test/tools/llvm-cgdata/show.test         |  30 --
 llvm/tools/llvm-cgdata/CMakeLists.txt         |  15 -
 llvm/tools/llvm-cgdata/llvm-cgdata.cpp        | 268 ------------------
 20 files changed, 1737 deletions(-)
 delete mode 100644 llvm/include/llvm/CodeGenData/CodeGenData.h
 delete mode 100644 llvm/include/llvm/CodeGenData/CodeGenData.inc
 delete mode 100644 llvm/include/llvm/CodeGenData/CodeGenDataReader.h
 delete mode 100644 llvm/include/llvm/CodeGenData/CodeGenDataWriter.h
 delete mode 100644 llvm/lib/CodeGenData/CodeGenData.cpp
 delete mode 100644 llvm/lib/CodeGenData/CodeGenDataReader.cpp
 delete mode 100644 llvm/lib/CodeGenData/CodeGenDataWriter.cpp
 delete mode 100644 llvm/test/tools/llvm-cgdata/dump.test
 delete mode 100644 llvm/test/tools/llvm-cgdata/empty.test
 delete mode 100644 llvm/test/tools/llvm-cgdata/error.test
 delete mode 100644 llvm/test/tools/llvm-cgdata/merge-archive.test
 delete mode 100644 llvm/test/tools/llvm-cgdata/merge-concat.test
 delete mode 100644 llvm/test/tools/llvm-cgdata/merge-double.test
 delete mode 100644 llvm/test/tools/llvm-cgdata/merge-single.test
 delete mode 100644 llvm/test/tools/llvm-cgdata/show.test
 delete mode 100644 llvm/tools/llvm-cgdata/CMakeLists.txt
 delete mode 100644 llvm/tools/llvm-cgdata/llvm-cgdata.cpp

diff --git a/llvm/include/llvm/CodeGenData/CodeGenData.h b/llvm/include/llvm/CodeGenData/CodeGenData.h
deleted file mode 100644
index 659008c78abd9..0000000000000
--- a/llvm/include/llvm/CodeGenData/CodeGenData.h
+++ /dev/null
@@ -1,204 +0,0 @@
-//===- CodeGenData.h --------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains support for codegen data that has stable summary which
-// can be used to optimize the code in the subsequent codegen.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGENDATA_CODEGENDATA_H
-#define LLVM_CODEGENDATA_CODEGENDATA_H
-
-#include "llvm/ADT/BitmaskEnum.h"
-#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/CodeGenData/OutlinedHashTree.h"
-#include "llvm/CodeGenData/OutlinedHashTreeRecord.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/TargetParser/Triple.h"
-#include <mutex>
-
-namespace llvm {
-
-enum CGDataSectKind {
-#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind,
-#include "llvm/CodeGenData/CodeGenData.inc"
-};
-
-std::string getCodeGenDataSectionName(CGDataSectKind CGSK,
-                                      Triple::ObjectFormatType OF,
-                                      bool AddSegmentInfo = true);
-
-enum class CGDataKind {
-  Unknown = 0x0,
-  // A function outlining info.
-  FunctionOutlinedHashTree = 0x1,
-  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree)
-};
-
-const std::error_category &cgdata_category();
-
-enum class cgdata_error {
-  success = 0,
-  eof,
-  bad_magic,
-  bad_header,
-  empty_cgdata,
-  malformed,
-  unsupported_version,
-};
-
-inline std::error_code make_error_code(cgdata_error E) {
-  return std::error_code(static_cast<int>(E), cgdata_category());
-}
-
-class CGDataError : public ErrorInfo<CGDataError> {
-public:
-  CGDataError(cgdata_error Err, const Twine &ErrStr = Twine())
-      : Err(Err), Msg(ErrStr.str()) {
-    assert(Err != cgdata_error::success && "Not an error");
-  }
-
-  std::string message() const override;
-
-  void log(raw_ostream &OS) const override { OS << message(); }
-
-  std::error_code convertToErrorCode() const override {
-    return make_error_code(Err);
-  }
-
-  cgdata_error get() const { return Err; }
-  const std::string &getMessage() const { return Msg; }
-
-  /// Consume an Error and return the raw enum value contained within it, and
-  /// the optional error message. The Error must either be a success value, or
-  /// contain a single CGDataError.
-  static std::pair<cgdata_error, std::string> take(Error E) {
-    auto Err = cgdata_error::success;
-    std::string Msg;
-    handleAllErrors(std::move(E), [&Err, &Msg](const CGDataError &IPE) {
-      assert(Err == cgdata_error::success && "Multiple errors encountered");
-      Err = IPE.get();
-      Msg = IPE.getMessage();
-    });
-    return {Err, Msg};
-  }
-
-  static char ID;
-
-private:
-  cgdata_error Err;
-  std::string Msg;
-};
-
-enum CGDataMode {
-  None,
-  Read,
-  Write,
-};
-
-class CodeGenData {
-  /// Global outlined hash tree that has oulined hash sequences across modules.
-  std::unique_ptr<OutlinedHashTree> PublishedHashTree;
-
-  /// This flag is set when -fcodegen-data-generate is passed.
-  /// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds.
-  bool EmitCGData;
-
-  /// This is a singleton instance which is thread-safe. Unlike profile data
-  /// which is largely function-based, codegen data describes the whole module.
-  /// Therefore, this can be initialized once, and can be used across modules
-  /// instead of constructing the same one for each codegen backend.
-  static std::unique_ptr<CodeGenData> Instance;
-  static std::once_flag OnceFlag;
-
-  CodeGenData() = default;
-
-public:
-  ~CodeGenData() = default;
-
-  static CodeGenData &getInstance();
-
-  /// Returns true if we have a valid outlined hash tree.
-  bool hasOutlinedHashTree() {
-    return PublishedHashTree && !PublishedHashTree->empty();
-  }
-
-  /// Returns the outlined hash tree. This can be globally used in a read-only
-  /// manner.
-  const OutlinedHashTree *getOutlinedHashTree() {
-    return PublishedHashTree.get();
-  }
-
-  /// Returns true if we should write codegen data.
-  bool emitCGData() { return EmitCGData; }
-
-  /// Publish the (globally) merged or read outlined hash tree.
-  void publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
-    PublishedHashTree = std::move(HashTree);
-    // Ensure we disable emitCGData as we do not want to read and write both.
-    EmitCGData = false;
-  }
-};
-
-namespace cgdata {
-
-inline bool hasOutlinedHashTree() {
-  return CodeGenData::getInstance().hasOutlinedHashTree();
-}
-
-inline const OutlinedHashTree *getOutlinedHashTree() {
-  return CodeGenData::getInstance().getOutlinedHashTree();
-}
-
-inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); }
-
-inline void
-publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
-  CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
-}
-
-void warn(Error E, StringRef Whence = "");
-void warn(Twine Message, std::string Whence = "", std::string Hint = "");
-
-} // end namespace cgdata
-
-namespace IndexedCGData {
-
-// A signature for data validation, representing "\xffcgdata\x81" in
-// little-endian order
-const uint64_t Magic = 0x81617461646763ff;
-
-enum CGDataVersion {
-  // Version 1 is the first version. This version supports the outlined
-  // hash tree.
-  Version1 = 1,
-  CurrentVersion = CG_DATA_INDEX_VERSION
-};
-const uint64_t Version = CGDataVersion::CurrentVersion;
-
-struct Header {
-  uint64_t Magic;
-  uint32_t Version;
-  uint32_t DataKind;
-  uint64_t OutlinedHashTreeOffset;
-
-  // New fields should only be added at the end to ensure that the size
-  // computation is correct. The methods below need to be updated to ensure that
-  // the new field is read correctly.
-
-  // Reads a header struct from the buffer.
-  static Expected<Header> readFromBuffer(const unsigned char *Curr);
-};
-
-} // end namespace IndexedCGData
-
-} // end namespace llvm
-
-#endif // LLVM_CODEGEN_PREPARE_H
diff --git a/llvm/include/llvm/CodeGenData/CodeGenData.inc b/llvm/include/llvm/CodeGenData/CodeGenData.inc
deleted file mode 100644
index 08ec14ea051a0..0000000000000
--- a/llvm/include/llvm/CodeGenData/CodeGenData.inc
+++ /dev/null
@@ -1,46 +0,0 @@
-/*===-- CodeGenData.inc ----------------------------------------*- C++ -*-=== *\
-|*
-|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-|* See https://llvm.org/LICENSE.txt for license information.
-|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-|*
-\*===----------------------------------------------------------------------===*/
-/*
- * This is the main file that defines all the data structure, signature,
- * constant literals that are shared across compiler, host tools (reader/writer)
- * to support codegen data.
- *
-\*===----------------------------------------------------------------------===*/
-
-/* Helper macros.  */
-#define CG_DATA_SIMPLE_QUOTE(x) #x
-#define CG_DATA_QUOTE(x) CG_DATA_SIMPLE_QUOTE(x)
-
-#ifdef CG_DATA_SECT_ENTRY
-#define CG_DATA_DEFINED
-CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON),
-                   CG_DATA_OUTLINE_COFF, "__DATA,")
-
-#undef CG_DATA_SECT_ENTRY
-#endif
-
-/* section name strings common to all targets other
-   than WIN32 */
-#define CG_DATA_OUTLINE_COMMON __llvm_outline
-/* Since cg data sections are not allocated, we don't need to
- * access them at runtime.
- */
-#define CG_DATA_OUTLINE_COFF ".loutline"
-
-#ifdef _WIN32
-/* Runtime section names and name strings.  */
-#define CG_DATA_SECT_NAME CG_DATA_OUTLINE_COFF
-
-#else
-/* Runtime section names and name strings.  */
-#define CG_DATA_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)
-
-#endif
-
-/* Indexed codegen data format version (start from 1). */
-#define CG_DATA_INDEX_VERSION 1
diff --git a/llvm/include/llvm/CodeGenData/CodeGenDataReader.h b/llvm/include/llvm/CodeGenData/CodeGenDataReader.h
deleted file mode 100644
index df4ae3ed24e79..0000000000000
--- a/llvm/include/llvm/CodeGenData/CodeGenDataReader.h
+++ /dev/null
@@ -1,154 +0,0 @@
-//===- CodeGenDataReader.h --------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains support for reading codegen data.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGENDATA_CODEGENDATAREADER_H
-#define LLVM_CODEGENDATA_CODEGENDATAREADER_H
-
-#include "llvm/CodeGenData/CodeGenData.h"
-#include "llvm/CodeGenData/OutlinedHashTreeRecord.h"
-#include "llvm/Support/LineIterator.h"
-#include "llvm/Support/VirtualFileSystem.h"
-
-namespace llvm {
-
-class CodeGenDataReader {
-  cgdata_error LastError = cgdata_error::success;
-  std::string LastErrorMsg;
-
-public:
-  CodeGenDataReader() = default;
-  virtual ~CodeGenDataReader() = default;
-
-  /// Read the header.  Required before reading first record.
-  virtual Error read() = 0;
-  /// Return the codegen data version.
-  virtual uint32_t getVersion() const = 0;
-  /// Return the codegen data kind.
-  virtual CGDataKind getDataKind() const = 0;
-  /// Return true if the data has an outlined hash tree.
-  virtual bool hasOutlinedHashTree() const = 0;
-  /// Return the outlined hash tree that is released from the reader.
-  std::unique_ptr<OutlinedHashTree> releaseOutlinedHashTree() {
-    return std::move(HashTreeRecord.HashTree);
-  }
-
-  /// Factory method to create an appropriately typed reader for the given
-  /// codegen data file path and file system.
-  static Expected<std::unique_ptr<CodeGenDataReader>>
-  create(const Twine &Path, vfs::FileSystem &FS);
-
-  /// Factory method to create an appropriately typed reader for the given
-  /// memory buffer.
-  static Expected<std::unique_ptr<CodeGenDataReader>>
-  create(std::unique_ptr<MemoryBuffer> Buffer);
-
-  /// Extract the cgdata embedded in sections from the given object file and
-  /// merge them into the GlobalOutlineRecord. This is a static helper that
-  /// is used by `llvm-cgdata merge` or ThinLTO's two-codegen rounds.
-  static Error mergeFromObjectFile(const object::ObjectFile *Obj,
-                                   OutlinedHashTreeRecord &GlobalOutlineRecord);
-
-protected:
-  /// The outlined hash tree that has been read. When it's released by
-  /// releaseOutlinedHashTree(), it's no longer valid.
-  OutlinedHashTreeRecord HashTreeRecord;
-
-  /// Set the current error and return same.
-  Error error(cgdata_error Err, const std::string &ErrMsg = "") {
-    LastError = Err;
-    LastErrorMsg = ErrMsg;
-    if (Err == cgdata_error::success)
-      return Error::success();
-    return make_error<CGDataError>(Err, ErrMsg);
-  }
-
-  Error error(Error &&E) {
-    handleAllErrors(std::move(E), [&](const CGDataError &IPE) {
-      LastError = IPE.get();
-      LastErrorMsg = IPE.getMessage();
-    });
-    return make_error<CGDataError>(LastError, LastErrorMsg);
-  }
-
-  /// Clear the current error and return a successful one.
-  Error success() { return error(cgdata_error::success); }
-};
-
-class IndexedCodeGenDataReader : public CodeGenDataReader {
-  /// The codegen data file contents.
-  std::unique_ptr<MemoryBuffer> DataBuffer;
-  /// The header
-  IndexedCGData::Header Header;
-
-public:
-  IndexedCodeGenDataReader(std::unique_ptr<MemoryBuffer> DataBuffer)
-      : DataBuffer(std::move(DataBuffer)) {}
-  IndexedCodeGenDataReader(const IndexedCodeGenDataReader &) = delete;
-  IndexedCodeGenDataReader &
-  operator=(const IndexedCodeGenDataReader &) = delete;
-
-  /// Return true if the given buffer is in binary codegen data format.
-  static bool hasFormat(const MemoryBuffer &Buffer);
-  /// Read the contents including the header.
-  Error read() override;
-  /// Return the codegen data version.
-  uint32_t getVersion() const override { return Header.Version; }
-  /// Return the codegen data kind.
-  CGDataKind getDataKind() const override {
-    return static_cast<CGDataKind>(Header.DataKind);
-  }
-  /// Return true if the header indicates the data has an outlined hash tree.
-  /// This does not mean that the data is still available.
-  bool hasOutlinedHashTree() const override {
-    return Header.DataKind &
-           static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
-  }
-};
-
-/// This format is a simple text format that's suitable for test data.
-/// The header is a custom format starting with `:` per line to indicate which
-/// codegen data is recorded. `#` is used to indicate a comment.
-/// The subsequent data is a YAML format per each codegen data in order.
-/// Currently, it only has a function outlined hash tree.
-class TextCodeGenDataReader : public CodeGenDataReader {
-  /// The codegen data file contents.
-  std::unique_ptr<MemoryBuffer> DataBuffer;
-  /// Iterator over the profile data.
-  line_iterator Line;
-  /// Describe the kind of the codegen data.
-  CGDataKind DataKind = CGDataKind::Unknown;
-
-public:
-  TextCodeGenDataReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
-      : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
-  TextCodeGenDataReader(const TextCodeGenDataReader &) = delete;
-  TextCodeGenDataReader &operator=(const TextCodeGenDataReader &) = delete;
-
-  /// Return true if the given buffer is in text codegen data format.
-  static bool hasFormat(const MemoryBuffer &Buffer);
-  /// Read the contents including the header.
-  Error read() override;
-  /// Text format does not have version, so return 0.
-  uint32_t getVersion() const override { return 0; }
-  /// Return the codegen data kind.
-  CGDataKind getDataKind() const override { return DataKind; }
-  /// Return true if the header indicates the data has an outlined hash tree.
-  /// This does not mean that the data is still available.
-  bool hasOutlinedHashTree() const override {
-    return static_cast<uint32_t>(DataKind) &
-           static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
-  }
-};
-
-} // end namespace llvm
-
-#endif // LLVM_CODEGENDATA_CODEGENDATAREADER_H
diff --git a/llvm/include/llvm/CodeGenData/CodeGenDataWriter.h b/llvm/include/llvm/CodeGenData/CodeGenDataWriter.h
deleted file mode 100644
index e17ffc3482ec9..0000000000000
--- a/llvm/include/llvm/CodeGenData/CodeGenDataWriter.h
+++ /dev/null
@@ -1,68 +0,0 @@
-//===- CodeGenDataWriter.h --------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains support for writing codegen data.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGENDATA_CODEGENDATAWRITER_H
-#define LLVM_CODEGENDATA_CODEGENDATAWRITER_H
-
-#include "llvm/CodeGenData/CodeGenData.h"
-#include "llvm/CodeGenData/OutlinedHashTreeRecord.h"
-#include "llvm/Support/Error.h"
-
-namespace llvm {
-
-class CGDataOStream;
-
-class CodeGenDataWriter {
-  /// The outlined hash tree to be written.
-  OutlinedHashTreeRecord HashTreeRecord;
-
-  /// A bit mask describing the kind of the codegen data.
-  CGDataKind DataKind = CGDataKind::Unknown;
-
-public:
-  CodeGenDataWriter() = default;
-  ~CodeGenDataWriter() = default;
-
-  /// Add the outlined hash tree record. The input Record is released.
-  void addRecord(OutlinedHashTreeRecord &Record);
-
-  /// Write the codegen data to \c OS
-  Error write(raw_fd_ostream &OS);
-
-  /// Write the codegen data in text format to \c OS
-  Error writeText(raw_fd_ostream &OS);
-
-  /// Return the attributes of the current CGData.
-  CGDataKind getCGDataKind() const { return DataKind; }
-
-  /// Return true if the header indicates the data has an outlined hash tree.
-  bool hasOutlinedHashTree() const {
-    return static_cast<uint32_t>(DataKind) &
-           static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
-  }
-
-private:
-  /// The offset of the outlined hash tree in the file.
-  uint64_t OutlinedHashTreeOffset;
-
-  /// Write the codegen data header to \c COS
-  Error writeHeader(CGDataOStream &COS);
-
-  /// Write the codegen data header in text to \c OS
-  Error writeHeaderText(raw_fd_ostream &OS);
-
-  Error writeImpl(CGDataOStream &COS);
-};
-
-} // end namespace llvm
-
-#endif // LLVM_CODEGENDATA_CODEGENDATAWRITER_H
diff --git a/llvm/lib/CodeGenData/CMakeLists.txt b/llvm/lib/CodeGenData/CMakeLists.txt
index 0a231d6214fea..f9d107f52a715 100644
--- a/llvm/lib/CodeGenData/CMakeLists.txt
+++ b/llvm/lib/CodeGenData/CMakeLists.txt
@@ -1,7 +1,4 @@
 add_llvm_component_library(LLVMCodeGenData
-  CodeGenData.cpp
-  CodeGenDataReader.cpp
-  CodeGenDataWriter.cpp
   OutlinedHashTree.cpp
   OutlinedHashTreeRecord.cpp
 
diff --git a/llvm/lib/CodeGenData/CodeGenData.cpp b/llvm/lib/CodeGenData/CodeGenData.cpp
deleted file mode 100644
index 49b7447440959..0000000000000
--- a/llvm/lib/CodeGenData/CodeGenData.cpp
+++ /dev/null
@@ -1,196 +0,0 @@
-//===-- CodeGenData.cpp ---------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains support for codegen data that has stable summary which
-// can be used to optimize the code in the subsequent codegen.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Bitcode/BitcodeWriter.h"
-#include "llvm/CodeGenData/CodeGenDataReader.h"
-#include "llvm/CodeGenData/OutlinedHashTreeRecord.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/WithColor.h"
-
-#define DEBUG_TYPE "cg-data"
-
-using namespace llvm;
-using namespace cgdata;
-
-static std::string getCGDataErrString(cgdata_error Err,
-                                      const std::string &ErrMsg = "") {
-  std::string Msg;
-  raw_string_ostream OS(Msg);
-
-  switch (Err) {
-  case cgdata_error::success:
-    OS << "success";
-    break;
-  case cgdata_error::eof:
-    OS << "end of File";
-    break;
-  case cgdata_error::bad_magic:
-    OS << "invalid codegen data (bad magic)";
-    break;
-  case cgdata_error::bad_header:
-    OS << "invalid codegen data (file header is corrupt)";
-    break;
-  case cgdata_error::empty_cgdata:
-    OS << "empty codegen data";
-    break;
-  case cgdata_error::malformed:
-    OS << "malformed codegen data";
-    break;
-  case cgdata_error::unsupported_version:
-    OS << "unsupported codegen data version";
-    break;
-  }
-
-  // If optional error message is not empty, append it to the message.
-  if (!ErrMsg.empty())
-    OS << ": " << ErrMsg;
-
-  return OS.str();
-}
-
-namespace {
-
-// FIXME: This class is only here to support the transition to llvm::Error. It
-// will be removed once this transition is complete. Clients should prefer to
-// deal with the Error value directly, rather than converting to error_code.
-class CGDataErrorCategoryType : public std::error_category {
-  const char *name() const noexcept override { return "llvm.cgdata"; }
-
-  std::string message(int IE) const override {
-    return getCGDataErrString(static_cast<cgdata_error>(IE));
-  }
-};
-
-} // end anonymous namespace
-
-const std::error_category &llvm::cgdata_category() {
-  static CGDataErrorCategoryType ErrorCategory;
-  return ErrorCategory;
-}
-
-std::string CGDataError::message() const {
-  return getCGDataErrString(Err, Msg);
-}
-
-char CGDataError::ID = 0;
-
-namespace {
-
-const char *CodeGenDataSectNameCommon[] = {
-#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix)         \
-  SectNameCommon,
-#include "llvm/CodeGenData/CodeGenData.inc"
-};
-
-const char *CodeGenDataSectNameCoff[] = {
-#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix)         \
-  SectNameCoff,
-#include "llvm/CodeGenData/CodeGenData.inc"
-};
-
-const char *CodeGenDataSectNamePrefix[] = {
-#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Prefix,
-#include "llvm/CodeGenData/CodeGenData.inc"
-};
-
-} // namespace
-
-namespace llvm {
-
-std::string getCodeGenDataSectionName(CGDataSectKind CGSK,
-                                      Triple::ObjectFormatType OF,
-                                      bool AddSegmentInfo) {
-  std::string SectName;
-
-  if (OF == Triple::MachO && AddSegmentInfo)
-    SectName = CodeGenDataSectNamePrefix[CGSK];
-
-  if (OF == Triple::COFF)
-    SectName += CodeGenDataSectNameCoff[CGSK];
-  else
-    SectName += CodeGenDataSectNameCommon[CGSK];
-
-  return SectName;
-}
-
-std::unique_ptr<CodeGenData> CodeGenData::Instance = nullptr;
-std::once_flag CodeGenData::OnceFlag;
-
-CodeGenData &CodeGenData::getInstance() {
-  std::call_once(CodeGenData::OnceFlag, []() {
-    Instance = std::unique_ptr<CodeGenData>(new CodeGenData());
-
-    // TODO: Initialize writer or reader mode for the client optimization.
-  });
-  return *(Instance.get());
-}
-
-namespace IndexedCGData {
-
-Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
-  using namespace support;
-
-  static_assert(std::is_standard_layout_v<llvm::IndexedCGData::Header>,
-                "The header should be standard layout type since we use offset "
-                "of fields to read.");
-  Header H;
-  H.Magic = endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
-  if (H.Magic != IndexedCGData::Magic)
-    return make_error<CGDataError>(cgdata_error::bad_magic);
-  H.Version = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);
-  if (H.Version > IndexedCGData::CGDataVersion::CurrentVersion)
-    return make_error<CGDataError>(cgdata_error::unsupported_version);
-  H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);
-
-  switch (H.Version) {
-    // When a new field is added to the header add a case statement here to
-    // compute the size as offset of the new field + size of the new field. This
-    // relies on the field being added to the end of the list.
-    static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version1,
-                  "Please update the size computation below if a new field has "
-                  "been added to the header, if not add a case statement to "
-                  "fall through to the latest version.");
-  case 1ull:
-    H.OutlinedHashTreeOffset =
-        endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
-  }
-
-  return H;
-}
-
-} // end namespace IndexedCGData
-
-namespace cgdata {
-
-void warn(Twine Message, std::string Whence, std::string Hint) {
-  WithColor::warning();
-  if (!Whence.empty())
-    errs() << Whence << ": ";
-  errs() << Message << "\n";
-  if (!Hint.empty())
-    WithColor::note() << Hint << "\n";
-}
-
-void warn(Error E, StringRef Whence) {
-  if (E.isA<CGDataError>()) {
-    handleAllErrors(std::move(E), [&](const CGDataError &IPE) {
-      warn(IPE.message(), Whence.str(), "");
-    });
-  }
-}
-
-} // end namespace cgdata
-
-} // end namespace llvm
diff --git a/llvm/lib/CodeGenData/CodeGenDataReader.cpp b/llvm/lib/CodeGenData/CodeGenDataReader.cpp
deleted file mode 100644
index bcd61047079ff..0000000000000
--- a/llvm/lib/CodeGenData/CodeGenDataReader.cpp
+++ /dev/null
@@ -1,175 +0,0 @@
-//===- CodeGenDataReader.cpp ----------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains support for reading codegen data.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGenData/CodeGenDataReader.h"
-#include "llvm/CodeGenData/OutlinedHashTreeRecord.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/MemoryBuffer.h"
-
-#define DEBUG_TYPE "cg-data-reader"
-
-using namespace llvm;
-
-namespace llvm {
-
-static Expected<std::unique_ptr<MemoryBuffer>>
-setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
-  auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
-                                           : FS.getBufferForFile(Filename);
-  if (std::error_code EC = BufferOrErr.getError())
-    return errorCodeToError(EC);
-  return std::move(BufferOrErr.get());
-}
-
-Error CodeGenDataReader::mergeFromObjectFile(
-    const object::ObjectFile *Obj,
-    OutlinedHashTreeRecord &GlobalOutlineRecord) {
-  Triple TT = Obj->makeTriple();
-  auto CGOutLineName =
-      getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
-
-  for (auto &Section : Obj->sections()) {
-    Expected<StringRef> NameOrErr = Section.getName();
-    if (!NameOrErr)
-      return NameOrErr.takeError();
-    Expected<StringRef> ContentsOrErr = Section.getContents();
-    if (!ContentsOrErr)
-      return ContentsOrErr.takeError();
-    auto *Data = reinterpret_cast<const unsigned char *>(ContentsOrErr->data());
-    auto *EndData = Data + ContentsOrErr->size();
-
-    if (*NameOrErr == CGOutLineName) {
-      // In case dealing with an executable that has concatenated cgdata,
-      // we want to merge them into a single cgdata.
-      // Although it's not a typical workflow, we support this scenario.
-      while (Data != EndData) {
-        OutlinedHashTreeRecord LocalOutlineRecord;
-        LocalOutlineRecord.deserialize(Data);
-        GlobalOutlineRecord.merge(LocalOutlineRecord);
-      }
-    }
-    // TODO: Add support for other cgdata sections.
-  }
-
-  return Error::success();
-}
-
-Error IndexedCodeGenDataReader::read() {
-  using namespace support;
-
-  // The smallest header with the version 1 is 24 bytes
-  const unsigned MinHeaderSize = 24;
-  if (DataBuffer->getBufferSize() < MinHeaderSize)
-    return error(cgdata_error::bad_header);
-
-  auto *Start =
-      reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart());
-  auto *End =
-      reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd());
-  if (auto E = IndexedCGData::Header::readFromBuffer(Start).moveInto(Header))
-    return E;
-
-  if (hasOutlinedHashTree()) {
-    const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset;
-    if (Ptr >= End)
-      return error(cgdata_error::eof);
-    HashTreeRecord.deserialize(Ptr);
-  }
-
-  return success();
-}
-
-Expected<std::unique_ptr<CodeGenDataReader>>
-CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) {
-  // Set up the buffer to read.
-  auto BufferOrError = setupMemoryBuffer(Path, FS);
-  if (Error E = BufferOrError.takeError())
-    return std::move(E);
-  return CodeGenDataReader::create(std::move(BufferOrError.get()));
-}
-
-Expected<std::unique_ptr<CodeGenDataReader>>
-CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
-  if (Buffer->getBufferSize() == 0)
-    return make_error<CGDataError>(cgdata_error::empty_cgdata);
-
-  std::unique_ptr<CodeGenDataReader> Reader;
-  // Create the reader.
-  if (IndexedCodeGenDataReader::hasFormat(*Buffer))
-    Reader = std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer));
-  else if (TextCodeGenDataReader::hasFormat(*Buffer))
-    Reader = std::make_unique<TextCodeGenDataReader>(std::move(Buffer));
-  else
-    return make_error<CGDataError>(cgdata_error::malformed);
-
-  // Initialize the reader and return the result.
-  if (Error E = Reader->read())
-    return std::move(E);
-
-  return std::move(Reader);
-}
-
-bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) {
-  using namespace support;
-  if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic))
-    return false;
-
-  uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>(
-      DataBuffer.getBufferStart());
-  // Verify that it's magical.
-  return Magic == IndexedCGData::Magic;
-}
-
-bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) {
-  // Verify that this really looks like plain ASCII text by checking a
-  // 'reasonable' number of characters (up to the magic size).
-  StringRef Prefix = Buffer.getBuffer().take_front(sizeof(uint64_t));
-  return llvm::all_of(Prefix, [](char c) { return isPrint(c) || isSpace(c); });
-}
-Error TextCodeGenDataReader::read() {
-  using namespace support;
-
-  // Parse the custom header line by line.
-  for (; !Line.is_at_eof(); ++Line) {
-    // Skip empty or whitespace-only lines
-    if (Line->trim().empty())
-      continue;
-
-    if (!Line->starts_with(":"))
-      break;
-    StringRef Str = Line->drop_front().rtrim();
-    if (Str.equals_insensitive("outlined_hash_tree"))
-      DataKind |= CGDataKind::FunctionOutlinedHashTree;
-    else
-      return error(cgdata_error::bad_header);
-  }
-
-  // We treat an empty header (that is a comment # only) as a valid header.
-  if (Line.is_at_eof()) {
-    if (DataKind == CGDataKind::Unknown)
-      return Error::success();
-    return error(cgdata_error::bad_header);
-  }
-
-  // The YAML docs follow after the header.
-  const char *Pos = Line->data();
-  size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) -
-                reinterpret_cast<size_t>(Pos);
-  yaml::Input YOS(StringRef(Pos, Size));
-  if (hasOutlinedHashTree())
-    HashTreeRecord.deserializeYAML(YOS);
-
-  // TODO: Add more yaml cgdata in order
-
-  return Error::success();
-}
-} // end namespace llvm
diff --git a/llvm/lib/CodeGenData/CodeGenDataWriter.cpp b/llvm/lib/CodeGenData/CodeGenDataWriter.cpp
deleted file mode 100644
index 3c91a1b303450..0000000000000
--- a/llvm/lib/CodeGenData/CodeGenDataWriter.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-//===- CodeGenDataWriter.cpp ----------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains support for writing codegen data.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGenData/CodeGenDataWriter.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/EndianStream.h"
-
-#define DEBUG_TYPE "cg-data-writer"
-
-using namespace llvm;
-
-namespace llvm {
-
-/// A struct to define how the data stream should be patched.
-struct CGDataPatchItem {
-  uint64_t Pos; // Where to patch.
-  uint64_t *D;  // Pointer to an array of source data.
-  int N;        // Number of elements in \c D array.
-};
-
-// A wrapper class to abstract writer stream with support of bytes
-// back patching.
-class CGDataOStream {
-public:
-  CGDataOStream(raw_fd_ostream &FD)
-      : IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {}
-  CGDataOStream(raw_string_ostream &STR)
-      : IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {}
-
-  uint64_t tell() { return OS.tell(); }
-  void write(uint64_t V) { LE.write<uint64_t>(V); }
-  void write32(uint32_t V) { LE.write<uint32_t>(V); }
-  void write8(uint8_t V) { LE.write<uint8_t>(V); }
-
-  // \c patch can only be called when all data is written and flushed.
-  // For raw_string_ostream, the patch is done on the target string
-  // directly and it won't be reflected in the stream's internal buffer.
-  void patch(ArrayRef<CGDataPatchItem> P) {
-    using namespace support;
-
-    if (IsFDOStream) {
-      raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS);
-      const uint64_t LastPos = FDOStream.tell();
-      for (const auto &K : P) {
-        FDOStream.seek(K.Pos);
-        for (int I = 0; I < K.N; I++)
-          write(K.D[I]);
-      }
-      // Reset the stream to the last position after patching so that users
-      // don't accidentally overwrite data. This makes it consistent with
-      // the string stream below which replaces the data directly.
-      FDOStream.seek(LastPos);
-    } else {
-      raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS);
-      std::string &Data = SOStream.str(); // with flush
-      for (const auto &K : P) {
-        for (int I = 0; I < K.N; I++) {
-          uint64_t Bytes =
-              endian::byte_swap<uint64_t, llvm::endianness::little>(K.D[I]);
-          Data.replace(K.Pos + I * sizeof(uint64_t), sizeof(uint64_t),
-                       (const char *)&Bytes, sizeof(uint64_t));
-        }
-      }
-    }
-  }
-
-  // If \c OS is an instance of \c raw_fd_ostream, this field will be
-  // true. Otherwise, \c OS will be an raw_string_ostream.
-  bool IsFDOStream;
-  raw_ostream &OS;
-  support::endian::Writer LE;
-};
-
-} // end namespace llvm
-
-void CodeGenDataWriter::addRecord(OutlinedHashTreeRecord &Record) {
-  assert(Record.HashTree && "empty hash tree in the record");
-  HashTreeRecord.HashTree = std::move(Record.HashTree);
-
-  DataKind |= CGDataKind::FunctionOutlinedHashTree;
-}
-
-Error CodeGenDataWriter::write(raw_fd_ostream &OS) {
-  CGDataOStream COS(OS);
-  return writeImpl(COS);
-}
-
-Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) {
-  using namespace support;
-  IndexedCGData::Header Header;
-  Header.Magic = IndexedCGData::Magic;
-  Header.Version = IndexedCGData::Version;
-
-  // Set the CGDataKind depending on the kind.
-  Header.DataKind = 0;
-  if (static_cast<bool>(DataKind & CGDataKind::FunctionOutlinedHashTree))
-    Header.DataKind |=
-        static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
-
-  Header.OutlinedHashTreeOffset = 0;
-
-  // Only write up to the CGDataKind. We need to remember the offset of the
-  // remaining fields to allow back-patching later.
-  COS.write(Header.Magic);
-  COS.write32(Header.Version);
-  COS.write32(Header.DataKind);
-
-  // Save the location of Header.OutlinedHashTreeOffset field in \c COS.
-  OutlinedHashTreeOffset = COS.tell();
-
-  // Reserve the space for OutlinedHashTreeOffset field.
-  COS.write(0);
-
-  return Error::success();
-}
-
-Error CodeGenDataWriter::writeImpl(CGDataOStream &COS) {
-  if (Error E = writeHeader(COS))
-    return E;
-
-  uint64_t OutlinedHashTreeFieldStart = COS.tell();
-  if (hasOutlinedHashTree())
-    HashTreeRecord.serialize(COS.OS);
-
-  // Back patch the offsets.
-  CGDataPatchItem PatchItems[] = {
-      {OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart, 1}};
-  COS.patch(PatchItems);
-
-  return Error::success();
-}
-
-Error CodeGenDataWriter::writeHeaderText(raw_fd_ostream &OS) {
-  if (hasOutlinedHashTree())
-    OS << "# Outlined stable hash tree\n:outlined_hash_tree\n";
-
-  // TODO: Add more data types in this header
-
-  return Error::success();
-}
-
-Error CodeGenDataWriter::writeText(raw_fd_ostream &OS) {
-  if (Error E = writeHeaderText(OS))
-    return E;
-
-  yaml::Output YOS(OS);
-  if (hasOutlinedHashTree())
-    HashTreeRecord.serializeYAML(YOS);
-
-  // TODO: Write more yaml cgdata in order
-
-  return Error::success();
-}
diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index 720cbd064ed26..6b7f2b58e603e 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -74,7 +74,6 @@ set(LLVM_TEST_DEPENDS
           llvm-c-test
           llvm-cat
           llvm-cfi-verify
-          llvm-cgdata
           llvm-config
           llvm-cov
           llvm-cvtres
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index b6431fcaf459f..fe1262893212f 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -180,7 +180,6 @@ def get_asan_rtlib():
         "llvm-addr2line",
         "llvm-bcanalyzer",
         "llvm-bitcode-strip",
-        "llvm-cgdata",
         "llvm-config",
         "llvm-cov",
         "llvm-cxxdump",
diff --git a/llvm/test/tools/llvm-cgdata/dump.test b/llvm/test/tools/llvm-cgdata/dump.test
deleted file mode 100644
index 20e0b654973c2..0000000000000
--- a/llvm/test/tools/llvm-cgdata/dump.test
+++ /dev/null
@@ -1,32 +0,0 @@
-# Test dump between the binary and text formats.
-
-RUN: split-file %s %t
-
-RUN: llvm-cgdata dump -binary %t/dump.cgtext -o %t/dump.cgdata
-RUN: llvm-cgdata dump -text %t/dump.cgdata -o %t/dump-round.cgtext
-RUN: llvm-cgdata dump -binary %t/dump-round.cgtext -o %t/dump-round.cgdata
-RUN: llvm-cgdata dump -text %t/dump-round.cgtext -o %t/dump-round-round.cgtext
-RUN: diff %t/dump.cgdata %t/dump-round.cgdata
-RUN: diff %t/dump-round.cgtext %t/dump-round-round.cgtext
-
-;--- dump.cgtext
-# Outlined stable hash tree
-:outlined_hash_tree
----
-0:
-  Hash:            0x0
-  Terminals:       0
-  SuccessorIds:    [ 1 ]
-1:
-  Hash:            0x1
-  Terminals:       0
-  SuccessorIds:    [ 2, 3 ]
-2:
-  Hash:            0x3
-  Terminals:       5
-  SuccessorIds:    [  ]
-3:
-  Hash:            0x2
-  Terminals:       4
-  SuccessorIds:    [  ]
-...
diff --git a/llvm/test/tools/llvm-cgdata/empty.test b/llvm/test/tools/llvm-cgdata/empty.test
deleted file mode 100644
index 6e41f33ade9c3..0000000000000
--- a/llvm/test/tools/llvm-cgdata/empty.test
+++ /dev/null
@@ -1,35 +0,0 @@
-# Test no input file
-RUN: not llvm-cgdata dump -o - 2>&1 | FileCheck %s --check-prefix=NOFILE --ignore-case
-NOFILE: error: No such file or directory
-
-# Test for empty cgdata file, which is invalid.
-RUN: touch %t_emptyfile.cgtext
-RUN: not llvm-cgdata dump %t_emptyfile.cgtext -text 2>&1 | FileCheck %s --check-prefix=EMPTY
-EMPTY: {{.}}emptyfile.cgtext: empty codegen data
-
-# Test for empty header in the text format. It can be converted to a valid binary file.
-RUN: printf '#' > %t_emptyheader.cgtext
-RUN: llvm-cgdata dump %t_emptyheader.cgtext -binary -o %t_emptyheader.cgdata
-
-# Without any cgdata other than the header, no data shows by default.
-RUN: llvm-cgdata show %t_emptyheader.cgdata | count 0
-
-# The version number appears when asked, as it's in the header
-RUN: llvm-cgdata show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix=VERSION
-VERSION: Version: 1
-
-# When converting a binary file (w/ the header only) to a text file, it's an empty file as the text format does not have an explicit header.
-RUN: llvm-cgdata dump %t_emptyheader.cgdata -text | count 0
-
-# Synthesize a header only cgdata.
-# struct Header {
-#   uint64_t Magic;
-#   uint32_t Version;
-#   uint32_t DataKind;
-#   uint64_t OutlinedHashTreeOffset;
-# }
-RUN: printf '\xffcgdata\x81' > %t_header.cgdata
-RUN: printf '\x01\x00\x00\x00' >> %t_header.cgdata
-RUN: printf '\x00\x00\x00\x00' >> %t_header.cgdata
-RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata
-RUN: diff %t_header.cgdata %t_emptyheader.cgdata
diff --git a/llvm/test/tools/llvm-cgdata/error.test b/llvm/test/tools/llvm-cgdata/error.test
deleted file mode 100644
index 4da22498ea390..0000000000000
--- a/llvm/test/tools/llvm-cgdata/error.test
+++ /dev/null
@@ -1,38 +0,0 @@
-# Test various error cases
-
-# Synthesize a header only cgdata.
-# struct Header {
-#   uint64_t Magic;
-#   uint32_t Version;
-#   uint32_t DataKind;
-#   uint64_t OutlinedHashTreeOffset;
-# }
-RUN: touch %t_empty.cgdata
-RUN: not llvm-cgdata show %t_empty.cgdata 2>&1 | FileCheck %s --check-prefix=EMPTY
-EMPTY: {{.}}cgdata: empty codegen data
-
-# Not a magic.
-RUN: printf '\xff' > %t_malformed.cgdata
-RUN: not llvm-cgdata show %t_malformed.cgdata 2>&1 | FileCheck %s --check-prefix=MALFORMED
-MALFORMED: {{.}}cgdata: malformed codegen data
-
-# The minimum header size is 24.
-RUN: printf '\xffcgdata\x81' > %t_corrupt.cgdata
-RUN: not llvm-cgdata show %t_corrupt.cgdata 2>&1 | FileCheck %s  --check-prefix=CORRUPT
-CORRUPT: {{.}}cgdata: invalid codegen data (file header is corrupt)
-
-# The current version 1 while the header says 2.
-RUN: printf '\xffcgdata\x81' > %t_version.cgdata
-RUN: printf '\x02\x00\x00\x00' >> %t_version.cgdata
-RUN: printf '\x00\x00\x00\x00' >> %t_version.cgdata
-RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
-RUN: not llvm-cgdata show %t_version.cgdata 2>&1 | FileCheck %s  --check-prefix=BAD_VERSION
-BAD_VERSION: {{.}}cgdata: unsupported codegen data version
-
-# Header says an outlined hash tree, but the file ends after the header.
-RUN: printf '\xffcgdata\x81' > %t_eof.cgdata
-RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata
-RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata
-RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_eof.cgdata
-RUN: not llvm-cgdata show %t_eof.cgdata 2>&1 | FileCheck %s  --check-prefix=EOF
-EOF: {{.}}cgdata: end of File
diff --git a/llvm/test/tools/llvm-cgdata/merge-archive.test b/llvm/test/tools/llvm-cgdata/merge-archive.test
deleted file mode 100644
index e17422fdcd140..0000000000000
--- a/llvm/test/tools/llvm-cgdata/merge-archive.test
+++ /dev/null
@@ -1,90 +0,0 @@
-# REQUIRES: shell, aarch64-registered-target
-# UNSUPPORTED: system-windows
-
-# Merge an archive that has two object files having cgdata (__llvm_outline)
-
-RUN: split-file %s %t
-
-# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
-RUN: llvm-cgdata dump -binary %t/raw-1.cgtext -o %t/raw-1.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
-RUN: sed -ie "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1.ll
-RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o
-
-# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
-RUN: llvm-cgdata dump -binary %t/raw-2.cgtext -o %t/raw-2.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
-RUN: sed -ie "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2.ll
-RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
-
-# Make an archive from two object files
-RUN: llvm-ar rcs %t/merge-archive.a %t/merge-1.o %t/merge-2.o
-
-# Merge the archive into the codegen data file.
-RUN: llvm-cgdata merge %t/merge-archive.a -o %t/merge-archive.cgdata
-RUN: llvm-cgdata show %t/merge-archive.cgdata | FileCheck %s
-CHECK: Outlined hash tree:
-CHECK-NEXT:  Total Node Count: 4
-CHECK-NEXT:  Terminal Node Count: 2
-CHECK-NEXT:  Depth: 2
-
-RUN: llvm-cgdata dump %t/merge-archive.cgdata | FileCheck %s --check-prefix=TREE
-TREE: # Outlined stable hash tree
-TREE-NEXT: :outlined_hash_tree
-TREE-NEXT: ---
-TREE-NEXT: 0:
-TREE-NEXT:   Hash:            0x0
-TREE-NEXT:   Terminals:       0
-TREE-NEXT:   SuccessorIds:    [ 1 ]
-TREE-NEXT: 1:
-TREE-NEXT:   Hash:            0x1
-TREE-NEXT:   Terminals:       0
-TREE-NEXT:   SuccessorIds:    [ 2, 3 ]
-TREE-NEXT: 2:
-TREE-NEXT:   Hash:            0x3
-TREE-NEXT:   Terminals:       5
-TREE-NEXT:   SuccessorIds:    [  ]
-TREE-NEXT: 3:
-TREE-NEXT:   Hash:            0x2
-TREE-NEXT:   Terminals:       4
-TREE-NEXT:   SuccessorIds:    [  ]
-TREE-NEXT: ...
-
-;--- raw-1.cgtext
-:outlined_hash_tree
-0:
-  Hash:            0x0
-  Terminals:       0
-  SuccessorIds:    [ 1 ]
-1:
-  Hash:            0x1
-  Terminals:       0
-  SuccessorIds:    [ 2 ]
-2:
-  Hash:            0x2
-  Terminals:       4
-  SuccessorIds:    [  ]
-...
-
-;--- merge-1.ll
-@.data = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
-
-
-;--- raw-2.cgtext
-:outlined_hash_tree
-0:
-  Hash:            0x0
-  Terminals:       0
-  SuccessorIds:    [ 1 ]
-1:
-  Hash:            0x1
-  Terminals:       0
-  SuccessorIds:    [ 2 ]
-2:
-  Hash:            0x3
-  Terminals:       5
-  SuccessorIds:    [  ]
-...
-
-;--- merge-2.ll
-@.data = private unnamed_addr constant [72 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_outline"
diff --git a/llvm/test/tools/llvm-cgdata/merge-concat.test b/llvm/test/tools/llvm-cgdata/merge-concat.test
deleted file mode 100644
index 68ce20503e58d..0000000000000
--- a/llvm/test/tools/llvm-cgdata/merge-concat.test
+++ /dev/null
@@ -1,83 +0,0 @@
-# REQUIRES: shell, aarch64-registered-target
-# UNSUPPORTED: system-windows
-
-# Merge a binary file (e.g., a linked executable) having concatenated cgdata (__llvm_outline)
-
-RUN: split-file %s %t
-
-# Synthesize two sets of raw cgdata without the header (24 byte) from the indexed cgdata.
-# Concatenate them in merge-concat.ll
-RUN: llvm-cgdata dump -binary %t/raw-1.cgtext -o %t/raw-1.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
-RUN: sed -ie "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-concat.ll
-RUN: llvm-cgdata dump -binary %t/raw-2.cgtext -o %t/raw-2.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
-RUN: sed -ie "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat.ll
-
-RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o
-RUN: llvm-cgdata merge %t/merge-concat.o -o %t/merge-concat.cgdata
-RUN: llvm-cgdata show %t/merge-concat.cgdata | FileCheck %s
-CHECK: Outlined hash tree:
-CHECK-NEXT:  Total Node Count: 4
-CHECK-NEXT:  Terminal Node Count: 2
-CHECK-NEXT:  Depth: 2
-
-RUN: llvm-cgdata dump %t/merge-concat.cgdata | FileCheck %s --check-prefix=TREE
-TREE: # Outlined stable hash tree
-TREE-NEXT: :outlined_hash_tree
-TREE-NEXT: ---
-TREE-NEXT: 0:
-TREE-NEXT:   Hash:            0x0
-TREE-NEXT:   Terminals:       0
-TREE-NEXT:   SuccessorIds:    [ 1 ]
-TREE-NEXT: 1:
-TREE-NEXT:   Hash:            0x1
-TREE-NEXT:   Terminals:       0
-TREE-NEXT:   SuccessorIds:    [ 2, 3 ]
-TREE-NEXT: 2:
-TREE-NEXT:   Hash:            0x3
-TREE-NEXT:   Terminals:       5
-TREE-NEXT:   SuccessorIds:    [  ]
-TREE-NEXT: 3:
-TREE-NEXT:   Hash:            0x2
-TREE-NEXT:   Terminals:       4
-TREE-NEXT:   SuccessorIds:    [  ]
-TREE-NEXT: ...
-
-;--- raw-1.cgtext
-:outlined_hash_tree
-0:
-  Hash:            0x0
-  Terminals:       0
-  SuccessorIds:    [ 1 ]
-1:
-  Hash:            0x1
-  Terminals:       0
-  SuccessorIds:    [ 2 ]
-2:
-  Hash:            0x2
-  Terminals:       4
-  SuccessorIds:    [  ]
-...
-
-;--- raw-2.cgtext
-:outlined_hash_tree
-0:
-  Hash:            0x0
-  Terminals:       0
-  SuccessorIds:    [ 1 ]
-1:
-  Hash:            0x1
-  Terminals:       0
-  SuccessorIds:    [ 2 ]
-2:
-  Hash:            0x3
-  Terminals:       5
-  SuccessorIds:    [  ]
-...
-
-;--- merge-concat.ll
-
-; In an linked executable (as opposed to an object file), cgdata in __llvm_outline might be concatenated. Although this is not a typical workflow, we simply support this case to parse cgdata that is concatenated. In other words, the following two trees are encoded back-to-back in a binary format.
-@.data1 = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
-@.data2 = private unnamed_addr constant [72 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_outline"
diff --git a/llvm/test/tools/llvm-cgdata/merge-double.test b/llvm/test/tools/llvm-cgdata/merge-double.test
deleted file mode 100644
index c88e95ba68ea4..0000000000000
--- a/llvm/test/tools/llvm-cgdata/merge-double.test
+++ /dev/null
@@ -1,87 +0,0 @@
-# REQUIRES: shell, aarch64-registered-target
-# UNSUPPORTED: system-windows
-
-# Merge two object files having cgdata (__llvm_outline)
-
-RUN: split-file %s %t
-
-# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
-RUN: llvm-cgdata dump -binary %t/raw-1.cgtext -o %t/raw-1.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt
-RUN: sed -ie "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1.ll
-RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o
-
-# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
-RUN: llvm-cgdata dump -binary %t/raw-2.cgtext -o %t/raw-2.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt
-RUN: sed -ie "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2.ll
-RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
-
-# Merge two object files into the codegen data file.
-RUN: llvm-cgdata merge %t/merge-1.o %t/merge-2.o -o %t/merge.cgdata
-
-RUN: llvm-cgdata show %t/merge.cgdata | FileCheck %s
-CHECK: Outlined hash tree:
-CHECK-NEXT:  Total Node Count: 4
-CHECK-NEXT:  Terminal Node Count: 2
-CHECK-NEXT:  Depth: 2
-
-RUN: llvm-cgdata dump %t/merge.cgdata | FileCheck %s --check-prefix=TREE
-TREE: # Outlined stable hash tree
-TREE-NEXT: :outlined_hash_tree
-TREE-NEXT: ---
-TREE-NEXT: 0:
-TREE-NEXT:   Hash:            0x0
-TREE-NEXT:   Terminals:       0
-TREE-NEXT:   SuccessorIds:    [ 1 ]
-TREE-NEXT: 1:
-TREE-NEXT:   Hash:            0x1
-TREE-NEXT:   Terminals:       0
-TREE-NEXT:   SuccessorIds:    [ 2, 3 ]
-TREE-NEXT: 2:
-TREE-NEXT:   Hash:            0x3
-TREE-NEXT:   Terminals:       5
-TREE-NEXT:   SuccessorIds:    [  ]
-TREE-NEXT: 3:
-TREE-NEXT:   Hash:            0x2
-TREE-NEXT:   Terminals:       4
-TREE-NEXT:   SuccessorIds:    [  ]
-TREE-NEXT: ...
-
-;--- raw-1.cgtext
-:outlined_hash_tree
-0:
-  Hash:            0x0
-  Terminals:       0
-  SuccessorIds:    [ 1 ]
-1:
-  Hash:            0x1
-  Terminals:       0
-  SuccessorIds:    [ 2 ]
-2:
-  Hash:            0x2
-  Terminals:       4
-  SuccessorIds:    [  ]
-...
-
-;--- merge-1.ll
-@.data = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
-
-;--- raw-2.cgtext
-:outlined_hash_tree
-0:
-  Hash:            0x0
-  Terminals:       0
-  SuccessorIds:    [ 1 ]
-1:
-  Hash:            0x1
-  Terminals:       0
-  SuccessorIds:    [ 2 ]
-2:
-  Hash:            0x3
-  Terminals:       5
-  SuccessorIds:    [  ]
-...
-
-;--- merge-2.ll
-@.data = private unnamed_addr constant [72 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_outline"
diff --git a/llvm/test/tools/llvm-cgdata/merge-single.test b/llvm/test/tools/llvm-cgdata/merge-single.test
deleted file mode 100644
index 37532eff6b9c8..0000000000000
--- a/llvm/test/tools/llvm-cgdata/merge-single.test
+++ /dev/null
@@ -1,49 +0,0 @@
-# REQUIRES: shell, aarch64-registered-target
-# UNSUPPORTED: system-windows
-
-# Test merge a single object file into a cgdata
-
-RUN: split-file %s %t
-
-# Merge an object file that has no cgdata (__llvm_outline). It still produces a header only cgdata.
-RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-empty.ll -o %t/merge-empty.o
-RUN: llvm-cgdata merge %t/merge-empty.o -o %t/merge-empty.cgdata
-# No summary appear with the header only cgdata.
-RUN: llvm-cgdata show %t/merge-empty.cgdata | count 0
-
-# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata.
-RUN: llvm-cgdata dump -binary %t/raw-single.cgtext -o %t/raw-single.cgdata
-RUN: od -t x1 -j 24 -An %t/raw-single.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-single-bytes.txt
-
-RUN: sed -ie "s/<RAW_1_BYTES>/$(cat %t/raw-single-bytes.txt)/g" %t/merge-single.ll
-RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-single.ll -o %t/merge-single.o
-
-# Merge an object file having cgdata (__llvm_outline)
-RUN: llvm-cgdata merge %t/merge-single.o -o %t/merge-single.cgdata
-RUN: llvm-cgdata show %t/merge-single.cgdata | FileCheck %s
-CHECK: Outlined hash tree:
-CHECK-NEXT:  Total Node Count: 3
-CHECK-NEXT:  Terminal Node Count: 1
-CHECK-NEXT:  Depth: 2
-
-;--- merge-empty.ll
-@.data = private unnamed_addr constant [1 x i8] c"\01"
-
-;--- raw-single.cgtext
-:outlined_hash_tree
-0:
-  Hash:            0x0
-  Terminals:       0
-  SuccessorIds:    [ 1 ]
-1:
-  Hash:            0x1
-  Terminals:       0
-  SuccessorIds:    [ 2 ]
-2:
-  Hash:            0x2
-  Terminals:       4
-  SuccessorIds:    [  ]
-...
-
-;--- merge-single.ll
-@.data = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
diff --git a/llvm/test/tools/llvm-cgdata/show.test b/llvm/test/tools/llvm-cgdata/show.test
deleted file mode 100644
index accb4b77ede24..0000000000000
--- a/llvm/test/tools/llvm-cgdata/show.test
+++ /dev/null
@@ -1,30 +0,0 @@
-# Test show
-
-RUN: split-file %s %t
-RUN: llvm-cgdata show %t/show.cgtext | FileCheck %s
-
-CHECK: Outlined hash tree:
-CHECK-NEXT:   Total Node Count: 3
-CHECK-NEXT:   Terminal Node Count: 1
-CHECK-NEXT:   Depth: 2
-
-# Convert the text file to the binary file
-RUN: llvm-cgdata dump -binary %t/show.cgtext -o %t/show.cgdata
-RUN: llvm-cgdata show %t/show.cgdata | FileCheck %s
-
-;--- show.cgtext
-:outlined_hash_tree
----
-0:
-  Hash:            0x0
-  Terminals:       0
-  SuccessorIds:    [ 1 ]
-1:
-  Hash:            0x1
-  Terminals:       0
-  SuccessorIds:    [ 2 ]
-2:
-  Hash:            0x2
-  Terminals:       3
-  SuccessorIds:    [  ]
-...
diff --git a/llvm/tools/llvm-cgdata/CMakeLists.txt b/llvm/tools/llvm-cgdata/CMakeLists.txt
deleted file mode 100644
index 4f1f7ff635bc3..0000000000000
--- a/llvm/tools/llvm-cgdata/CMakeLists.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-set(LLVM_LINK_COMPONENTS
-  CodeGen
-  CodeGenData
-  Core
-  Object
-  Support
-  )
-
-add_llvm_tool(llvm-cgdata
-  llvm-cgdata.cpp
-
-  DEPENDS
-  intrinsics_gen
-  GENERATE_DRIVER
-  )
diff --git a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
deleted file mode 100644
index 3303ffd9d863b..0000000000000
--- a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp
+++ /dev/null
@@ -1,268 +0,0 @@
-//===-- llvm-cgdata.cpp - LLVM CodeGen Data Tool --------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// llvm-cgdata parses raw codegen data embedded in compiled binary files, and
-// merges them into a single .cgdata file. It can also inspect and maninuplate
-// a .cgdata file. This .cgdata can contain various codegen data like outlining
-// information, and it can be used to optimize the code in the subsequent build.
-//
-//===----------------------------------------------------------------------===//
-#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGenData/CodeGenDataReader.h"
-#include "llvm/CodeGenData/CodeGenDataWriter.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/Object/Archive.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/LLVMDriver.h"
-#include "llvm/Support/Path.h"
-#include "llvm/Support/VirtualFileSystem.h"
-#include "llvm/Support/WithColor.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-using namespace llvm::object;
-
-// TODO: https://llvm.org/docs/CommandGuide/llvm-cgdata.html has documentations
-// on each subcommand.
-cl::SubCommand DumpSubcommand(
-    "dump",
-    "Dump the (indexed) codegen data file in either text or binary format.");
-cl::SubCommand MergeSubcommand(
-    "merge", "Takes binary files having raw codegen data in custom sections, "
-             "and merge them into an index codegen data file.");
-cl::SubCommand
-    ShowSubcommand("show", "Show summary of the (indexed) codegen data file.");
-
-enum CGDataFormat {
-  CD_None = 0,
-  CD_Text,
-  CD_Binary,
-};
-
-cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
-                                    cl::init("-"), cl::desc("Output file"),
-                                    cl::sub(DumpSubcommand),
-                                    cl::sub(MergeSubcommand));
-cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
-                          cl::aliasopt(OutputFilename));
-
-cl::opt<std::string> Filename(cl::Positional, cl::desc("<cgdata-file>"),
-                              cl::sub(DumpSubcommand), cl::sub(ShowSubcommand));
-cl::list<std::string> InputFilenames(cl::Positional, cl::sub(MergeSubcommand),
-                                     cl::desc("<binary-files...>"));
-cl::opt<CGDataFormat> OutputFormat(
-    cl::desc("Format of output data"), cl::sub(DumpSubcommand),
-    cl::init(CD_Text),
-    cl::values(clEnumValN(CD_Text, "text", "Text encoding"),
-               clEnumValN(CD_Binary, "binary", "Binary encoding")));
-
-cl::opt<bool> ShowCGDataVersion("cgdata-version",
-                                cl::desc("Show cgdata version."),
-                                cl::sub(ShowSubcommand));
-
-static void exitWithError(Twine Message, std::string Whence = "",
-                          std::string Hint = "") {
-  WithColor::error();
-  if (!Whence.empty())
-    errs() << Whence << ": ";
-  errs() << Message << "\n";
-  if (!Hint.empty())
-    WithColor::note() << Hint << "\n";
-  ::exit(1);
-}
-
-static void exitWithError(Error E, StringRef Whence = "") {
-  if (E.isA<CGDataError>()) {
-    handleAllErrors(std::move(E), [&](const CGDataError &IPE) {
-      exitWithError(IPE.message(), std::string(Whence));
-    });
-    return;
-  }
-
-  exitWithError(toString(std::move(E)), std::string(Whence));
-}
-
-static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
-  exitWithError(EC.message(), std::string(Whence));
-}
-
-static int dump_main(int argc, const char *argv[]) {
-  if (Filename == OutputFilename) {
-    errs() << sys::path::filename(argv[0]) << " " << argv[1]
-           << ": Input file name cannot be the same as the output file name!\n";
-    return 1;
-  }
-
-  std::error_code EC;
-  raw_fd_ostream OS(OutputFilename.data(), EC,
-                    OutputFormat == CD_Text ? sys::fs::OF_TextWithCRLF
-                                            : sys::fs::OF_None);
-  if (EC)
-    exitWithErrorCode(EC, OutputFilename);
-
-  auto FS = vfs::getRealFileSystem();
-  auto ReaderOrErr = CodeGenDataReader::create(Filename, *FS);
-  if (Error E = ReaderOrErr.takeError())
-    exitWithError(std::move(E), Filename);
-
-  CodeGenDataWriter Writer;
-  auto Reader = ReaderOrErr->get();
-  if (Reader->hasOutlinedHashTree()) {
-    OutlinedHashTreeRecord Record(Reader->releaseOutlinedHashTree());
-    Writer.addRecord(Record);
-  }
-
-  if (OutputFormat == CD_Text) {
-    if (Error E = Writer.writeText(OS))
-      exitWithError(std::move(E));
-  } else {
-    if (Error E = Writer.write(OS))
-      exitWithError(std::move(E));
-  }
-
-  return 0;
-}
-
-static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
-                         OutlinedHashTreeRecord &GlobalOutlineRecord);
-
-static bool handleArchive(StringRef Filename, Archive &Arch,
-                          OutlinedHashTreeRecord &GlobalOutlineRecord) {
-  bool Result = true;
-  Error Err = Error::success();
-  for (const auto &Child : Arch.children(Err)) {
-    auto BuffOrErr = Child.getMemoryBufferRef();
-    if (Error E = BuffOrErr.takeError())
-      exitWithError(std::move(E), Filename);
-    auto NameOrErr = Child.getName();
-    if (Error E = NameOrErr.takeError())
-      exitWithError(std::move(E), Filename);
-    std::string Name = (Filename + "(" + NameOrErr.get() + ")").str();
-    Result &= handleBuffer(Name, BuffOrErr.get(), GlobalOutlineRecord);
-  }
-  if (Err)
-    exitWithError(std::move(Err), Filename);
-  return Result;
-}
-
-static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
-                         OutlinedHashTreeRecord &GlobalOutlineRecord) {
-  Expected<std::unique_ptr<Binary>> BinOrErr = object::createBinary(Buffer);
-  if (Error E = BinOrErr.takeError())
-    exitWithError(std::move(E), Filename);
-
-  bool Result = true;
-  if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) {
-    if (Error E =
-            CodeGenDataReader::mergeFromObjectFile(Obj, GlobalOutlineRecord))
-      exitWithError(std::move(E), Filename);
-  } else if (auto *Arch = dyn_cast<Archive>(BinOrErr->get())) {
-    Result &= handleArchive(Filename, *Arch, GlobalOutlineRecord);
-  } else {
-    // TODO: Support for the MachO universal binary format.
-    errs() << "Error: unsupported binary file: " << Filename << "\n";
-    Result = false;
-  }
-
-  return Result;
-}
-
-static bool handleFile(StringRef Filename,
-                       OutlinedHashTreeRecord &GlobalOutlineRecord) {
-  ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
-      MemoryBuffer::getFileOrSTDIN(Filename);
-  if (std::error_code EC = BuffOrErr.getError())
-    exitWithErrorCode(EC, Filename);
-  return handleBuffer(Filename, *BuffOrErr.get(), GlobalOutlineRecord);
-}
-
-static int merge_main(int argc, const char *argv[]) {
-  bool Result = true;
-  OutlinedHashTreeRecord GlobalOutlineRecord;
-  for (auto &Filename : InputFilenames)
-    Result &= handleFile(Filename, GlobalOutlineRecord);
-
-  if (!Result) {
-    errs() << "Error: failed to merge codegen data files.\n";
-    return 1;
-  }
-
-  CodeGenDataWriter Writer;
-  if (!GlobalOutlineRecord.empty())
-    Writer.addRecord(GlobalOutlineRecord);
-
-  std::error_code EC;
-  raw_fd_ostream Output(OutputFilename, EC, sys::fs::OF_None);
-  if (EC)
-    exitWithErrorCode(EC, OutputFilename);
-
-  if (auto E = Writer.write(Output))
-    exitWithError(std::move(E));
-
-  return 0;
-}
-
-static int show_main(int argc, const char *argv[]) {
-  if (Filename == OutputFilename) {
-    errs() << sys::path::filename(argv[0]) << " " << argv[1]
-           << ": Input file name cannot be the same as the output file name!\n";
-    return 1;
-  }
-
-  std::error_code EC;
-  raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
-  if (EC)
-    exitWithErrorCode(EC, OutputFilename);
-
-  auto FS = vfs::getRealFileSystem();
-  auto ReaderOrErr = CodeGenDataReader::create(Filename, *FS);
-  if (Error E = ReaderOrErr.takeError())
-    exitWithError(std::move(E), Filename);
-
-  auto Reader = ReaderOrErr->get();
-  if (ShowCGDataVersion)
-    OS << "Version: " << Reader->getVersion() << "\n";
-
-  if (Reader->hasOutlinedHashTree()) {
-    auto Tree = Reader->releaseOutlinedHashTree();
-    OS << "Outlined hash tree:\n";
-    OS << "  Total Node Count: " << Tree->size() << "\n";
-    OS << "  Terminal Node Count: " << Tree->size(/*GetTerminalCountOnly=*/true)
-       << "\n";
-    OS << "  Depth: " << Tree->depth() << "\n";
-  }
-
-  return 0;
-}
-
-int llvm_cgdata_main(int argc, char **argvNonConst, const llvm::ToolContext &) {
-  const char **argv = const_cast<const char **>(argvNonConst);
-
-  StringRef ProgName(sys::path::filename(argv[0]));
-
-  if (argc < 2) {
-    errs() << ProgName
-           << ": No subcommand specified! Run llvm-cgdata --help for usage.\n";
-    return 1;
-  }
-
-  cl::ParseCommandLineOptions(argc, argv, "LLVM codegen data\n");
-
-  if (DumpSubcommand)
-    return dump_main(argc, argv);
-
-  if (MergeSubcommand)
-    return merge_main(argc, argv);
-
-  if (ShowSubcommand)
-    return show_main(argc, argv);
-
-  errs() << ProgName
-         << ": Unknown command. Run llvm-cgdata --help for usage.\n";
-  return 1;
-}

From 02cafa895c917a4b1726e64a5870877c95826be4 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson@arm.com>
Date: Fri, 16 Aug 2024 14:39:43 +0000
Subject: [PATCH 188/427] [AArch64] Adopt updated B16B16 target flags

The enablement of SVE/SME non-widening BFloat16 instructions was recently
changed in response to an architecture update, in which:
	- FEAT_SVE_B16B16 was weakened
	- FEAT_SME_B16B16 was introduced
New flags, 'sve-b16b16' and 'sme-b16b16' were introduced to replace the
existing 'b16b16'. This was acheived in the below two patches.
	- https://github.com/llvm/llvm-project/pull/101480
	- https://github.com/llvm/llvm-project/pull/102501
Ideally, the interface change introduced here will be valid in LLVM-19.
We do not see it necessary to back-port the entire change, but just to add
'sme-b16b16' and 'sve-b16b16' as aliases to the existing (and unchanged)
'b16b16' and 'sme2' flags which together cover all of these features.

The predication of Bf16 variants of svmin/svminnm and svmax/svmaxnm is also
fixed in this change.
---
 clang/include/clang/Basic/arm_sve.td          | 26 +++++++++++++++----
 .../print-supported-extensions-aarch64.c      |  2 ++
 llvm/lib/Target/AArch64/AArch64Features.td    |  9 +++++++
 .../TargetParser/TargetParserTest.cpp         | 15 ++++++++++-
 4 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 94c093d891156..fb11d743fd647 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2116,7 +2116,7 @@ def SVFCLAMP_BF   : SInst<"svclamp[_{d}]", "dddd", "b", MergeNone, "aarch64_sve_
 multiclass MinMaxIntr<string i, string zm, string mul, string t> {
   def SVS # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "csil",     MergeNone, "aarch64_sve_s" # i # zm # "_" # mul, [IsStreaming], []>;
   def SVU # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "UcUsUiUl", MergeNone, "aarch64_sve_u" # i # zm # "_" # mul, [IsStreaming], []>;
-  def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "bhfd",      MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>;
+  def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "hfd",      MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>;
 }
 
 let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
@@ -2134,11 +2134,11 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
 }
 
 multiclass SInstMinMaxByVector<string name> {
-  def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>;
-  def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>;
+  def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>;
+  def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>;
 
-  def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>;
-  def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>;
+  def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>;
+  def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>;
 }
 
 let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
@@ -2172,9 +2172,25 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
   def SVFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]",  "44dd",   "hfd",      MergeNone, "aarch64_sve_fclamp_single_x4",  [IsStreaming], []>;
 }
 
+multiclass BfSingleMultiVector<string name> {
+  def NAME # _SINGLE_X2 : SInst<"sv" # name # "[_single_{d}_x2]", "22d", "b", MergeNone, "aarch64_sve_f" # name # "_single_x2", [IsStreaming], []>;
+  def NAME # _SINGLE_X4 : SInst<"sv" # name # "[_single_{d}_x4]", "44d", "b", MergeNone, "aarch64_sve_f" # name # "_single_x4", [IsStreaming], []>;
+
+  def NAME # _X2 : SInst<"sv" # name # "[_{d}_x2]", "222", "b", MergeNone, "aarch64_sve_f" # name # "_x2", [IsStreaming], []>;
+  def NAME # _X4 : SInst<"sv" # name # "[_{d}_x4]", "444", "b", MergeNone, "aarch64_sve_f" # name # "_x4", [IsStreaming], []>;
+}
+
 let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,b16b16"in {
   def SVBFCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]",  "22dd",   "b",      MergeNone, "aarch64_sve_bfclamp_single_x2",  [IsStreaming], []>;
   def SVBFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]",  "44dd",   "b",      MergeNone, "aarch64_sve_bfclamp_single_x4",  [IsStreaming], []>;
+
+  // bfmin, bfmax (single, multi)
+  defm SVBFMIN : BfSingleMultiVector<"min">;
+  defm SVBFMAX : BfSingleMultiVector<"max">;
+
+  // bfminnm, bfmaxnm (single, multi)
+  defm SVBFMINNM : BfSingleMultiVector<"minnm">;
+  defm SVBFMAXNM : BfSingleMultiVector<"maxnm">;
 }
 
 let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c
index 6b969d50610f8..023647aafc8b7 100644
--- a/clang/test/Driver/print-supported-extensions-aarch64.c
+++ b/clang/test/Driver/print-supported-extensions-aarch64.c
@@ -55,6 +55,7 @@
 // CHECK-NEXT:     sha3                FEAT_SHA3, FEAT_SHA512                                 Enable SHA512 and SHA3 support
 // CHECK-NEXT:     sm4                 FEAT_SM4, FEAT_SM3                                     Enable SM3 and SM4 support
 // CHECK-NEXT:     sme                 FEAT_SME                                               Enable Scalable Matrix Extension (SME)
+// CHECK-NEXT:     sme-b16b16          FEAT_SME_B16B16                                        Enable SME2.1 ZA-targeting non-widening BFloat16 instructions
 // CHECK-NEXT:     sme-f16f16          FEAT_SME_F16F16                                        Enable SME non-widening Float16 instructions
 // CHECK-NEXT:     sme-f64f64          FEAT_SME_F64F64                                        Enable Scalable Matrix Extension (SME) F64F64 instructions
 // CHECK-NEXT:     sme-f8f16           FEAT_SME_F8F16                                         Enable Scalable Matrix Extension (SME) F8F16 instructions
@@ -71,6 +72,7 @@
 // CHECK-NEXT:     ssve-fp8dot4        FEAT_SSVE_FP8DOT4                                      Enable SVE2 FP8 4-way dot product instructions
 // CHECK-NEXT:     ssve-fp8fma         FEAT_SSVE_FP8FMA                                       Enable SVE2 FP8 multiply-add instructions
 // CHECK-NEXT:     sve                 FEAT_SVE                                               Enable Scalable Vector Extension (SVE) instructions
+// CHECK-NEXT:     sve-b16b16          FEAT_SVE_B16B16                                        Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions
 // CHECK-NEXT:     sve2                FEAT_SVE2                                              Enable Scalable Vector Extension 2 (SVE2) instructions
 // CHECK-NEXT:     sve2-aes            FEAT_SVE_AES, FEAT_SVE_PMULL128                        Enable AES SVE2 instructions
 // CHECK-NEXT:     sve2-bitperm        FEAT_SVE_BitPerm                                       Enable bit permutation SVE2 instructions
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index a1ae0873fc190..69af6f7efa783 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -438,6 +438,15 @@ def FeatureSVE2p1: ExtensionWithMArch<"sve2p1", "SVE2p1", "FEAT_SVE2p1",
 def FeatureB16B16 : ExtensionWithMArch<"b16b16", "B16B16", "FEAT_SVE_B16B16",
   "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions", [FeatureBF16]>;
 
+// FeatureSVEB16B16 and FeatureSMEB16B16 act as aliases for {FeatureB16B16}, and
+// {FeatureB16B16, FeatureSME2} respectively. This allows LLVM-20 interfacing programs
+// that use '+sve-b16b16' and '+sme-b16b16' to compile in LLVM-19.
+def FeatureSVEB16B16 : ExtensionWithMArch<"sve-b16b16", "SVEB16B16", "FEAT_SVE_B16B16",
+  "Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions", [FeatureB16B16]>;
+
+def FeatureSMEB16B16 : ExtensionWithMArch<"sme-b16b16", "SMEB16B16", "FEAT_SME_B16B16",
+  "Enable SME2.1 ZA-targeting non-widening BFloat16 instructions", [FeatureSME2, FeatureB16B16]>;
+
 def FeatureSMEF16F16 : ExtensionWithMArch<"sme-f16f16", "SMEF16F16", "FEAT_SME_F16F16",
   "Enable SME non-widening Float16 instructions", [FeatureSME2]>;
 
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index 3d55b0309d26f..9d08dd83684c9 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -2005,6 +2005,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
       AArch64::AEK_CPA,          AArch64::AEK_PAUTHLR,
       AArch64::AEK_TLBIW,        AArch64::AEK_JSCVT,
       AArch64::AEK_FCMA,         AArch64::AEK_FP8,
+      AArch64::AEK_SMEB16B16,    AArch64::AEK_SVEB16B16,
 
   };
 
@@ -2043,6 +2044,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
   EXPECT_TRUE(llvm::is_contained(Features, "+sve2-sha3"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sve2-bitperm"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sve2p1"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+sve-b16b16"));
   EXPECT_TRUE(llvm::is_contained(Features, "+b16b16"));
   EXPECT_TRUE(llvm::is_contained(Features, "+rcpc"));
   EXPECT_TRUE(llvm::is_contained(Features, "+rand"));
@@ -2063,6 +2065,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
   EXPECT_TRUE(llvm::is_contained(Features, "+sme-f64f64"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sme-i16i64"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sme-f16f16"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+sme-b16b16"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sme2"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sme2p1"));
   EXPECT_TRUE(llvm::is_contained(Features, "+hbc"));
@@ -2188,6 +2191,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
       {"lse", "nolse", "+lse", "-lse"},
       {"rdm", "nordm", "+rdm", "-rdm"},
       {"sve", "nosve", "+sve", "-sve"},
+      {"sve-b16b16", "nosve-b16b16", "+sve-b16b16", "-sve-b16b16"},
       {"sve2", "nosve2", "+sve2", "-sve2"},
       {"sve2-aes", "nosve2-aes", "+sve2-aes", "-sve2-aes"},
       {"sve2-sm4", "nosve2-sm4", "+sve2-sm4", "-sve2-sm4"},
@@ -2212,6 +2216,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
       {"sme-f64f64", "nosme-f64f64", "+sme-f64f64", "-sme-f64f64"},
       {"sme-i16i64", "nosme-i16i64", "+sme-i16i64", "-sme-i16i64"},
       {"sme-f16f16", "nosme-f16f16", "+sme-f16f16", "-sme-f16f16"},
+      {"sme-b16b16", "nosme-b16b16", "+sme-b16b16", "-sme-b16b16"},
       {"sme2", "nosme2", "+sme2", "-sme2"},
       {"sme2p1", "nosme2p1", "+sme2p1", "-sme2p1"},
       {"hbc", "nohbc", "+hbc", "-hbc"},
@@ -2452,6 +2457,12 @@ AArch64ExtensionDependenciesBaseArchTestParams
         {AArch64::ARMV8A, {"nobf16", "b16b16"}, {"bf16", "b16b16"}, {}},
         {AArch64::ARMV8A, {"b16b16", "nobf16"}, {}, {"bf16", "b16b16"}},
 
+        // b16b16 -> {sve-b16b16, sme-b16b16}
+        {AArch64::ARMV8A, {"nob16b16", "sve-b16b16"}, {"b16b16", "sve-b16b16"}, {}},
+        {AArch64::ARMV8A, {"sve-b16b16", "nob16b16"}, {}, {"sve-b16b16", "b16b16"}},
+        {AArch64::ARMV8A, {"nob16b16", "sme-b16b16"}, {"b16b16", "sme-b16b16"}, {}},
+        {AArch64::ARMV8A, {"sme-b16b16", "nob16b16"}, {}, {"b16b16", "sme-b16b16"}},
+
         // sve -> {sve2, f32mm, f64mm}
         {AArch64::ARMV8A, {"nosve", "sve2"}, {"sve", "sve2"}, {}},
         {AArch64::ARMV8A, {"sve2", "nosve"}, {}, {"sve", "sve2"}},
@@ -2491,7 +2502,7 @@ AArch64ExtensionDependenciesBaseArchTestParams
         {AArch64::ARMV8A, {"sme-fa64", "nosme"}, {}, {"sme", "sme-fa64"}},
 
         // sme2 -> {sme2p1, ssve-fp8fma, ssve-fp8dot2, ssve-fp8dot4, sme-f8f16,
-        // sme-f8f32}
+        // sme-f8f32, sme-b16b16}
         {AArch64::ARMV8A, {"nosme2", "sme2p1"}, {"sme2", "sme2p1"}, {}},
         {AArch64::ARMV8A, {"sme2p1", "nosme2"}, {}, {"sme2", "sme2p1"}},
         {AArch64::ARMV8A,
@@ -2522,6 +2533,8 @@ AArch64ExtensionDependenciesBaseArchTestParams
         {AArch64::ARMV8A, {"sme-f8f16", "nosme2"}, {}, {"sme2", "sme-f8f16"}},
         {AArch64::ARMV8A, {"nosme2", "sme-f8f32"}, {"sme2", "sme-f8f32"}, {}},
         {AArch64::ARMV8A, {"sme-f8f32", "nosme2"}, {}, {"sme2", "sme-f8f32"}},
+        {AArch64::ARMV8A, {"nosme2", "sme-b16b16"}, {"sme2", "sme-b16b16"}, {}},
+        {AArch64::ARMV8A, {"sme-b16b16", "nosme2"}, {}, {"sme2", "sme-b16b16"}},
 
         // fp8 -> {sme-f8f16, sme-f8f32}
         {AArch64::ARMV8A, {"nofp8", "sme-f8f16"}, {"fp8", "sme-f8f16"}, {}},

From 90f2d48965ca8a27f4b814ada987d169ca6a6f44 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Fri, 16 Aug 2024 11:08:34 -0400
Subject: [PATCH 189/427] [libc++] Fix rejects-valid in std::span copy
 construction (#104500)

Trying to copy-construct a std::span from another std::span holding an
incomplete type would fail as we evaluate the SFINAE for the range-based
constructor. The problem was that we checked for __is_std_span after
checking for the range being a contiguous_range, which hard-errored
because of arithmetic on a pointer to incomplete type.

As a drive-by, refactor the whole test and format it.

Fixes #104496

(cherry picked from commit 99696b35bc8a0054e0b0c1a26e8dd5049fa8c41b)
---
 libcxx/include/span                           |   2 +-
 .../views/views.span/span.cons/copy.pass.cpp  | 126 ++++++++++++------
 2 files changed, 86 insertions(+), 42 deletions(-)

diff --git a/libcxx/include/span b/libcxx/include/span
index 60d76d830f0f3..da631cdc3f90e 100644
--- a/libcxx/include/span
+++ b/libcxx/include/span
@@ -206,10 +206,10 @@ struct __is_std_span<span<_Tp, _Sz>> : true_type {};
 
 template <class _Range, class _ElementType>
 concept __span_compatible_range =
+    !__is_std_span<remove_cvref_t<_Range>>::value &&                //
     ranges::contiguous_range<_Range> &&                             //
     ranges::sized_range<_Range> &&                                  //
     (ranges::borrowed_range<_Range> || is_const_v<_ElementType>) && //
-    !__is_std_span<remove_cvref_t<_Range>>::value &&                //
     !__is_std_array<remove_cvref_t<_Range>>::value &&               //
     !is_array_v<remove_cvref_t<_Range>> &&                          //
     is_convertible_v<remove_reference_t<ranges::range_reference_t<_Range>> (*)[], _ElementType (*)[]>;
diff --git a/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp b/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp
index 28f13e122ddc5..d3990fd60a459 100644
--- a/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.cons/copy.pass.cpp
@@ -5,6 +5,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 
 // <span>
@@ -14,58 +15,101 @@
 #include <span>
 #include <cassert>
 #include <string>
+#include <utility>
 
 #include "test_macros.h"
 
-template <typename T>
-constexpr bool doCopy(const T &rhs)
-{
-    ASSERT_NOEXCEPT(T{rhs});
-    T lhs{rhs};
-    return lhs.data() == rhs.data()
-     &&    lhs.size() == rhs.size();
-}
+template <class T>
+constexpr void test() {
+  ASSERT_NOEXCEPT(std::span<T>(std::declval<std::span<T> const&>()));
+  ASSERT_NOEXCEPT(std::span<T>{std::declval<std::span<T> const&>()});
 
-struct A{};
-
-template <typename T>
-void testCV ()
-{
-    int  arr[] = {1,2,3};
-    assert((doCopy(std::span<T>  ()          )));
-    assert((doCopy(std::span<T,0>()          )));
-    assert((doCopy(std::span<T>  (&arr[0], 1))));
-    assert((doCopy(std::span<T,1>(&arr[0], 1))));
-    assert((doCopy(std::span<T>  (&arr[0], 2))));
-    assert((doCopy(std::span<T,2>(&arr[0], 2))));
+  // dynamic_extent
+  {
+    std::span<T> x;
+    std::span<T> copy(x);
+    assert(copy.data() == x.data());
+    assert(copy.size() == x.size());
+  }
+  {
+    T array[3] = {};
+    std::span<T> x(array, 3);
+    std::span<T> copy(x);
+    assert(copy.data() == array);
+    assert(copy.size() == 3);
+  }
+  {
+    T array[3] = {};
+    std::span<T> x(array, 2);
+    std::span<T> copy(x);
+    assert(copy.data() == array);
+    assert(copy.size() == 2);
+  }
+
+  // static extent
+  {
+    std::span<T, 0> x;
+    std::span<T, 0> copy(x);
+    assert(copy.data() == x.data());
+    assert(copy.size() == x.size());
+  }
+  {
+    T array[3] = {};
+    std::span<T, 3> x(array);
+    std::span<T, 3> copy(x);
+    assert(copy.data() == array);
+    assert(copy.size() == 3);
+  }
+  {
+    T array[2] = {};
+    std::span<T, 2> x(array);
+    std::span<T, 2> copy(x);
+    assert(copy.data() == array);
+    assert(copy.size() == 2);
+  }
 }
 
+struct Foo {};
+
+constexpr bool test_all() {
+  test<int>();
+  test<const int>();
+  test<volatile int>();
+  test<const volatile int>();
 
-int main(int, char**)
-{
-    constexpr int carr[] = {1,2,3};
+  test<long>();
+  test<const long>();
+  test<volatile long>();
+  test<const volatile long>();
 
-    static_assert(doCopy(std::span<      int>  ()),            "");
-    static_assert(doCopy(std::span<      int,0>()),            "");
-    static_assert(doCopy(std::span<const int>  (&carr[0], 1)), "");
-    static_assert(doCopy(std::span<const int,1>(&carr[0], 1)), "");
-    static_assert(doCopy(std::span<const int>  (&carr[0], 2)), "");
-    static_assert(doCopy(std::span<const int,2>(&carr[0], 2)), "");
+  test<double>();
+  test<const double>();
+  test<volatile double>();
+  test<const volatile double>();
 
-    static_assert(doCopy(std::span<long>()),   "");
-    static_assert(doCopy(std::span<double>()), "");
-    static_assert(doCopy(std::span<A>()),      "");
+  // Note: Can't test non-fundamental types with volatile because we require `T*` to be indirectly_readable,
+  //       which isn't the case when T is volatile.
+  test<Foo>();
+  test<const Foo>();
 
-    std::string s;
-    assert(doCopy(std::span<std::string>   ()     ));
-    assert(doCopy(std::span<std::string, 0>()     ));
-    assert(doCopy(std::span<std::string>   (&s, 1)));
-    assert(doCopy(std::span<std::string, 1>(&s, 1)));
+  test<std::string>();
+  test<const std::string>();
+
+  // Regression test for https://github.com/llvm/llvm-project/issues/104496
+  {
+    struct Incomplete;
+    std::span<Incomplete> x;
+    std::span<Incomplete> copy(x);
+    assert(copy.data() == x.data());
+    assert(copy.size() == x.size());
+  }
+
+  return true;
+}
 
-    testCV<               int>();
-    testCV<const          int>();
-    testCV<      volatile int>();
-    testCV<const volatile int>();
+int main(int, char**) {
+  test_all();
+  static_assert(test_all());
 
   return 0;
 }

From b45f75295e3038ef79dce4ac63fbf95b659eebe5 Mon Sep 17 00:00:00 2001
From: Piotr Zegar <me@piotrzegar.pl>
Date: Thu, 25 Jul 2024 17:26:01 +0200
Subject: [PATCH 190/427] [clang-tidy] Fix crash in C language in
 readability-non-const-parameter (#100461)

Fix crash that happen when redeclaration got
different number of parameters than definition.

Fixes #100340

(cherry picked from commit a27f816fe56af9cc7f4f296ad6c577f6ea64349f)
---
 .../clang-tidy/readability/NonConstParameterCheck.cpp |  5 ++++-
 clang-tools-extra/docs/ReleaseNotes.rst               |  4 ++++
 .../checkers/readability/non-const-parameter.c        | 11 +++++++++++
 3 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c

diff --git a/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp b/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp
index 95a3a5165e2e8..43b69a24bdb16 100644
--- a/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp
@@ -157,9 +157,12 @@ void NonConstParameterCheck::diagnoseNonConstParameters() {
     if (!Function)
       continue;
     unsigned Index = Par->getFunctionScopeIndex();
-    for (FunctionDecl *FnDecl : Function->redecls())
+    for (FunctionDecl *FnDecl : Function->redecls()) {
+      if (FnDecl->getNumParams() <= Index)
+        continue;
       Fixes.push_back(FixItHint::CreateInsertion(
           FnDecl->getParamDecl(Index)->getBeginLoc(), "const "));
+    }
 
     diag(Par->getLocation(), "pointer parameter '%0' can be pointer to const")
         << Par->getName() << Fixes;
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 083b098d05d4a..7146196862986 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -496,6 +496,10 @@ Changes in existing checks
   ``static_cast``. Fixed false positives in C++20 spaceship operator by ignoring
   casts in implicit and defaulted functions.
 
+- Improved :doc:`readability-non-const-parameter
+  <clang-tidy/checks/readability/non-const-parameter>` check to not crash when
+  redeclaration have fewer parameters than expected.
+
 - Improved :doc:`readability-redundant-inline-specifier
   <clang-tidy/checks/readability/redundant-inline-specifier>` check to properly
   emit warnings for static data member with an in-class initializer.
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c b/clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c
new file mode 100644
index 0000000000000..db50467f3dd94
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/non-const-parameter.c
@@ -0,0 +1,11 @@
+// RUN: %check_clang_tidy %s readability-non-const-parameter %t
+
+static int f();
+
+int f(p)
+  int *p;
+// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: pointer parameter 'p' can be pointer to const [readability-non-const-parameter]
+// CHECK-FIXES: {{^}}  const int *p;{{$}}
+{
+    return *p;
+}

From 8fbe69a407b2784c7e9d91a3c69daa9786b14391 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye@arm.com>
Date: Tue, 6 Aug 2024 11:39:01 +0100
Subject: [PATCH 191/427] [AArch64] Add streaming-mode stack hazard
 optimization remarks (#101695)

Emit an optimization remark when objects in the stack frame may cause
hazards in a streaming mode function. The analysis requires either the
`aarch64-stack-hazard-size` or `aarch64-stack-hazard-remark-size` flag
to be set by the user, with the former flag taking precedence.

(cherry picked from commit a98a0dcf63f54c54c5601a34c9f8c10cde0162d6)
---
 .../llvm/CodeGen/TargetFrameLowering.h        |   6 +
 llvm/lib/CodeGen/PrologEpilogInserter.cpp     |   3 +
 .../Target/AArch64/AArch64FrameLowering.cpp   | 204 +++++++++++++++++-
 .../lib/Target/AArch64/AArch64FrameLowering.h |   6 +-
 .../AArch64/ssve-stack-hazard-remarks.ll      | 152 +++++++++++++
 .../CodeGen/AArch64/sve-stack-frame-layout.ll |   4 +-
 6 files changed, 364 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll

diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 0656c0d739fdf..d8c9d0a432ad8 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -15,6 +15,7 @@
 
 #include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/Support/TypeSize.h"
 #include <vector>
 
@@ -473,6 +474,11 @@ class TargetFrameLowering {
   /// Return the frame base information to be encoded in the DWARF subprogram
   /// debug info.
   virtual DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const;
+
+  /// This method is called at the end of prolog/epilog code insertion, so
+  /// targets can emit remarks based on the final frame layout.
+  virtual void emitRemarks(const MachineFunction &MF,
+                           MachineOptimizationRemarkEmitter *ORE) const {};
 };
 
 } // End llvm namespace
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index cd5d877e53d82..f4490873cfdcd 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -341,6 +341,9 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
            << ore::NV("Function", MF.getFunction().getName()) << "'";
   });
 
+  // Emit any remarks implemented for the target, based on final frame layout.
+  TFI->emitRemarks(MF, ORE);
+
   delete RS;
   SaveBlocks.clear();
   RestoreBlocks.clear();
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index bd530903bb664..ba46ededc63a8 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -240,6 +240,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@@ -275,6 +276,10 @@ cl::opt<bool> EnableHomogeneousPrologEpilog(
 // Stack hazard padding size. 0 = disabled.
 static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size",
                                          cl::init(0), cl::Hidden);
+// Stack hazard size for analysis remarks. StackHazardSize takes precedence.
+static cl::opt<unsigned>
+    StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
+                          cl::Hidden);
 // Whether to insert padding into non-streaming functions (for testing).
 static cl::opt<bool>
     StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming",
@@ -2615,9 +2620,16 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
   const auto &MFI = MF.getFrameInfo();
 
   int64_t ObjectOffset = MFI.getObjectOffset(FI);
+  StackOffset SVEStackSize = getSVEStackSize(MF);
+
+  // For VLA-area objects, just emit an offset at the end of the stack frame.
+  // Whilst not quite correct, these objects do live at the end of the frame and
+  // so it is more useful for analysis for the offset to reflect this.
+  if (MFI.isVariableSizedObjectIndex(FI)) {
+    return StackOffset::getFixed(-((int64_t)MFI.getStackSize())) - SVEStackSize;
+  }
 
   // This is correct in the absence of any SVE stack objects.
-  StackOffset SVEStackSize = getSVEStackSize(MF);
   if (!SVEStackSize)
     return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
 
@@ -3528,13 +3540,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
   return true;
 }
 
-// Return the FrameID for a Load/Store instruction by looking at the MMO.
-static std::optional<int> getLdStFrameID(const MachineInstr &MI,
-                                         const MachineFrameInfo &MFI) {
-  if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
-    return std::nullopt;
-
-  MachineMemOperand *MMO = *MI.memoperands_begin();
+// Return the FrameID for a MMO.
+static std::optional<int> getMMOFrameID(MachineMemOperand *MMO,
+                                        const MachineFrameInfo &MFI) {
   auto *PSV =
       dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue());
   if (PSV)
@@ -3552,6 +3560,15 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
   return std::nullopt;
 }
 
+// Return the FrameID for a Load/Store instruction by looking at the first MMO.
+static std::optional<int> getLdStFrameID(const MachineInstr &MI,
+                                         const MachineFrameInfo &MFI) {
+  if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
+    return std::nullopt;
+
+  return getMMOFrameID(*MI.memoperands_begin(), MFI);
+}
+
 // Check if a Hazard slot is needed for the current function, and if so create
 // one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex,
 // which can be used to determine if any hazard padding is needed.
@@ -5029,3 +5046,174 @@ void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
     MI->eraseFromParent();
   }
 }
+
+struct StackAccess {
+  enum AccessType {
+    NotAccessed = 0, // Stack object not accessed by load/store instructions.
+    GPR = 1 << 0,    // A general purpose register.
+    PPR = 1 << 1,    // A predicate register.
+    FPR = 1 << 2,    // A floating point/Neon/SVE register.
+  };
+
+  int Idx;
+  StackOffset Offset;
+  int64_t Size;
+  unsigned AccessTypes;
+
+  StackAccess() : Idx(0), Offset(), Size(0), AccessTypes(NotAccessed) {}
+
+  bool operator<(const StackAccess &Rhs) const {
+    return std::make_tuple(start(), Idx) <
+           std::make_tuple(Rhs.start(), Rhs.Idx);
+  }
+
+  bool isCPU() const {
+    // Predicate register load and store instructions execute on the CPU.
+    return AccessTypes & (AccessType::GPR | AccessType::PPR);
+  }
+  bool isSME() const { return AccessTypes & AccessType::FPR; }
+  bool isMixed() const { return isCPU() && isSME(); }
+
+  int64_t start() const { return Offset.getFixed() + Offset.getScalable(); }
+  int64_t end() const { return start() + Size; }
+
+  std::string getTypeString() const {
+    switch (AccessTypes) {
+    case AccessType::FPR:
+      return "FPR";
+    case AccessType::PPR:
+      return "PPR";
+    case AccessType::GPR:
+      return "GPR";
+    case AccessType::NotAccessed:
+      return "NA";
+    default:
+      return "Mixed";
+    }
+  }
+
+  void print(raw_ostream &OS) const {
+    OS << getTypeString() << " stack object at [SP"
+       << (Offset.getFixed() < 0 ? "" : "+") << Offset.getFixed();
+    if (Offset.getScalable())
+      OS << (Offset.getScalable() < 0 ? "" : "+") << Offset.getScalable()
+         << " * vscale";
+    OS << "]";
+  }
+};
+
+static inline raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) {
+  SA.print(OS);
+  return OS;
+}
+
+void AArch64FrameLowering::emitRemarks(
+    const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const {
+
+  SMEAttrs Attrs(MF.getFunction());
+  if (Attrs.hasNonStreamingInterfaceAndBody())
+    return;
+
+  const uint64_t HazardSize =
+      (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;
+
+  if (HazardSize == 0)
+    return;
+
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  // Bail if function has no stack objects.
+  if (!MFI.hasStackObjects())
+    return;
+
+  std::vector<StackAccess> StackAccesses(MFI.getNumObjects());
+
+  size_t NumFPLdSt = 0;
+  size_t NumNonFPLdSt = 0;
+
+  // Collect stack accesses via Load/Store instructions.
+  for (const MachineBasicBlock &MBB : MF) {
+    for (const MachineInstr &MI : MBB) {
+      if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
+        continue;
+      for (MachineMemOperand *MMO : MI.memoperands()) {
+        std::optional<int> FI = getMMOFrameID(MMO, MFI);
+        if (FI && !MFI.isDeadObjectIndex(*FI)) {
+          int FrameIdx = *FI;
+
+          size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects();
+          if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) {
+            StackAccesses[ArrIdx].Idx = FrameIdx;
+            StackAccesses[ArrIdx].Offset =
+                getFrameIndexReferenceFromSP(MF, FrameIdx);
+            StackAccesses[ArrIdx].Size = MFI.getObjectSize(FrameIdx);
+          }
+
+          unsigned RegTy = StackAccess::AccessType::GPR;
+          if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
+            if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
+              RegTy = StackAccess::PPR;
+            else
+              RegTy = StackAccess::FPR;
+          } else if (AArch64InstrInfo::isFpOrNEON(MI)) {
+            RegTy = StackAccess::FPR;
+          }
+
+          StackAccesses[ArrIdx].AccessTypes |= RegTy;
+
+          if (RegTy == StackAccess::FPR)
+            ++NumFPLdSt;
+          else
+            ++NumNonFPLdSt;
+        }
+      }
+    }
+  }
+
+  if (NumFPLdSt == 0 || NumNonFPLdSt == 0)
+    return;
+
+  llvm::sort(StackAccesses);
+  StackAccesses.erase(llvm::remove_if(StackAccesses,
+                                      [](const StackAccess &S) {
+                                        return S.AccessTypes ==
+                                               StackAccess::NotAccessed;
+                                      }),
+                      StackAccesses.end());
+
+  SmallVector<const StackAccess *> MixedObjects;
+  SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs;
+
+  if (StackAccesses.front().isMixed())
+    MixedObjects.push_back(&StackAccesses.front());
+
+  for (auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end());
+       It != End; ++It) {
+    const auto &First = *It;
+    const auto &Second = *(It + 1);
+
+    if (Second.isMixed())
+      MixedObjects.push_back(&Second);
+
+    if ((First.isSME() && Second.isCPU()) ||
+        (First.isCPU() && Second.isSME())) {
+      uint64_t Distance = static_cast<uint64_t>(Second.start() - First.end());
+      if (Distance < HazardSize)
+        HazardPairs.emplace_back(&First, &Second);
+    }
+  }
+
+  auto EmitRemark = [&](llvm::StringRef Str) {
+    ORE->emit([&]() {
+      auto R = MachineOptimizationRemarkAnalysis(
+          "sme", "StackHazard", MF.getFunction().getSubprogram(), &MF.front());
+      return R << formatv("stack hazard in '{0}': ", MF.getName()).str() << Str;
+    });
+  };
+
+  for (const auto &P : HazardPairs)
+    EmitRemark(formatv("{0} is too close to {1}", *P.first, *P.second).str());
+
+  for (const auto *Obj : MixedObjects)
+    EmitRemark(
+        formatv("{0} accessed by both GP and FP instructions", *Obj).str());
+}
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 0ebab1700e9ce..c197312496208 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -13,8 +13,9 @@
 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
 #define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
 
-#include "llvm/Support/TypeSize.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/Support/TypeSize.h"
 
 namespace llvm {
 
@@ -178,6 +179,9 @@ class AArch64FrameLowering : public TargetFrameLowering {
   inlineStackProbeLoopExactMultiple(MachineBasicBlock::iterator MBBI,
                                     int64_t NegProbeSize,
                                     Register TargetReg) const;
+
+  void emitRemarks(const MachineFunction &MF,
+                   MachineOptimizationRemarkEmitter *ORE) const override;
 };
 
 } // End llvm namespace
diff --git a/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll b/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll
new file mode 100644
index 0000000000000..0b6bf3892a0c2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll
@@ -0,0 +1,152 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=sme -aarch64-stack-hazard-remark-size=64 -o /dev/null < %s 2>&1 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=sme -aarch64-stack-hazard-size=1024 -o /dev/null < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-PADDING
+
+; Don't emit remarks for non-streaming functions.
+define float @csr_x20_stackargs_notsc(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) {
+; CHECK-NOT: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs_notsc':
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs_notsc':
+entry:
+  tail call void asm sideeffect "", "~{x20}"() #1
+  ret float %i
+}
+
+; Don't emit remarks for functions that only access GPR stack objects.
+define i64 @stackargs_gpr(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h, i64 %i) #2 {
+; CHECK-NOT: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs_gpr':
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs_gpr':
+entry:
+  ret i64 %i
+}
+
+; Don't emit remarks for functions that only access FPR stack objects.
+define double @stackargs_fpr(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i) #2 {
+; CHECK-NOT: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs_fpr':
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs_fpr':
+entry:
+  ret double %i
+}
+
+; As this case is handled by addition of stack hazard padding, only emit remarks when this is not switched on.
+define i32 @csr_d8_alloci64(i64 %d) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'csr_d8_alloci64': FPR stack object at [SP-16] is too close to GPR stack object at [SP-8]
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'csr_d8_alloci64':
+entry:
+  %a = alloca i64
+  tail call void asm sideeffect "", "~{d8}"() #1
+  store i64 %d, ptr %a
+  ret i32 0
+}
+
+; As this case is handled by addition of stack hazard padding, only emit remarks when this is not switched on.
+define i32 @csr_d8_allocnxv4i32(i64 %d) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'csr_d8_allocnxv4i32': FPR stack object at [SP-16] is too close to GPR stack object at [SP-8]
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'csr_d8_allocnxv4i32':
+entry:
+  %a = alloca <vscale x 4 x i32>
+  tail call void asm sideeffect "", "~{d8}"() #1
+  store <vscale x 4 x i32> zeroinitializer, ptr %a
+  ret i32 0
+}
+
+define float @csr_x20_stackargs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs': GPR stack object at [SP-16] is too close to FPR stack object at [SP+0]
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs': GPR stack object at [SP-16] is too close to FPR stack object at [SP+0]
+entry:
+  tail call void asm sideeffect "", "~{x20}"() #1
+  ret float %i
+}
+
+; In this case, addition of stack hazard padding triggers x29 (fp) spill, so we hazard occurs between FPR argument and GPR spill.
+define float @csr_d8_stackargs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) #2 {
+; CHECK-NOT: remark: <unknown>:0:0: stack hazard in 'csr_d8_stackargs':
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'csr_d8_stackargs': GPR stack object at [SP-8] is too close to FPR stack object at [SP+0]
+entry:
+  tail call void asm sideeffect "", "~{d8}"() #1
+  ret float %i
+}
+
+; SVE calling conventions
+; Predicate register spills end up in FP region, currently.
+
+define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at [SP-48-258 * vscale] is too close to FPR stack object at [SP-48-256 * vscale]
+; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48]
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at [SP-1072-258 * vscale] is too close to FPR stack object at [SP-1072-256 * vscale]
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'svecc_call':
+entry:
+  tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
+  %call = call ptr @memset(ptr noundef nonnull %P1, i32 noundef 45, i32 noundef 37)
+  ret i32 -396142473
+}
+
+define i32 @svecc_alloca_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': PPR stack object at [SP-48-258 * vscale] is too close to FPR stack object at [SP-48-256 * vscale]
+; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48]
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': PPR stack object at [SP-1072-258 * vscale] is too close to FPR stack object at [SP-1072-256 * vscale]
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call':
+entry:
+  tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
+  %0 = alloca [37 x i8], align 16
+  %call = call ptr @memset(ptr noundef nonnull %0, i32 noundef 45, i32 noundef 37)
+  ret i32 -396142473
+}
+declare ptr @memset(ptr, i32, i32)
+
+%struct.mixed_struct = type { i32, float }
+
+define i32 @mixed_stack_object(i32  %a, float %b) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'mixed_stack_object': Mixed stack object at [SP-8] accessed by both GP and FP instructions
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'mixed_stack_object': Mixed stack object at [SP-8] accessed by both GP and FP instructions
+entry:
+  %s = alloca %struct.mixed_struct
+  %s.i = getelementptr %struct.mixed_struct, ptr %s, i32 0, i32 0
+  %s.f = getelementptr %struct.mixed_struct, ptr %s, i32 0, i32 1
+  store i32 %a, ptr %s.i
+  store float %b, ptr %s.f
+  ret i32 %a
+}
+
+define i32 @mixed_stack_objects(i32  %a, float %b) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'mixed_stack_objects': Mixed stack object at [SP-16] is too close to Mixed stack object at [SP-8]
+; CHECK: remark: <unknown>:0:0: stack hazard in 'mixed_stack_objects': Mixed stack object at [SP-16] accessed by both GP and FP instructions
+; CHECK: remark: <unknown>:0:0: stack hazard in 'mixed_stack_objects': Mixed stack object at [SP-8] accessed by both GP and FP instructions
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'mixed_stack_objects': Mixed stack object at [SP-16] is too close to Mixed stack object at [SP-8]
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'mixed_stack_objects': Mixed stack object at [SP-16] accessed by both GP and FP instructions
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'mixed_stack_objects': Mixed stack object at [SP-8] accessed by both GP and FP instructions
+entry:
+  %s0 = alloca %struct.mixed_struct
+  %s0.i = getelementptr %struct.mixed_struct, ptr %s0, i32 0, i32 0
+  %s0.f = getelementptr %struct.mixed_struct, ptr %s0, i32 0, i32 1
+  store i32 %a, ptr %s0.i
+  store float %b, ptr %s0.f
+
+  %s1 = alloca %struct.mixed_struct
+  %s1.i = getelementptr %struct.mixed_struct, ptr %s1, i32 0, i32 0
+  %s1.f = getelementptr %struct.mixed_struct, ptr %s1, i32 0, i32 1
+  store i32 %a, ptr %s1.i
+  store float %b, ptr %s1.f
+
+  ret i32 %a
+}
+
+; VLA-area stack objects are not separated.
+define i32 @csr_d8_allocnxv4i32i32f64_vlai32f64(double %d, i32 %i) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'csr_d8_allocnxv4i32i32f64_vlai32f64': GPR stack object at [SP-48-16 * vscale] is too close to FPR stack object at [SP-48-16 * vscale]
+; CHECK: remark: <unknown>:0:0: stack hazard in 'csr_d8_allocnxv4i32i32f64_vlai32f64': FPR stack object at [SP-32] is too close to GPR stack object at [SP-24]
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'csr_d8_allocnxv4i32i32f64_vlai32f64': GPR stack object at [SP-2096-16 * vscale] is too close to FPR stack object at [SP-2096-16 * vscale]
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'csr_d8_allocnxv4i32i32f64_vlai32f64':
+entry:
+  %a = alloca <vscale x 4 x i32>
+  %0 = zext i32 %i to i64
+  %vla0 = alloca i32, i64 %0
+  %vla1 = alloca double, i64 %0
+  %c = alloca double
+  tail call void asm sideeffect "", "~{d8}"() #1
+  store <vscale x 4 x i32> zeroinitializer, ptr %a
+  store i32 zeroinitializer, ptr %vla0
+  store double %d, ptr %vla1
+  store double %d, ptr %c
+  ret i32 0
+}
+
+attributes #2 = { "aarch64_pstate_sm_compatible" }
diff --git a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
index 431c9dc76508f..ec94198a08ca7 100644
--- a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
@@ -150,8 +150,8 @@ entry:
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 8, Size: 8
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Variable, Align: 16, Size: vscale x 16
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-40-16 x vscale], Type: Variable, Align: 8, Size: 8
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: VariableSized, Align: 1, Size: 0
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: VariableSized, Align: 1, Size: 0
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-16 x vscale], Type: VariableSized, Align: 1, Size: 0
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-16 x vscale], Type: VariableSized, Align: 1, Size: 0
 
 define i32 @csr_d8_allocnxv4i32i32f64_vla(double %d, i32 %i) "aarch64_pstate_sm_compatible" {
 ; CHECK-LABEL: csr_d8_allocnxv4i32i32f64_vla:

From 6e3026883d77124e32a2a7be72c3361fba3e7457 Mon Sep 17 00:00:00 2001
From: Mariya Podchishchaeva <mariya.podchishchaeva@intel.com>
Date: Mon, 12 Aug 2024 09:08:46 +0200
Subject: [PATCH 192/427] [clang] Avoid triggering vtable instantiation for
 C++23 constexpr dtor (#102605)

In C++23 anything can be constexpr, including a dtor of a class whose
members and bases don't have constexpr dtors. Avoid early triggering of
vtable instantiation int this case.

Fixes https://github.com/llvm/llvm-project/issues/102293

(cherry picked from commit d469794d0cdfd2fea50a6ce0c0e33abb242d744c)
---
 clang/lib/Sema/SemaDeclCXX.cpp  | 29 ++++++++++++++++++++++++++++-
 clang/test/SemaCXX/gh102293.cpp | 22 ++++++++++++++++++++++
 2 files changed, 50 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/SemaCXX/gh102293.cpp

diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 66ca62f5d7c4c..ecf8754143a49 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -7042,11 +7042,38 @@ void Sema::CheckCompletedCXXClass(Scope *S, CXXRecordDecl *Record) {
       }
     }
 
+    bool EffectivelyConstexprDestructor = true;
+    // Avoid triggering vtable instantiation due to a dtor that is not
+    // "effectively constexpr" for better compatibility.
+    // See https://github.com/llvm/llvm-project/issues/102293 for more info.
+    if (isa<CXXDestructorDecl>(M)) {
+      auto Check = [](QualType T, auto &&Check) -> bool {
+        const CXXRecordDecl *RD =
+            T->getBaseElementTypeUnsafe()->getAsCXXRecordDecl();
+        if (!RD || !RD->isCompleteDefinition())
+          return true;
+
+        if (!RD->hasConstexprDestructor())
+          return false;
+
+        for (const CXXBaseSpecifier &B : RD->bases())
+          if (!Check(B.getType(), Check))
+            return false;
+        for (const FieldDecl *FD : RD->fields())
+          if (!Check(FD->getType(), Check))
+            return false;
+        return true;
+      };
+      EffectivelyConstexprDestructor =
+          Check(QualType(Record->getTypeForDecl(), 0), Check);
+    }
+
     // Define defaulted constexpr virtual functions that override a base class
     // function right away.
     // FIXME: We can defer doing this until the vtable is marked as used.
     if (CSM != CXXSpecialMemberKind::Invalid && !M->isDeleted() &&
-        M->isDefaulted() && M->isConstexpr() && M->size_overridden_methods())
+        M->isDefaulted() && M->isConstexpr() && M->size_overridden_methods() &&
+        EffectivelyConstexprDestructor)
       DefineDefaultedFunction(*this, M, M->getLocation());
 
     if (!Incomplete)
diff --git a/clang/test/SemaCXX/gh102293.cpp b/clang/test/SemaCXX/gh102293.cpp
new file mode 100644
index 0000000000000..30629fc03bf6a
--- /dev/null
+++ b/clang/test/SemaCXX/gh102293.cpp
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -std=c++23 -fsyntax-only -verify %s
+// expected-no-diagnostics
+
+template <typename T> static void destroy() {
+    T t;
+    ++t;
+}
+
+struct Incomplete;
+
+template <typename = int> struct HasD {
+  ~HasD() { destroy<Incomplete*>(); }
+};
+
+struct HasVT {
+  virtual ~HasVT();
+};
+
+struct S : HasVT {
+  HasD<> v;
+};
+

From 38a591de66a86aaf523f78f8266a2d5f01a1b106 Mon Sep 17 00:00:00 2001
From: Tulio Magno Quites Machado Filho <tuliom@redhat.com>
Date: Tue, 13 Aug 2024 15:34:41 -0300
Subject: [PATCH 193/427] [OpenMP][AArch64] Fix branch protection in microtasks
 (#102317)

Start __kmp_invoke_microtask with PACBTI in order to identify the
function as a valid branch target. Before returning, SP is
authenticated.
Also add the BTI and PAC markers to z_Linux_asm.S.

With this patch, libomp.so can now be generated with DT_AARCH64_BTI_PLT
when built with -mbranch-protection=standard.

The implementation is based on the code available in compiler-rt.

(cherry picked from commit 0aa22dcd2f6ec5f46b8ef18fee88066463734935)
---
 openmp/runtime/src/z_Linux_asm.S | 53 ++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S
index 5b614e26a8337..223ad091030e7 100644
--- a/openmp/runtime/src/z_Linux_asm.S
+++ b/openmp/runtime/src/z_Linux_asm.S
@@ -176,6 +176,53 @@ KMP_PREFIX_UNDERSCORE(\proc):
 .endm
 # endif // KMP_OS_DARWIN
 
+# if KMP_OS_LINUX
+// BTI and PAC gnu property note
+#  define NT_GNU_PROPERTY_TYPE_0 5
+#  define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
+#  define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1
+#  define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2
+
+#  define GNU_PROPERTY(type, value)                                            \
+  .pushsection .note.gnu.property, "a";                                        \
+  .p2align 3;                                                                  \
+  .word 4;                                                                     \
+  .word 16;                                                                    \
+  .word NT_GNU_PROPERTY_TYPE_0;                                                \
+  .asciz "GNU";                                                                \
+  .word type;                                                                  \
+  .word 4;                                                                     \
+  .word value;                                                                 \
+  .word 0;                                                                     \
+  .popsection
+# endif
+
+# if defined(__ARM_FEATURE_BTI_DEFAULT)
+#  define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI
+# else
+#  define BTI_FLAG 0
+# endif
+# if __ARM_FEATURE_PAC_DEFAULT & 3
+#  define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC
+# else
+#  define PAC_FLAG 0
+# endif
+
+# if (BTI_FLAG | PAC_FLAG) != 0
+#  if PAC_FLAG != 0
+#   define PACBTI_C hint #25
+#   define PACBTI_RET hint #29
+#  else
+#   define PACBTI_C hint #34
+#   define PACBTI_RET
+#  endif
+#  define GNU_PROPERTY_BTI_PAC \
+    GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG)
+# else
+#  define PACBTI_C
+#  define PACBTI_RET
+#  define GNU_PROPERTY_BTI_PAC
+# endif
 #endif // (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM)
 
 .macro COMMON name, size, align_power
@@ -1296,6 +1343,7 @@ __tid = 8
 // mark_begin;
 	.text
 	PROC __kmp_invoke_microtask
+	PACBTI_C
 
 	stp	x29, x30, [sp, #-16]!
 # if OMPT_SUPPORT
@@ -1359,6 +1407,7 @@ KMP_LABEL(kmp_1):
 	ldp	x19, x20, [sp], #16
 # endif
 	ldp	x29, x30, [sp], #16
+	PACBTI_RET
 	ret
 
 	DEBUG_INFO __kmp_invoke_microtask
@@ -2472,3 +2521,7 @@ __kmp_unnamed_critical_addr:
     .4byte .gomp_critical_user_
     .size __kmp_unnamed_critical_addr, 4
 #endif
+
+#if KMP_OS_LINUX && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32)
+GNU_PROPERTY_BTI_PAC
+#endif

From 6fcbfb8ebc9650a2ea184aac244d067efdbe441e Mon Sep 17 00:00:00 2001
From: Sharadh Rajaraman <r.sharadh@outlook.sg>
Date: Mon, 19 Aug 2024 12:17:58 +0100
Subject: [PATCH 194/427] [clang][driver] `TY_ModuleFile` should be a 'CXX'
 file type

---
 clang/lib/Driver/Types.cpp              | 4 +++-
 clang/test/Driver/cl-cxx20-modules.cppm | 8 ++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Driver/Types.cpp b/clang/lib/Driver/Types.cpp
index a7b6b9000e1d2..2b9b391c19c9f 100644
--- a/clang/lib/Driver/Types.cpp
+++ b/clang/lib/Driver/Types.cpp
@@ -242,7 +242,9 @@ bool types::isCXX(ID Id) {
   case TY_CXXHUHeader:
   case TY_PP_CXXHeaderUnit:
   case TY_ObjCXXHeader: case TY_PP_ObjCXXHeader:
-  case TY_CXXModule: case TY_PP_CXXModule:
+  case TY_CXXModule:
+  case TY_PP_CXXModule:
+  case TY_ModuleFile:
   case TY_PP_CLCXX:
   case TY_CUDA: case TY_PP_CUDA: case TY_CUDA_DEVICE:
   case TY_HIP:
diff --git a/clang/test/Driver/cl-cxx20-modules.cppm b/clang/test/Driver/cl-cxx20-modules.cppm
index 06df929c42342..43dbf517485a0 100644
--- a/clang/test/Driver/cl-cxx20-modules.cppm
+++ b/clang/test/Driver/cl-cxx20-modules.cppm
@@ -1,3 +1,6 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
 // RUN: %clang_cl /std:c++20 --precompile -### -- %s 2>&1 | FileCheck --check-prefix=PRECOMPILE %s
 // PRECOMPILE: -emit-module-interface
 
@@ -6,3 +9,8 @@
 
 // RUN: %clang_cl /std:c++20 --fprebuilt-module-path=. -### -- %s 2>&1 | FileCheck --check-prefix=FPREBUILT %s
 // FPREBUILT: -fprebuilt-module-path=.
+
+// RUN: %clang_cl %t/test.pcm /std:c++20 -### 2>&1 | FileCheck --check-prefix=CPP20WARNING %t/test.pcm
+
+//--- test.pcm
+// CPP20WARNING-NOT: clang-cl: warning: argument unused during compilation: '/std:c++20' [-Wunused-command-line-argument]

From 9545ef53ebe8be2a53ef6f84626f52bed73c82ba Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Fri, 16 Aug 2024 14:54:51 -0700
Subject: [PATCH 195/427] [Mips] Fix fast isel for i16 bswap. (#103398)

We need to mask the SRL result to 8 bits before ORing in the SLL. This
is needed in case bits 23:16 of the input aren't zero. They will have
been shifted into bits 15:8.

We don't need to AND the result with 0xffff. It's ok if the upper 16
bits of the register are garbage.

Fixes #103035.

(cherry picked from commit ebe7265b142f370f0a563fece5db22f57383ba2d)
---
 llvm/lib/Target/Mips/MipsFastISel.cpp      | 4 ++--
 llvm/test/CodeGen/Mips/Fast-ISel/bswap1.ll | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp
index bd8ef43da625c..64a0e9321598f 100644
--- a/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -1608,8 +1608,8 @@ bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
         }
         emitInst(Mips::SLL, TempReg[0]).addReg(SrcReg).addImm(8);
         emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(8);
-        emitInst(Mips::OR, TempReg[2]).addReg(TempReg[0]).addReg(TempReg[1]);
-        emitInst(Mips::ANDi, DestReg).addReg(TempReg[2]).addImm(0xFFFF);
+        emitInst(Mips::ANDi, TempReg[2]).addReg(TempReg[1]).addImm(0xFF);
+        emitInst(Mips::OR, DestReg).addReg(TempReg[0]).addReg(TempReg[2]);
         updateValueMap(II, DestReg);
         return true;
       }
diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/bswap1.ll b/llvm/test/CodeGen/Mips/Fast-ISel/bswap1.ll
index bd762a0e1d741..ce664c78e86c2 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/bswap1.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/bswap1.ll
@@ -21,8 +21,8 @@ define void @b16() {
 
   ; 32R1:           sll   $[[TMP1:[0-9]+]], $[[A_VAL]], 8
   ; 32R1:           srl   $[[TMP2:[0-9]+]], $[[A_VAL]], 8
-  ; 32R1:           or    $[[TMP3:[0-9]+]], $[[TMP1]], $[[TMP2]]
-  ; 32R1:           andi  $[[TMP4:[0-9]+]], $[[TMP3]], 65535
+  ; 32R1:           andi  $[[TMP3:[0-9]+]], $[[TMP2]], 255
+  ; 32R1:           or    $[[RESULT:[0-9]+]], $[[TMP1]], $[[TMP3]]
 
   ; 32R2:           wsbh  $[[RESULT:[0-9]+]], $[[A_VAL]]
 

From 8595e91b16dadc33fbb321cfd30b77f43f64e10e Mon Sep 17 00:00:00 2001
From: Anton Korobeynikov <anton@korobeynikov.info>
Date: Fri, 16 Aug 2024 18:09:53 -0700
Subject: [PATCH 196/427] Add some brief LLVM 19 release notes for Pointer
 Authentication ABI support.

---
 clang/docs/ReleaseNotes.rst |  8 ++++++++
 llvm/docs/ReleaseNotes.rst  | 14 ++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index b56e7177846d9..17ddbfe910f87 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1191,11 +1191,13 @@ Arm and AArch64 Support
   improvements for most targets. We have not changed the default behavior for
   ARMv6, but may revisit that decision in the future. Users can restore the old
   behavior with -m[no-]unaligned-access.
+
 - An alias identifier (rdma) has been added for targeting the AArch64
   Architecture Extension which uses Rounding Doubling Multiply Accumulate
   instructions (rdm). The identifier is available on the command line as
   a feature modifier for -march and -mcpu as well as via target attributes
   like ``target_version`` or ``target_clones``.
+
 - Support has been added for the following processors (-mcpu identifiers in parenthesis):
     * Arm Cortex-R52+ (cortex-r52plus).
     * Arm Cortex-R82AE (cortex-r82ae).
@@ -1213,6 +1215,12 @@ Arm and AArch64 Support
   objects. It doesn't cause any code generation changes, as the code generated
   by clang is already compatible with GCS.
 
+ - Experimental support has been added for pointer authentication ABI for С/C++.
+
+ - Pointer authentication ABI could be enabled for AArch64 Linux via
+   ``-mabi=pauthtest`` option or via specifying ``pauthtest`` environment part of
+   target triple.
+
 Android Support
 ^^^^^^^^^^^^^^^
 
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index a81caa160883d..60b6c6e786df8 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -80,6 +80,11 @@ Changes to the LLVM IR
     removed. The next argument has been changed from byte index to bit
     index.
 * Added ``llvm.experimental.vector.compress`` intrinsic.
+* Added special kind of `constant expressions
+  <https://llvm.org/docs/LangRef.html#pointer-authentication-constants>`_ to
+  represent pointers with signature embedded into it.
+* Added `pointer authentication operand bundles
+  <https://llvm.org/docs/LangRef.html#pointer-authentication-operand-bundles>`_. 
 
 Changes to LLVM infrastructure
 ------------------------------
@@ -125,6 +130,15 @@ Changes to the AArch64 Backend
   when specified via ``-march=`` or an ``-mcpu=`` that supports them.  The
   attribute ``"target-features"="+v9a"`` no longer implies ``"+sve"`` and
   ``"+sve2"`` respectively.
+* Added support for ELF pointer authentication relocations as specified in
+  `PAuth ABI Extension to ELF
+  <https://github.com/ARM-software/abi-aa/blob/main/pauthabielf64/pauthabielf64.rst>`_.
+* Added codegeneration, ELF object file and linker support for authenticated
+  call lowering, signed constants and emission of signing scheme details in
+  ``GNU_PROPERTY_AARCH64_FEATURE_PAUTH`` property of ``.note.gnu.property``
+  section.
+* Added codegeneration support for ``llvm.ptrauth.auth`` and
+  ``llvm.ptrauth.resign`` intrinsics.
 
 Changes to the AMDGPU Backend
 -----------------------------

From bb46c721211b901f7ab34551e4bb240308203da9 Mon Sep 17 00:00:00 2001
From: Vladislav Khmelevsky <och95@yandex.ru>
Date: Sat, 27 Jul 2024 23:07:59 +0400
Subject: [PATCH 197/427] release/19.x: [BOLT] Fix relocations handling

Backport https://github.com/llvm/llvm-project/commit/097ddd3565f830e6cb9d0bb8ca66844b7f3f3cbb
---
 bolt/lib/Rewrite/RewriteInstance.cpp       |   2 +-
 bolt/test/AArch64/Inputs/build_id.ldscript |   9 +
 bolt/test/AArch64/build_id.c               |  25 ++
 bolt/test/X86/Inputs/build_id.yaml         | 326 +++++++++++++++++++++
 bolt/test/X86/build_id.test                |   8 +
 5 files changed, 369 insertions(+), 1 deletion(-)
 create mode 100644 bolt/test/AArch64/Inputs/build_id.ldscript
 create mode 100644 bolt/test/AArch64/build_id.c
 create mode 100644 bolt/test/X86/Inputs/build_id.yaml
 create mode 100644 bolt/test/X86/build_id.test

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 5a2c2637eb01f..3d24936271bf8 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -2612,7 +2612,7 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
       Expected<StringRef> SectionName = Section->getName();
       if (SectionName && !SectionName->empty())
         ReferencedSection = BC->getUniqueSectionByName(*SectionName);
-    } else if (ReferencedSymbol && ContainingBF &&
+    } else if (BC->isRISCV() && ReferencedSymbol && ContainingBF &&
                (cantFail(Symbol.getFlags()) & SymbolRef::SF_Absolute)) {
       // This might be a relocation for an ABS symbols like __global_pointer$ on
       // RISC-V
diff --git a/bolt/test/AArch64/Inputs/build_id.ldscript b/bolt/test/AArch64/Inputs/build_id.ldscript
new file mode 100644
index 0000000000000..0af8e960f491b
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/build_id.ldscript
@@ -0,0 +1,9 @@
+SECTIONS
+{
+  PROVIDE (__executable_start = SEGMENT_START("text-segment", 0x400000)); . = SEGMENT_START("text-segment", 0x400000) + SIZEOF_HEADERS;
+  .note.gnu.build-id (0x400400):
+   {
+    build_id_note = ABSOLUTE(.);
+    *(.note.gnu.build-id)
+   }
+}
diff --git a/bolt/test/AArch64/build_id.c b/bolt/test/AArch64/build_id.c
new file mode 100644
index 0000000000000..01e433c7ca8fd
--- /dev/null
+++ b/bolt/test/AArch64/build_id.c
@@ -0,0 +1,25 @@
+// This test checks that referencing build_id through GOT table
+// would result in GOT access after disassembly, not directly
+// to build_id address.
+
+// RUN: %clang %cflags -fuse-ld=lld -Wl,-T,%S/Inputs/build_id.ldscript -Wl,-q \
+// RUN:   -Wl,--no-relax -Wl,--build-id=sha1 %s -o %t.exe
+// RUN: llvm-bolt -print-disasm --print-only=get_build_id %t.exe -o %t.bolt | \
+// RUN:   FileCheck %s
+
+// CHECK: adrp	[[REG:x[0-28]+]], __BOLT_got_zero
+// CHECK: ldr x{{.*}}, [[[REG]], :lo12:__BOLT_got_zero{{.*}}]
+
+struct build_id_note {
+  char pad[16];
+  char hash[20];
+};
+
+extern const struct build_id_note build_id_note;
+
+__attribute__((noinline)) char get_build_id() { return build_id_note.hash[0]; }
+
+int main() {
+  get_build_id();
+  return 0;
+}
diff --git a/bolt/test/X86/Inputs/build_id.yaml b/bolt/test/X86/Inputs/build_id.yaml
new file mode 100644
index 0000000000000..af012904ff950
--- /dev/null
+++ b/bolt/test/X86/Inputs/build_id.yaml
@@ -0,0 +1,326 @@
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_EXEC
+  Machine:         EM_X86_64
+  Entry:           0x4010A0
+ProgramHeaders:
+  - Type:            PT_PHDR
+    Flags:           [ PF_R ]
+    VAddr:           0x400040
+    Align:           0x8
+    Offset:          0x40
+  - Type:            PT_INTERP
+    Flags:           [ PF_R ]
+    FirstSec:        .interp
+    LastSec:         .interp
+    VAddr:           0x400444
+    Offset:          0x444
+  - Type:            PT_LOAD
+    Flags:           [ PF_X, PF_R ]
+    FirstSec:        .init
+    LastSec:         .fini
+    VAddr:           0x401000
+    Align:           0x1000
+    Offset:          0x1000
+  - Type:            PT_LOAD
+    Flags:           [ PF_R ]
+    FirstSec:        .rodata
+    LastSec:         .rodata
+    VAddr:           0x402000
+    Align:           0x1000
+    Offset:          0x2000
+  - Type:            PT_LOAD
+    Flags:           [ PF_W, PF_R ]
+    FirstSec:        .init_array
+    LastSec:         .bss
+    VAddr:           0x403DD8
+    Align:           0x1000
+    Offset:          0x2DD8
+  - Type:            PT_DYNAMIC
+    Flags:           [ PF_W, PF_R ]
+    FirstSec:        .dynamic
+    LastSec:         .dynamic
+    VAddr:           0x403DE8
+    Align:           0x8
+    Offset:          0x2DE8
+  - Type:            PT_NOTE
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.build-id
+    LastSec:         .note.ABI-tag
+    VAddr:           0x400400
+    Align:           0x4
+    Offset:          0x400
+Sections:
+  - Name:            .note.gnu.build-id
+    Type:            SHT_NOTE
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x400400
+    AddressAlign:    0x4
+    Offset:          0x400
+    Notes:
+      - Name:            GNU
+        Desc:            3C34F7D1612996940C48F98DC272543BC3C9C956
+        Type:            NT_PRPSINFO
+  - Name:            .note.ABI-tag
+    Type:            SHT_NOTE
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x400424
+    AddressAlign:    0x4
+    Notes:
+      - Name:            GNU
+        Desc:            '00000000030000000200000000000000'
+        Type:            NT_VERSION
+  - Name:            .interp
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x400444
+    AddressAlign:    0x1
+    Content:         2F6C696236342F6C642D6C696E75782D7838362D36342E736F2E3200
+  - Name:            .gnu.hash
+    Type:            SHT_GNU_HASH
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x400460
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Header:
+      SymNdx:          0x7
+      Shift2:          0x6
+    BloomFilter:     [ 0x810000 ]
+    HashBuckets:     [ 0x7, 0x0 ]
+    HashValues:      [ 0x6DCE65D1 ]
+  - Name:            .dynsym
+    Type:            SHT_DYNSYM
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x400488
+    Link:            .dynstr
+    AddressAlign:    0x8
+  - Name:            .dynstr
+    Type:            SHT_STRTAB
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x400548
+    AddressAlign:    0x1
+  - Name:            .gnu.version
+    Type:            SHT_GNU_versym
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x4005F2
+    Link:            .dynsym
+    AddressAlign:    0x2
+    Entries:         [ 0, 2, 3, 1, 1, 4, 1, 2 ]
+  - Name:            .gnu.version_r
+    Type:            SHT_GNU_verneed
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x400608
+    Link:            .dynstr
+    AddressAlign:    0x8
+    Dependencies:
+      - Version:         1
+        File:            libc.so.6
+        Entries:
+          - Name:            GLIBC_2.3.4
+            Hash:            157882740
+            Flags:           0
+            Other:           4
+          - Name:            GLIBC_2.34
+            Hash:            110530996
+            Flags:           0
+            Other:           3
+          - Name:            GLIBC_2.2.5
+            Hash:            157882997
+            Flags:           0
+            Other:           2
+  - Name:            .init
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x401000
+    AddressAlign:    0x4
+    Offset:          0x1000
+    Content:         F30F1EFA4883EC08488B05D92F00004885C07402FFD04883C408C3
+  - Name:            .plt.sec
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x401060
+    AddressAlign:    0x10
+    EntSize:         0x10
+    Content:         F30F1EFAF2FF25AD2F00000F1F440000F30F1EFAF2FF25A52F00000F1F440000
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x401080
+    AddressAlign:    0x10
+    Content:         F30F1EFA4883EC0831C0E80101000031C04883C408C3662E0F1F840000000000F30F1EFA31ED4989D15E4889E24883E4F050544531C031C9488D3DC1FFFFFFFF15132F0000F4662E0F1F840000000000488D3D612F0000488D055A2F00004839F87415488B05F62E00004885C07409FFE00F1F8000000000C30F1F8000000000488D3D312F0000488D352A2F00004829FE4889F048C1EE3F48C1F8034801C648D1FE7414488B05C52E00004885C07408FFE0660F1F440000C30F1F8000000000F30F1EFA803DED2E000000752B5548833DA22E0000004889E5740C488B3DCE2E0000E8E9FEFFFFE864FFFFFFC605C52E0000015DC30F1F00C30F1F8000000000F30F1EFAE977FFFFFF0F1F8000000000F30F1EFA415455488D2D660E000053488D1D6AF2FFFF4C8D6314660F1F4400000FB6134889EEBF0100000031C04883C301E8AAFEFFFF4C39E375E55BBF0A0000005D415CE987FEFFFF
+  - Name:            .fini
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x4011DC
+    AddressAlign:    0x4
+    Content:         F30F1EFA4883EC084883C408C3
+  - Name:            .rodata
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x402000
+    AddressAlign:    0x4
+    Offset:          0x2000
+    Content:         '0100020025303268687800'
+  - Name:            .init_array
+    Type:            SHT_INIT_ARRAY
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x403DD8
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Offset:          0x2DD8
+    Content:         '8011400000000000'
+  - Name:            .fini_array
+    Type:            SHT_FINI_ARRAY
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x403DE0
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         '4011400000000000'
+  - Name:            .dynamic
+    Type:            SHT_DYNAMIC
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x403DE8
+    Link:            .dynstr
+    AddressAlign:    0x8
+    Entries:
+      - Tag:             DT_NEEDED
+        Value:           0x37
+      - Tag:             DT_INIT
+        Value:           0x401000
+      - Tag:             DT_FINI
+        Value:           0x4011DC
+      - Tag:             DT_INIT_ARRAY
+        Value:           0x403DD8
+      - Tag:             DT_INIT_ARRAYSZ
+        Value:           0x8
+      - Tag:             DT_FINI_ARRAY
+        Value:           0x403DE0
+      - Tag:             DT_FINI_ARRAYSZ
+        Value:           0x8
+      - Tag:             DT_GNU_HASH
+        Value:           0x400460
+      - Tag:             DT_STRTAB
+        Value:           0x400548
+      - Tag:             DT_SYMTAB
+        Value:           0x400488
+      - Tag:             DT_STRSZ
+        Value:           0xA9
+      - Tag:             DT_SYMENT
+        Value:           0x18
+      - Tag:             DT_DEBUG
+        Value:           0x0
+      - Tag:             DT_PLTGOT
+        Value:           0x404000
+      - Tag:             DT_PLTRELSZ
+        Value:           0x30
+      - Tag:             DT_PLTREL
+        Value:           0x7
+      - Tag:             DT_FLAGS
+        Value:           0x8
+      - Tag:             DT_FLAGS_1
+        Value:           0x8000001
+      - Tag:             DT_VERNEED
+        Value:           0x400608
+      - Tag:             DT_VERNEEDNUM
+        Value:           0x1
+      - Tag:             DT_VERSYM
+        Value:           0x4005F2
+      - Tag:             DT_RELACOUNT
+        Value:           0x3
+      - Tag:             DT_NULL
+        Value:           0x0
+  - Name:            .data
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x404028
+    AddressAlign:    0x8
+    Content:         '00000000000000003040400000000000'
+  - Name:            .tm_clone_table
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x404038
+    AddressAlign:    0x8
+  - Name:            .bss
+    Type:            SHT_NOBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x404038
+    AddressAlign:    0x1
+    Size:            0x8
+  - Name:            .rela.text
+    Type:            SHT_RELA
+    Flags:           [ SHF_INFO_LINK ]
+    Link:            .symtab
+    AddressAlign:    0x8
+    Info:            .text
+    Relocations:
+      - Offset:          0x40108B
+        Symbol:          print_build_id
+        Type:            R_X86_64_PLT32
+        Addend:          -4
+      - Offset:          0x4010BB
+        Symbol:          main
+        Type:            R_X86_64_PC32
+        Addend:          -4
+      - Offset:          0x4011A2
+        Symbol:          build_id_note
+        Type:            R_X86_64_PC32
+        Addend:          12
+  - Type:            SectionHeaderTable
+    Sections:
+      - Name:            .note.gnu.build-id
+      - Name:            .note.ABI-tag
+      - Name:            .interp
+      - Name:            .gnu.hash
+      - Name:            .dynsym
+      - Name:            .dynstr
+      - Name:            .gnu.version
+      - Name:            .gnu.version_r
+      - Name:            .init
+      - Name:            .plt.sec
+      - Name:            .text
+      - Name:            .rela.text
+      - Name:            .fini
+      - Name:            .rodata
+      - Name:            .init_array
+      - Name:            .fini_array
+      - Name:            .dynamic
+      - Name:            .data
+      - Name:            .tm_clone_table
+      - Name:            .bss
+      - Name:            .symtab
+      - Name:            .strtab
+      - Name:            .shstrtab
+Symbols:
+  - Name:            print_build_id
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x401190
+    Size:            0x49
+  - Name:            _end
+    Section:         .bss
+    Binding:         STB_GLOBAL
+    Value:           0x404040
+  - Name:            _start
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x4010A0
+    Size:            0x26
+  - Name:            __bss_start
+    Section:         .bss
+    Binding:         STB_GLOBAL
+    Value:           0x404038
+  - Name:            main
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x401080
+    Size:            0x16
+  - Name:            build_id_note
+    Index:           SHN_ABS
+    Binding:         STB_GLOBAL
+    Value:           0x400400
+...
diff --git a/bolt/test/X86/build_id.test b/bolt/test/X86/build_id.test
new file mode 100644
index 0000000000000..8d28e12dbf94f
--- /dev/null
+++ b/bolt/test/X86/build_id.test
@@ -0,0 +1,8 @@
+// This test checks that relocation addend used to address build_id fields
+// is properly disassembled by BOLT.
+
+RUN: yaml2obj %p/Inputs/build_id.yaml &> %t.exe
+RUN: llvm-bolt -print-disasm --print-only=print_build_id %t.exe -o %t.bolt | \
+RUN:   FileCheck %s
+
+CHECK: leaq build_id_note+16(%rip), %rbx

From 263965ebe237e2f82d714a12a8c9338b46237a33 Mon Sep 17 00:00:00 2001
From: Tomas Matheson <Tomas.Matheson@arm.com>
Date: Sat, 17 Aug 2024 13:36:40 +0100
Subject: [PATCH 198/427] [AArch64] Add a check for invalid default features
 (#104435)

This adds a check that all ExtensionWithMArch which are marked as
implied features for an architecture are also present in the list of
default features. It doesn't make sense to have something mandatory but
not on by default.

There were a number of existing cases that violated this rule, and some
changes to which features are mandatory (indicated by the Implies
field).

This resulted in a bug where if a feature was marked as `Implies` but
was not added to `DefaultExt`, then for `-march=base_arch+nofeat` the
Driver would consider `feat` to have never been added and therefore
would do nothing to disable it (no `-target-feature -feat` would be
added, but the backend would enable the feature by default because of
`Implies`). See
clang/test/Driver/aarch64-negative-modifiers-for-default-features.c.

Note that the processor definitions do not respect the architecture
DefaultExts. These apply only when specifying `-march=<some architecture
version>`. So when a feature is moved from `Implies` to `DefaultExts` on
the Architecture definition, the feature needs to be added to all
processor definitions (that are based on that architecture) in order to
preserve the existing behaviour. I have checked the TRMs for many cases
(see specific commit messages) but in other cases I have just kept the
current behaviour and not tried to fix it.
---
 clang/test/CodeGen/aarch64-targetattr.c       | 12 +--
 ...-negative-modifiers-for-default-features.c | 12 +++
 clang/test/Driver/arm-sb.c                    |  2 +-
 .../aarch64-apple-a12.c                       |  1 -
 .../aarch64-apple-a13.c                       |  1 -
 .../aarch64-apple-a14.c                       |  1 -
 .../aarch64-apple-a15.c                       |  1 -
 .../aarch64-apple-a16.c                       |  1 -
 .../aarch64-apple-a17.c                       |  1 -
 .../aarch64-apple-m4.c                        |  2 -
 .../aarch64-cortex-r82.c                      |  1 -
 .../aarch64-cortex-r82ae.c                    |  1 -
 llvm/lib/Target/AArch64/AArch64Features.td    | 19 ++--
 llvm/lib/Target/AArch64/AArch64Processors.td  | 46 +++++++--
 llvm/test/MC/AArch64/arm64-system-encoding.s  |  2 +-
 llvm/test/MC/AArch64/armv8.5a-ssbs-error.s    |  2 +-
 llvm/test/MC/AArch64/armv8.5a-ssbs.s          |  2 +-
 .../MC/Disassembler/AArch64/armv8.5a-ssbs.txt |  2 +-
 .../AArch64/basic-a64-instructions.txt        |  2 +-
 .../TargetParser/TargetParserTest.cpp         | 95 +++++++++++--------
 llvm/utils/TableGen/ARMTargetDefEmitter.cpp   | 32 ++++++-
 21 files changed, 154 insertions(+), 84 deletions(-)
 create mode 100644 clang/test/Driver/aarch64-negative-modifiers-for-default-features.c

diff --git a/clang/test/CodeGen/aarch64-targetattr.c b/clang/test/CodeGen/aarch64-targetattr.c
index 4f891f938b618..d6227be2ebef8 100644
--- a/clang/test/CodeGen/aarch64-targetattr.c
+++ b/clang/test/CodeGen/aarch64-targetattr.c
@@ -195,19 +195,19 @@ void minusarch() {}
 // CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" }
 // CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" }
 // CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a" }
-// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" }
-// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a710" "target-features"="+bf16,+complxnum,+crc,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+ras,+rcpc,+rdm,+sb,+sve,+sve2,+sve2-bitperm,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a" }
+// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" }
+// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a710" "target-features"="+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+ete,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+ssbs,+sve,+sve2,+sve2-bitperm,+trbe,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a" }
 // CHECK: attributes #[[ATTR5]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "tune-cpu"="cortex-a710" }
 // CHECK: attributes #[[ATTR6]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+ete,+fp-armv8,+neon,+trbe,+v8a" }
 // CHECK: attributes #[[ATTR7]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "tune-cpu"="generic" }
 // CHECK: attributes #[[ATTR8]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+perfmon,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+v8.1a,+v8.2a,+v8a" "tune-cpu"="cortex-a710" }
 // CHECK: attributes #[[ATTR9]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" "tune-cpu"="cortex-a710" }
-// CHECK: attributes #[[ATTR10]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+ccdp,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a" }
-// CHECK: attributes #[[ATTR11]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+ccdp,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,-sve" }
+// CHECK: attributes #[[ATTR10]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a" }
+// CHECK: attributes #[[ATTR11]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,-sve" }
 // CHECK: attributes #[[ATTR12]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" }
 // CHECK: attributes #[[ATTR13]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16" }
-// CHECK: attributes #[[ATTR14]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
-// CHECK: attributes #[[ATTR15]] = { noinline nounwind optnone "branch-target-enforcement" "guarded-control-stack" "no-trapping-math"="true" "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
+// CHECK: attributes #[[ATTR14]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
+// CHECK: attributes #[[ATTR15]] = { noinline nounwind optnone "branch-target-enforcement" "guarded-control-stack" "no-trapping-math"="true" "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
 // CHECK: attributes #[[ATTR16]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
 // CHECK: attributes #[[ATTR17]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-v9.3a" }
 //.
diff --git a/clang/test/Driver/aarch64-negative-modifiers-for-default-features.c b/clang/test/Driver/aarch64-negative-modifiers-for-default-features.c
new file mode 100644
index 0000000000000..03dd8af7588ca
--- /dev/null
+++ b/clang/test/Driver/aarch64-negative-modifiers-for-default-features.c
@@ -0,0 +1,12 @@
+// Test that default features (e.g. flagm/sb/ssbs for 8.5) can be disabled via -march.
+
+// RUN: %clang --target=aarch64 -march=armv8.5-a+noflagm+nosb+nossbs -c %s -### 2>&1 | FileCheck %s
+// CHECK: "-triple" "aarch64"
+// CHECK-SAME: "-target-feature" "+v8.5a"
+// CHECK-SAME: "-target-feature" "-flagm"
+// CHECK-SAME: "-target-feature" "-sb"
+// CHECK-SAME: "-target-feature" "-ssbs"
+
+// CHECK-NOT: "-target-feature" "+flagm"
+// CHECK-NOT: "-target-feature" "+sb"
+// CHECK-NOT: "-target-feature" "+ssbs"
diff --git a/clang/test/Driver/arm-sb.c b/clang/test/Driver/arm-sb.c
index f2704f33c2711..9c0f381171cb6 100644
--- a/clang/test/Driver/arm-sb.c
+++ b/clang/test/Driver/arm-sb.c
@@ -11,6 +11,6 @@
 
 // RUN: %clang -### -target arm-none-none-eabi %s 2>&1 | FileCheck %s --check-prefix=ABSENT
 // RUN: %clang -### -target aarch64-none-elf %s 2>&1 | FileCheck %s --check-prefix=ABSENT
-// RUN: %clang -### -target aarch64-none-elf -march=armv8.5a+nosb %s 2>&1 | FileCheck %s --check-prefix=ABSENT
+// RUN: %clang -### -target aarch64-none-elf -march=armv8.5a+nosb %s 2>&1 | FileCheck %s --check-prefix=NOSB
 // ABSENT-NOT: "-target-feature" "+sb"
 // ABSENT-NOT: "-target-feature" "-sb"
diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a12.c b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a12.c
index 27f066a310708..fcc8b674df33f 100644
--- a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a12.c
+++ b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a12.c
@@ -6,7 +6,6 @@
 // CHECK-NEXT:     Architecture Feature(s)                                Description
 // CHECK-NEXT:     FEAT_AES, FEAT_PMULL                                   Enable AES support
 // CHECK-NEXT:     FEAT_AdvSIMD                                           Enable Advanced SIMD instructions
-// CHECK-NEXT:     FEAT_CCIDX                                             Enable Armv8.3-A Extend of the CCSIDR number of sets
 // CHECK-NEXT:     FEAT_CRC32                                             Enable Armv8.0-A CRC-32 checksum instructions
 // CHECK-NEXT:     FEAT_DPB                                               Enable Armv8.2-A data Cache Clean to Point of Persistence
 // CHECK-NEXT:     FEAT_FCMA                                              Enable Armv8.3-A Floating-point complex number support
diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a13.c b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a13.c
index 197b210259951..dae95b1297e14 100644
--- a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a13.c
+++ b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a13.c
@@ -7,7 +7,6 @@
 // CHECK-NEXT:     FEAT_AES, FEAT_PMULL                                   Enable AES support
 // CHECK-NEXT:     FEAT_AMUv1                                             Enable Armv8.4-A Activity Monitors extension
 // CHECK-NEXT:     FEAT_AdvSIMD                                           Enable Advanced SIMD instructions
-// CHECK-NEXT:     FEAT_CCIDX                                             Enable Armv8.3-A Extend of the CCSIDR number of sets
 // CHECK-NEXT:     FEAT_CRC32                                             Enable Armv8.0-A CRC-32 checksum instructions
 // CHECK-NEXT:     FEAT_DIT                                               Enable Armv8.4-A Data Independent Timing instructions
 // CHECK-NEXT:     FEAT_DPB                                               Enable Armv8.2-A data Cache Clean to Point of Persistence
diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a14.c b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a14.c
index f1731ef034a0c..8ddcddede4110 100644
--- a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a14.c
+++ b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a14.c
@@ -7,7 +7,6 @@
 // CHECK-NEXT:     FEAT_AES, FEAT_PMULL                                   Enable AES support
 // CHECK-NEXT:     FEAT_AMUv1                                             Enable Armv8.4-A Activity Monitors extension
 // CHECK-NEXT:     FEAT_AdvSIMD                                           Enable Advanced SIMD instructions
-// CHECK-NEXT:     FEAT_CCIDX                                             Enable Armv8.3-A Extend of the CCSIDR number of sets
 // CHECK-NEXT:     FEAT_CRC32                                             Enable Armv8.0-A CRC-32 checksum instructions
 // CHECK-NEXT:     FEAT_CSV2_2                                            Enable architectural speculation restriction
 // CHECK-NEXT:     FEAT_DIT                                               Enable Armv8.4-A Data Independent Timing instructions
diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a15.c b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a15.c
index 267287eaf7b6e..302661a80db30 100644
--- a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a15.c
+++ b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a15.c
@@ -10,7 +10,6 @@
 // CHECK-NEXT:     FEAT_AdvSIMD                                           Enable Advanced SIMD instructions
 // CHECK-NEXT:     FEAT_BF16                                              Enable BFloat16 Extension
 // CHECK-NEXT:     FEAT_BTI                                               Enable Branch Target Identification
-// CHECK-NEXT:     FEAT_CCIDX                                             Enable Armv8.3-A Extend of the CCSIDR number of sets
 // CHECK-NEXT:     FEAT_CRC32                                             Enable Armv8.0-A CRC-32 checksum instructions
 // CHECK-NEXT:     FEAT_CSV2_2                                            Enable architectural speculation restriction
 // CHECK-NEXT:     FEAT_DIT                                               Enable Armv8.4-A Data Independent Timing instructions
diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a16.c b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a16.c
index de382a3497b81..a3f5150f87c28 100644
--- a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a16.c
+++ b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a16.c
@@ -10,7 +10,6 @@
 // CHECK-NEXT:     FEAT_AdvSIMD                                           Enable Advanced SIMD instructions
 // CHECK-NEXT:     FEAT_BF16                                              Enable BFloat16 Extension
 // CHECK-NEXT:     FEAT_BTI                                               Enable Branch Target Identification
-// CHECK-NEXT:     FEAT_CCIDX                                             Enable Armv8.3-A Extend of the CCSIDR number of sets
 // CHECK-NEXT:     FEAT_CRC32                                             Enable Armv8.0-A CRC-32 checksum instructions
 // CHECK-NEXT:     FEAT_CSV2_2                                            Enable architectural speculation restriction
 // CHECK-NEXT:     FEAT_DIT                                               Enable Armv8.4-A Data Independent Timing instructions
diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a17.c b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a17.c
index 641aa3f82387b..dec90ffddb6ee 100644
--- a/clang/test/Driver/print-enabled-extensions/aarch64-apple-a17.c
+++ b/clang/test/Driver/print-enabled-extensions/aarch64-apple-a17.c
@@ -10,7 +10,6 @@
 // CHECK-NEXT:     FEAT_AdvSIMD                                           Enable Advanced SIMD instructions
 // CHECK-NEXT:     FEAT_BF16                                              Enable BFloat16 Extension
 // CHECK-NEXT:     FEAT_BTI                                               Enable Branch Target Identification
-// CHECK-NEXT:     FEAT_CCIDX                                             Enable Armv8.3-A Extend of the CCSIDR number of sets
 // CHECK-NEXT:     FEAT_CRC32                                             Enable Armv8.0-A CRC-32 checksum instructions
 // CHECK-NEXT:     FEAT_CSV2_2                                            Enable architectural speculation restriction
 // CHECK-NEXT:     FEAT_DIT                                               Enable Armv8.4-A Data Independent Timing instructions
diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-apple-m4.c b/clang/test/Driver/print-enabled-extensions/aarch64-apple-m4.c
index 5096bc6940520..0a815174033dc 100644
--- a/clang/test/Driver/print-enabled-extensions/aarch64-apple-m4.c
+++ b/clang/test/Driver/print-enabled-extensions/aarch64-apple-m4.c
@@ -10,7 +10,6 @@
 // CHECK-NEXT:     FEAT_AdvSIMD                                           Enable Advanced SIMD instructions
 // CHECK-NEXT:     FEAT_BF16                                              Enable BFloat16 Extension
 // CHECK-NEXT:     FEAT_BTI                                               Enable Branch Target Identification
-// CHECK-NEXT:     FEAT_CCIDX                                             Enable Armv8.3-A Extend of the CCSIDR number of sets
 // CHECK-NEXT:     FEAT_CRC32                                             Enable Armv8.0-A CRC-32 checksum instructions
 // CHECK-NEXT:     FEAT_CSV2_2                                            Enable architectural speculation restriction
 // CHECK-NEXT:     FEAT_DIT                                               Enable Armv8.4-A Data Independent Timing instructions
@@ -51,7 +50,6 @@
 // CHECK-NEXT:     FEAT_SME_F64F64                                        Enable Scalable Matrix Extension (SME) F64F64 instructions
 // CHECK-NEXT:     FEAT_SME_I16I64                                        Enable Scalable Matrix Extension (SME) I16I64 instructions
 // CHECK-NEXT:     FEAT_SPECRES                                           Enable Armv8.5-A execution and data prediction invalidation instructions
-// CHECK-NEXT:     FEAT_SSBS, FEAT_SSBS2                                  Enable Speculative Store Bypass Safe bit
 // CHECK-NEXT:     FEAT_TLBIOS, FEAT_TLBIRANGE                            Enable Armv8.4-A TLB Range and Maintenance instructions
 // CHECK-NEXT:     FEAT_TRF                                               Enable Armv8.4-A Trace extension
 // CHECK-NEXT:     FEAT_UAO                                               Enable Armv8.2-A UAO PState
diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82.c
index 2b85201c2c6fe..9875c6922d379 100644
--- a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82.c
+++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82.c
@@ -5,7 +5,6 @@
 // CHECK-EMPTY:
 // CHECK-NEXT:     Architecture Feature(s)                                Description
 // CHECK-NEXT:     FEAT_AdvSIMD                                           Enable Advanced SIMD instructions
-// CHECK-NEXT:     FEAT_CCIDX                                             Enable Armv8.3-A Extend of the CCSIDR number of sets
 // CHECK-NEXT:     FEAT_CRC32                                             Enable Armv8.0-A CRC-32 checksum instructions
 // CHECK-NEXT:     FEAT_CSV2_2                                            Enable architectural speculation restriction
 // CHECK-NEXT:     FEAT_DIT                                               Enable Armv8.4-A Data Independent Timing instructions
diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82ae.c b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82ae.c
index 417687b4af287..2db44d7827aad 100644
--- a/clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82ae.c
+++ b/clang/test/Driver/print-enabled-extensions/aarch64-cortex-r82ae.c
@@ -5,7 +5,6 @@
 // CHECK-EMPTY:
 // CHECK-NEXT:     Architecture Feature(s)                                Description
 // CHECK-NEXT:     FEAT_AdvSIMD                                           Enable Advanced SIMD instructions
-// CHECK-NEXT:     FEAT_CCIDX                                             Enable Armv8.3-A Extend of the CCSIDR number of sets
 // CHECK-NEXT:     FEAT_CRC32                                             Enable Armv8.0-A CRC-32 checksum instructions
 // CHECK-NEXT:     FEAT_CSV2_2                                            Enable architectural speculation restriction
 // CHECK-NEXT:     FEAT_DIT                                               Enable Armv8.4-A Data Independent Timing instructions
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 69af6f7efa783..a4f8f8c2d9629 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -787,27 +787,26 @@ def HasV8_2aOps : Architecture64<8, 2, "a", "v8.2a",
   [HasV8_1aOps, FeaturePsUAO, FeaturePAN_RWV, FeatureRAS, FeatureCCPP],
   !listconcat(HasV8_1aOps.DefaultExts, [FeatureRAS])>;
 def HasV8_3aOps : Architecture64<8, 3, "a", "v8.3a",
-  [HasV8_2aOps, FeatureRCPC, FeaturePAuth, FeatureJS, FeatureCCIDX,
-    FeatureComplxNum],
+  [HasV8_2aOps, FeatureRCPC, FeaturePAuth, FeatureJS, FeatureComplxNum],
   !listconcat(HasV8_2aOps.DefaultExts, [FeatureComplxNum, FeatureJS,
-    FeaturePAuth, FeatureRCPC])>;
+    FeaturePAuth, FeatureRCPC,  FeatureCCIDX])>;
 def HasV8_4aOps : Architecture64<8, 4, "a", "v8.4a",
   [HasV8_3aOps, FeatureDotProd, FeatureNV, FeatureMPAM, FeatureDIT,
     FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI, FeatureFlagM,
     FeatureRCPC_IMMO, FeatureLSE2],
-  !listconcat(HasV8_3aOps.DefaultExts, [FeatureDotProd])>;
+  !listconcat(HasV8_3aOps.DefaultExts, [FeatureDotProd, FeatureDIT, FeatureFlagM])>;
 def HasV8_5aOps : Architecture64<8, 5, "a", "v8.5a",
   [HasV8_4aOps, FeatureAltFPCmp, FeatureFRInt3264, FeatureSpecRestrict,
-    FeatureSSBS, FeatureSB, FeaturePredRes, FeatureCacheDeepPersist,
+    FeatureSB, FeaturePredRes, FeatureCacheDeepPersist,
     FeatureBranchTargetId],
-  !listconcat(HasV8_4aOps.DefaultExts, [])>;
+  !listconcat(HasV8_4aOps.DefaultExts, [FeaturePredRes, FeatureSSBS, FeatureBranchTargetId, FeatureSB])>;
 def HasV8_6aOps : Architecture64<8, 6, "a", "v8.6a",
   [HasV8_5aOps, FeatureAMVS, FeatureBF16, FeatureFineGrainedTraps,
     FeatureEnhancedCounterVirtualization, FeatureMatMulInt8],
   !listconcat(HasV8_5aOps.DefaultExts, [FeatureBF16, FeatureMatMulInt8])>;
 def HasV8_7aOps : Architecture64<8, 7, "a", "v8.7a",
   [HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX],
-  !listconcat(HasV8_6aOps.DefaultExts, [])>;
+  !listconcat(HasV8_6aOps.DefaultExts, [FeatureWFxT])>;
 def HasV8_8aOps : Architecture64<8, 8, "a", "v8.8a",
   [HasV8_7aOps, FeatureHBC, FeatureMOPS, FeatureNMI],
   !listconcat(HasV8_7aOps.DefaultExts, [FeatureMOPS, FeatureHBC])>;
@@ -825,7 +824,7 @@ def HasV9_1aOps : Architecture64<9, 1, "a", "v9.1a",
   !listconcat(HasV9_0aOps.DefaultExts, [FeatureBF16, FeatureMatMulInt8, FeatureRME])>;
 def HasV9_2aOps : Architecture64<9, 2, "a", "v9.2a",
   [HasV8_7aOps, HasV9_1aOps],
-  !listconcat(HasV9_1aOps.DefaultExts, [FeatureMEC])>;
+  !listconcat(HasV9_1aOps.DefaultExts, [FeatureMEC, FeatureWFxT])>;
 def HasV9_3aOps : Architecture64<9, 3, "a", "v9.3a",
   [HasV8_8aOps, HasV9_2aOps],
   !listconcat(HasV9_2aOps.DefaultExts, [FeatureMOPS, FeatureHBC])>;
@@ -842,7 +841,7 @@ def HasV8_0rOps : Architecture64<8, 0, "r", "v8r",
     //v8.2
     FeatureRAS, FeaturePsUAO, FeatureCCPP, FeaturePAN_RWV,
     //v8.3
-    FeatureCCIDX, FeaturePAuth, FeatureRCPC,
+    FeaturePAuth, FeatureRCPC,
     //v8.4
     FeatureTRACEV8_4, FeatureTLB_RMI, FeatureFlagM, FeatureDIT, FeatureSEL2,
     FeatureRCPC_IMMO,
@@ -853,7 +852,7 @@ def HasV8_0rOps : Architecture64<8, 0, "r", "v8r",
   // For v8-R, we do not enable crypto and align with GCC that enables a more
   // minimal set of optional architecture extensions.
   !listconcat(
-    !listremove(HasV8_5aOps.DefaultExts, [FeatureLSE]),
+    !listremove(HasV8_5aOps.DefaultExts, [FeatureBranchTargetId, FeaturePredRes]),
     [FeatureSSBS, FeatureFullFP16, FeatureFP16FML, FeatureSB]
   )>;
 
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index 71384a23c49af..6df87fc6a815f 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -688,6 +688,7 @@ def ProcessorFeatures {
                                  FeatureMatMulInt8, FeatureBF16, FeatureAM,
                                  FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
                                  FeatureFP16FML,
+                                 FeatureCCIDX,
                                  FeatureSB, FeaturePAuth, FeatureSSBS, FeatureSVE, FeatureSVE2,
                                  FeatureComplxNum, FeatureCRC, FeatureDotProd,
                                  FeatureFPARMv8,FeatureFullFP16, FeatureJS, FeatureLSE,
@@ -695,6 +696,7 @@ def ProcessorFeatures {
   list<SubtargetFeature> A520 = [HasV9_2aOps, FeaturePerfMon, FeatureAM,
                                  FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
                                  FeatureFP16FML,
+                                 FeatureCCIDX,
                                  FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes,
                                  FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC,
                                  FeatureFPARMv8, FeatureFullFP16, FeatureMatMulInt8, FeatureJS,
@@ -703,6 +705,7 @@ def ProcessorFeatures {
   list<SubtargetFeature> A520AE = [HasV9_2aOps, FeaturePerfMon, FeatureAM,
                                  FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
                                  FeatureFP16FML,
+                                 FeatureCCIDX,
                                  FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes,
                                  FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC,
                                  FeatureFPARMv8, FeatureFullFP16, FeatureMatMulInt8, FeatureJS,
@@ -734,12 +737,14 @@ def ProcessorFeatures {
                                  FeaturePerfMon, FeatureRCPC, FeatureSPE,
                                  FeatureSSBS, FeatureCRC, FeatureLSE, FeatureRAS, FeatureRDM];
   list<SubtargetFeature> A710 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon,
+                                 FeatureCCIDX, FeatureSSBS,
                                  FeatureETE, FeatureMTE, FeatureFP16FML,
                                  FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8,
                                  FeaturePAuth, FeatureFlagM, FeatureSB, FeatureSVE, FeatureSVE2,
                                  FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8,
                                  FeatureFullFP16, FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM];
   list<SubtargetFeature> A715 = [HasV9_0aOps, FeatureNEON, FeatureMTE,
+                                 FeatureCCIDX,
                                  FeatureFP16FML, FeatureSVE, FeatureTRBE,
                                  FeatureSVE2BitPerm, FeatureBF16, FeatureETE,
                                  FeaturePerfMon, FeatureMatMulInt8, FeatureSPE,
@@ -749,6 +754,7 @@ def ProcessorFeatures {
                                  FeatureJS, FeatureLSE, FeatureRAS,
                                  FeatureRCPC, FeatureRDM];
   list<SubtargetFeature> A720 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML,
+                                 FeatureCCIDX,
                                  FeatureTRBE, FeatureSVE2BitPerm, FeatureETE,
                                  FeaturePerfMon, FeatureSPE, FeatureSPE_EEF,
                                  FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes,
@@ -757,6 +763,7 @@ def ProcessorFeatures {
                                  FeatureJS, FeatureLSE, FeatureNEON, FeatureRAS,
                                  FeatureRCPC, FeatureRDM];
   list<SubtargetFeature> A720AE = [HasV9_2aOps, FeatureMTE, FeatureFP16FML,
+                                 FeatureCCIDX,
                                  FeatureTRBE, FeatureSVE2BitPerm, FeatureETE,
                                  FeaturePerfMon, FeatureSPE, FeatureSPE_EEF,
                                  FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes,
@@ -765,6 +772,7 @@ def ProcessorFeatures {
                                  FeatureJS, FeatureLSE, FeatureNEON, FeatureRAS,
                                  FeatureRCPC, FeatureRDM];
   list<SubtargetFeature> A725 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML,
+                                 FeatureCCIDX,
                                  FeatureETE, FeaturePerfMon, FeatureSPE,
                                  FeatureSVE2BitPerm, FeatureSPE_EEF, FeatureTRBE,
                                  FeatureFlagM, FeaturePredRes, FeatureSB, FeatureSSBS,
@@ -800,6 +808,7 @@ def ProcessorFeatures {
                                  FeatureMatMulInt8, FeatureBF16, FeatureAM,
                                  FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
                                  FeatureFP16FML,
+                                 FeatureCCIDX,
                                  FeaturePAuth, FeatureSSBS, FeatureSB, FeatureSVE, FeatureSVE2, FeatureFlagM,
                                  FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureFullFP16,
                                  FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM];
@@ -808,6 +817,7 @@ def ProcessorFeatures {
                                  FeatureSPE, FeatureBF16, FeatureMatMulInt8,
                                  FeatureMTE, FeatureSVE2BitPerm, FeatureFullFP16,
                                  FeatureFP16FML,
+                                 FeatureCCIDX,
                                  FeatureSB, FeaturePAuth, FeaturePredRes, FeatureFlagM, FeatureSSBS,
                                  FeatureSVE2, FeatureComplxNum, FeatureCRC, FeatureFPARMv8, FeatureJS,
                                  FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureDotProd];
@@ -815,11 +825,13 @@ def ProcessorFeatures {
                                  FeaturePerfMon, FeatureETE, FeatureTRBE,
                                  FeatureSPE, FeatureMTE, FeatureSVE2BitPerm,
                                  FeatureFP16FML, FeatureSPE_EEF,
+                                 FeatureCCIDX,
                                  FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes,
                                  FeatureSVE, FeatureSVE2, FeatureComplxNum, FeatureCRC, FeatureDotProd,
                                  FeatureFPARMv8, FeatureFullFP16, FeatureMatMulInt8, FeatureJS, FeatureLSE,
                                  FeatureNEON, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureBF16];
   list<SubtargetFeature> X925 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML,
+                                 FeatureCCIDX,
                                  FeatureETE, FeaturePerfMon, FeatureSPE,
                                  FeatureSVE2BitPerm, FeatureSPE_EEF, FeatureTRBE,
                                  FeatureFlagM, FeaturePredRes, FeatureSB, FeatureSSBS,
@@ -863,23 +875,26 @@ def ProcessorFeatures {
   list<SubtargetFeature> AppleA15 = [HasV8_6aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8,
                                      FeatureNEON, FeaturePerfMon, FeatureSHA3,
                                      FeatureFullFP16, FeatureFP16FML,
-                                     FeatureComplxNum, FeatureCRC, FeatureJS, FeatureLSE,
-                                     FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM,
-                                     FeatureBF16, FeatureDotProd, FeatureMatMulInt8];
+                                     FeatureComplxNum, FeatureCRC, FeatureJS,
+                                     FeatureLSE, FeaturePAuth,
+                                     FeatureRAS, FeatureRCPC, FeatureRDM,
+                                     FeatureBF16, FeatureDotProd, FeatureMatMulInt8, FeatureSSBS];
   list<SubtargetFeature> AppleA16 = [HasV8_6aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8,
                                      FeatureNEON, FeaturePerfMon, FeatureSHA3,
                                      FeatureFullFP16, FeatureFP16FML,
                                      FeatureHCX,
-                                     FeatureComplxNum, FeatureCRC, FeatureJS, FeatureLSE,
-                                     FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM,
-                                     FeatureBF16, FeatureDotProd, FeatureMatMulInt8];
+                                     FeatureComplxNum, FeatureCRC, FeatureJS,
+                                     FeatureLSE, FeaturePAuth,
+                                     FeatureRAS, FeatureRCPC, FeatureRDM,
+                                     FeatureBF16, FeatureDotProd, FeatureMatMulInt8, FeatureSSBS];
   list<SubtargetFeature> AppleA17 = [HasV8_6aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8,
                                      FeatureNEON, FeaturePerfMon, FeatureSHA3,
                                      FeatureFullFP16, FeatureFP16FML,
                                      FeatureHCX,
-                                     FeatureComplxNum, FeatureCRC, FeatureJS, FeatureLSE,
-                                     FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM,
-                                     FeatureBF16, FeatureDotProd, FeatureMatMulInt8];
+                                     FeatureComplxNum, FeatureCRC, FeatureJS,
+                                     FeatureLSE, FeaturePAuth,
+                                     FeatureRAS, FeatureRCPC, FeatureRDM,
+                                     FeatureBF16, FeatureDotProd, FeatureMatMulInt8, FeatureSSBS];
   list<SubtargetFeature> AppleM4 = [HasV9_2aOps, FeatureSHA2, FeatureFPARMv8,
                                     FeatureNEON, FeaturePerfMon, FeatureSHA3,
                                     FeatureFullFP16, FeatureFP16FML,
@@ -909,6 +924,7 @@ def ProcessorFeatures {
                                        FeatureMatMulInt8, FeatureMTE, FeatureSVE2,
                                        FeatureSVE2BitPerm, FeatureTRBE,
                                        FeaturePerfMon,
+                                       FeatureCCIDX,
                                        FeatureDotProd, FeatureFullFP16, FeatureSB, FeatureSSBS, FeatureSVE,
                                        FeatureComplxNum, FeatureCRC, FeatureFPARMv8, FeatureJS, FeatureLSE,
                                        FeatureNEON, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM];
@@ -916,6 +932,7 @@ def ProcessorFeatures {
                                       FeatureFullFP16, FeatureMTE, FeaturePerfMon,
                                       FeatureRandGen, FeatureSPE, FeatureSPE_EEF,
                                       FeatureSVE2BitPerm,
+                                      FeatureCCIDX,
                                       FeatureSSBS, FeatureSB, FeaturePredRes, FeaturePAuth, FeatureFlagM,
                                       FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum,
                                       FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8,
@@ -926,6 +943,7 @@ def ProcessorFeatures {
                                            FeatureFullFP16, FeatureMatMulInt8, FeatureNEON,
                                            FeaturePerfMon, FeatureRandGen, FeatureSPE,
                                            FeatureSSBS, FeatureSVE,
+                                           FeatureCCIDX,
                                            FeatureSHA3, FeatureSM4, FeatureDotProd, FeatureComplxNum,
                                            FeatureCRC, FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS,
                                            FeatureRCPC, FeatureRDM];
@@ -934,6 +952,7 @@ def ProcessorFeatures {
                                        FeatureFullFP16, FeatureMatMulInt8, FeatureNEON,
                                        FeaturePerfMon, FeatureRandGen, FeatureSPE,
                                        FeatureSSBS, FeatureSVE,
+                                       FeatureCCIDX,
                                        FeatureSHA3, FeatureSM4, FeatureDotProd, FeatureComplxNum,
                                        FeatureCRC, FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS,
                                        FeatureRCPC, FeatureRDM];
@@ -941,12 +960,14 @@ def ProcessorFeatures {
                                        FeaturePerfMon, FeatureETE, FeatureMatMulInt8,
                                        FeatureNEON, FeatureSVE2BitPerm, FeatureFP16FML,
                                        FeatureMTE, FeatureRandGen,
+                                       FeatureCCIDX,
                                        FeatureSVE, FeatureSVE2, FeatureSSBS, FeatureFullFP16, FeatureDotProd,
                                        FeatureComplxNum, FeatureCRC, FeatureFPARMv8, FeatureJS, FeatureLSE,
                                        FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM];
   list<SubtargetFeature> NeoverseV3 = [HasV9_2aOps, FeatureETE, FeatureFP16FML,
                                       FeatureFullFP16, FeatureLS64, FeatureMTE,
                                       FeaturePerfMon, FeatureRandGen, FeatureSPE,
+                                      FeatureCCIDX,
                                       FeatureSPE_EEF, FeatureSVE2BitPerm, FeatureBRBE,
                                       FeatureSSBS, FeatureSB, FeaturePredRes, FeaturePAuth, FeatureFlagM,
                                       FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC,
@@ -957,12 +978,14 @@ def ProcessorFeatures {
                                       FeaturePerfMon, FeatureRandGen, FeatureSPE,
                                       FeatureSPE_EEF, FeatureSVE2BitPerm, FeatureBRBE,
                                       FeatureSSBS, FeatureSB, FeaturePredRes, FeaturePAuth, FeatureFlagM,
+                                      FeatureCCIDX,
                                       FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC,
                                       FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8, FeatureJS,
                                       FeatureLSE, FeatureNEON, FeatureRAS, FeatureRCPC, FeatureRDM,
                                       FeatureRME];
   list<SubtargetFeature> Saphira    = [HasV8_4aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8,
                                        FeatureNEON, FeatureSPE, FeaturePerfMon, FeatureCRC,
+                                       FeatureCCIDX,
                                        FeatureLSE, FeatureRDM, FeatureRAS, FeatureRCPC];
   list<SubtargetFeature> ThunderX   = [HasV8_0aOps, FeatureCRC, FeatureSHA2, FeatureAES,
                                        FeatureFPARMv8, FeaturePerfMon, FeatureNEON];
@@ -971,6 +994,7 @@ def ProcessorFeatures {
                                           FeatureRDM];
   list<SubtargetFeature> ThunderX3T110 = [HasV8_3aOps, FeatureCRC, FeatureSHA2, FeatureAES,
                                           FeatureFPARMv8, FeatureNEON, FeatureLSE,
+                                          FeatureCCIDX,
                                           FeaturePAuth, FeaturePerfMon, FeatureComplxNum,
                                           FeatureJS, FeatureRAS, FeatureRCPC, FeatureRDM];
   list<SubtargetFeature> TSV110 = [HasV8_2aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8,
@@ -983,6 +1007,7 @@ def ProcessorFeatures {
                                     FeatureSHA2, FeatureSHA3, FeatureAES,
                                     FeatureFullFP16, FeatureBF16, FeatureComplxNum, FeatureCRC,
                                     FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8, FeatureJS,
+                                    FeatureCCIDX,
                                     FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM];
   list<SubtargetFeature> Ampere1A = [HasV8_6aOps, FeatureNEON, FeaturePerfMon,
                                      FeatureMTE, FeatureSSBS, FeatureRandGen,
@@ -991,6 +1016,7 @@ def ProcessorFeatures {
                                      FeatureFullFP16, FeatureBF16, FeatureComplxNum,
                                      FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8,
                                      FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC,
+                                     FeatureCCIDX,
                                      FeatureRDM];
   list<SubtargetFeature> Ampere1B = [HasV8_7aOps, FeatureNEON, FeaturePerfMon,
                                      FeatureMTE, FeatureSSBS, FeatureRandGen,
@@ -999,6 +1025,7 @@ def ProcessorFeatures {
                                      FeatureWFxT, FeatureFullFP16, FeatureBF16, FeatureComplxNum,
                                      FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8,
                                      FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC,
+                                     FeatureCCIDX,
                                      FeatureRDM];
 
   list<SubtargetFeature> Oryon = [HasV8_6aOps, FeatureNEON, FeaturePerfMon,
@@ -1007,6 +1034,7 @@ def ProcessorFeatures {
                                      FeatureSHA3, FeatureAES,
                                      FeatureSPE, FeatureBF16, FeatureComplxNum, FeatureCRC,
                                      FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8,
+                                     FeatureSSBS, FeatureCCIDX,
                                      FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM];
 
   // ETE and TRBE are future architecture extensions. We temporarily enable them
diff --git a/llvm/test/MC/AArch64/arm64-system-encoding.s b/llvm/test/MC/AArch64/arm64-system-encoding.s
index 313ec91177460..c58a8f0cb841c 100644
--- a/llvm/test/MC/AArch64/arm64-system-encoding.s
+++ b/llvm/test/MC/AArch64/arm64-system-encoding.s
@@ -1,5 +1,5 @@
 ; RUN: not llvm-mc -triple arm64-apple-darwin -show-encoding < %s 2> %t | FileCheck %s
-; RUN: not llvm-mc -triple arm64-apple-darwin -mattr=+v8.3a -show-encoding < %s 2> %t | FileCheck %s --check-prefix=CHECK-V83
+; RUN: not llvm-mc -triple arm64-apple-darwin -mattr=+ccidx -show-encoding < %s 2> %t | FileCheck %s --check-prefix=CHECK-V83
 ; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
 
 foo:
diff --git a/llvm/test/MC/AArch64/armv8.5a-ssbs-error.s b/llvm/test/MC/AArch64/armv8.5a-ssbs-error.s
index a7c9f4c4fbb5c..cd5ab43046c79 100644
--- a/llvm/test/MC/AArch64/armv8.5a-ssbs-error.s
+++ b/llvm/test/MC/AArch64/armv8.5a-ssbs-error.s
@@ -1,5 +1,5 @@
 // RUN: not llvm-mc -triple aarch64 -show-encoding -mattr=+ssbs  < %s 2>&1 | FileCheck %s
-// RUN: not llvm-mc -triple aarch64 -show-encoding -mattr=+v8.5a < %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple aarch64 -show-encoding -mattr=+v8.5a < %s 2>&1 | FileCheck %s --check-prefix=NOSPECID
 // RUN: not llvm-mc -triple aarch64 -show-encoding -mattr=-ssbs  < %s 2>&1 | FileCheck %s --check-prefix=NOSPECID
 
 msr SSBS, #16
diff --git a/llvm/test/MC/AArch64/armv8.5a-ssbs.s b/llvm/test/MC/AArch64/armv8.5a-ssbs.s
index ec6670f8ecc34..2a8b7b000646a 100644
--- a/llvm/test/MC/AArch64/armv8.5a-ssbs.s
+++ b/llvm/test/MC/AArch64/armv8.5a-ssbs.s
@@ -1,5 +1,5 @@
 // RUN:     llvm-mc -triple aarch64 -show-encoding -mattr=+ssbs  < %s      | FileCheck %s
-// RUN:     llvm-mc -triple aarch64 -show-encoding -mattr=+v8.5a < %s      | FileCheck %s
+// RUN: not llvm-mc -triple aarch64 -show-encoding -mattr=+v8.5a < %s 2>&1 | FileCheck %s --check-prefix=NOSPECID
 // RUN:     llvm-mc -triple aarch64 -show-encoding -mcpu=cortex-a65 < %s   | FileCheck %s
 // RUN:     llvm-mc -triple aarch64 -show-encoding -mcpu=cortex-a65ae < %s | FileCheck %s
 // RUN:     llvm-mc -triple aarch64 -show-encoding -mcpu=cortex-a76 < %s   | FileCheck %s
diff --git a/llvm/test/MC/Disassembler/AArch64/armv8.5a-ssbs.txt b/llvm/test/MC/Disassembler/AArch64/armv8.5a-ssbs.txt
index 7698751c88076..84d4fa6accccf 100644
--- a/llvm/test/MC/Disassembler/AArch64/armv8.5a-ssbs.txt
+++ b/llvm/test/MC/Disassembler/AArch64/armv8.5a-ssbs.txt
@@ -1,5 +1,5 @@
 # RUN: llvm-mc -triple=aarch64 -mattr=+ssbs  -disassemble < %s | FileCheck %s
-# RUN: llvm-mc -triple=aarch64 -mattr=+v8.5a -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple=aarch64 -mattr=+v8.5a -disassemble < %s | FileCheck %s --check-prefix=NOSPECID
 # RUN: llvm-mc -triple=aarch64 -mcpu=cortex-a76 -disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple=aarch64 -mcpu=cortex-a76ae -disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple=aarch64 -mattr=+v8r -disassemble < %s | FileCheck %s --check-prefix=NOSPECID
diff --git a/llvm/test/MC/Disassembler/AArch64/basic-a64-instructions.txt b/llvm/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
index c76bb0b902096..f46301e8c1c15 100644
--- a/llvm/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
+++ b/llvm/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
@@ -2,7 +2,7 @@
 # RUN: llvm-mc -triple=arm64 -mattr=+v8a,+fp-armv8 -disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple=arm64 -mattr=+v8a,+fp-armv8,+fullfp16 -disassemble < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP16
 # RUN: llvm-mc -triple=arm64 -mattr=+v8.2a -disassemble < %s | FileCheck %s --check-prefix=CHECK-V82
-# RUN: llvm-mc -triple=arm64 -mattr=+v8.3a -disassemble < %s | FileCheck %s --check-prefix=CHECK-V83
+# RUN: llvm-mc -triple=arm64 -mattr=+ccidx -disassemble < %s | FileCheck %s --check-prefix=CHECK-V83
 
 #------------------------------------------------------------------------------
 # Add/sub (immediate)
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index 9d08dd83684c9..dcbbc68332c79 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1141,7 +1141,8 @@ INSTANTIATE_TEST_SUITE_P(
                               AArch64::AEK_FP16,        AArch64::AEK_FP16FML,
                               AArch64::AEK_SB,          AArch64::AEK_JSCVT,
                               AArch64::AEK_FCMA,        AArch64::AEK_PERFMON,
-                              AArch64::AEK_ETE,         AArch64::AEK_AM}),
+                              AArch64::AEK_ETE,         AArch64::AEK_AM,
+                              AArch64::AEK_CCIDX}),
         AArch64CPUTestParams("cortex-a520", "armv9.2-a",
                              {AArch64::AEK_BF16,        AArch64::AEK_I8MM,
                               AArch64::AEK_SVE,         AArch64::AEK_SVE2,
@@ -1156,7 +1157,7 @@ INSTANTIATE_TEST_SUITE_P(
                               AArch64::AEK_PERFMON,     AArch64::AEK_PREDRES,
                               AArch64::AEK_JSCVT,       AArch64::AEK_FCMA,
                               AArch64::AEK_PERFMON,     AArch64::AEK_AM,
-                              AArch64::AEK_ETE}),
+                              AArch64::AEK_ETE,         AArch64::AEK_CCIDX}),
         AArch64CPUTestParams("cortex-a520ae", "armv9.2-a",
                              {AArch64::AEK_BF16,        AArch64::AEK_I8MM,
                               AArch64::AEK_SVE,         AArch64::AEK_SVE2,
@@ -1171,7 +1172,7 @@ INSTANTIATE_TEST_SUITE_P(
                               AArch64::AEK_PERFMON,     AArch64::AEK_PREDRES,
                               AArch64::AEK_JSCVT,       AArch64::AEK_FCMA,
                               AArch64::AEK_PERFMON,     AArch64::AEK_AM,
-                              AArch64::AEK_ETE}),
+                              AArch64::AEK_ETE,         AArch64::AEK_CCIDX}),
         AArch64CPUTestParams("cortex-a57", "armv8-a",
                              {AArch64::AEK_CRC, AArch64::AEK_AES,
                               AArch64::AEK_SHA2, AArch64::AEK_FP,
@@ -1256,7 +1257,8 @@ INSTANTIATE_TEST_SUITE_P(
              AArch64::AEK_SVE2,    AArch64::AEK_SVE2BITPERM, AArch64::AEK_PAUTH,
              AArch64::AEK_FLAGM,   AArch64::AEK_SB,          AArch64::AEK_I8MM,
              AArch64::AEK_BF16,    AArch64::AEK_JSCVT,       AArch64::AEK_FCMA,
-             AArch64::AEK_PERFMON, AArch64::AEK_ETE}),
+             AArch64::AEK_PERFMON, AArch64::AEK_ETE,         AArch64::AEK_CCIDX,
+             AArch64::AEK_SSBS}),
         AArch64CPUTestParams("cortex-a715", "armv9-a",
                              {AArch64::AEK_CRC,     AArch64::AEK_FP,
                               AArch64::AEK_BF16,    AArch64::AEK_SIMD,
@@ -1271,7 +1273,8 @@ INSTANTIATE_TEST_SUITE_P(
                               AArch64::AEK_FP16FML, AArch64::AEK_FP16,
                               AArch64::AEK_FLAGM,   AArch64::AEK_JSCVT,
                               AArch64::AEK_FCMA,    AArch64::AEK_PERFMON,
-                              AArch64::AEK_ETE,     AArch64::AEK_TRBE}),
+                              AArch64::AEK_ETE,     AArch64::AEK_TRBE,
+                              AArch64::AEK_CCIDX}),
         AArch64CPUTestParams("cortex-a720", "armv9.2-a",
                              {AArch64::AEK_BF16,        AArch64::AEK_I8MM,
                               AArch64::AEK_SVE,         AArch64::AEK_SVE2,
@@ -1287,7 +1290,7 @@ INSTANTIATE_TEST_SUITE_P(
                               AArch64::AEK_PROFILE,     AArch64::AEK_JSCVT,
                               AArch64::AEK_FCMA,        AArch64::AEK_PERFMON,
                               AArch64::AEK_ETE,         AArch64::AEK_SPE_EEF,
-                              AArch64::AEK_TRBE}),
+                              AArch64::AEK_TRBE,        AArch64::AEK_CCIDX}),
         AArch64CPUTestParams("cortex-a720ae", "armv9.2-a",
                              {AArch64::AEK_BF16,        AArch64::AEK_I8MM,
                               AArch64::AEK_SVE,         AArch64::AEK_SVE2,
@@ -1303,7 +1306,7 @@ INSTANTIATE_TEST_SUITE_P(
                               AArch64::AEK_PROFILE,     AArch64::AEK_JSCVT,
                               AArch64::AEK_FCMA,        AArch64::AEK_PERFMON,
                               AArch64::AEK_ETE,         AArch64::AEK_SPE_EEF,
-                              AArch64::AEK_TRBE}),
+                              AArch64::AEK_TRBE,        AArch64::AEK_CCIDX}),
         AArch64CPUTestParams("cortex-a725", "armv9.2-a",
                              {AArch64::AEK_BF16,        AArch64::AEK_I8MM,
                               AArch64::AEK_SVE,         AArch64::AEK_SVE2,
@@ -1318,7 +1321,8 @@ INSTANTIATE_TEST_SUITE_P(
                               AArch64::AEK_PERFMON,     AArch64::AEK_PREDRES,
                               AArch64::AEK_PROFILE,     AArch64::AEK_JSCVT,
                               AArch64::AEK_FCMA,        AArch64::AEK_ETE,
-                              AArch64::AEK_SPE_EEF,     AArch64::AEK_TRBE}),
+                              AArch64::AEK_SPE_EEF,     AArch64::AEK_TRBE,
+                              AArch64::AEK_CCIDX}),
         AArch64CPUTestParams(
             "neoverse-v1", "armv8.4-a",
             {AArch64::AEK_RAS,     AArch64::AEK_SVE,     AArch64::AEK_SSBS,
@@ -1329,7 +1333,8 @@ INSTANTIATE_TEST_SUITE_P(
              AArch64::AEK_SM4,     AArch64::AEK_FP16,    AArch64::AEK_BF16,
              AArch64::AEK_PROFILE, AArch64::AEK_RAND,    AArch64::AEK_FP16FML,
              AArch64::AEK_I8MM,    AArch64::AEK_JSCVT,   AArch64::AEK_FCMA,
-             AArch64::AEK_PAUTH,   AArch64::AEK_PERFMON, AArch64::AEK_CCDP}),
+             AArch64::AEK_PAUTH,   AArch64::AEK_PERFMON, AArch64::AEK_CCDP,
+             AArch64::AEK_CCIDX}),
         AArch64CPUTestParams("neoverse-v2", "armv9-a",
                              {AArch64::AEK_RAS,         AArch64::AEK_SVE,
                               AArch64::AEK_SSBS,        AArch64::AEK_RCPC,
@@ -1343,7 +1348,7 @@ INSTANTIATE_TEST_SUITE_P(
                               AArch64::AEK_SVE2BITPERM, AArch64::AEK_RAND,
                               AArch64::AEK_JSCVT,       AArch64::AEK_FCMA,
                               AArch64::AEK_PAUTH,       AArch64::AEK_PERFMON,
-                              AArch64::AEK_ETE}),
+                              AArch64::AEK_ETE,         AArch64::AEK_CCIDX}),
         AArch64CPUTestParams("neoverse-v3", "armv9.2-a",
                              {AArch64::AEK_BF16,        AArch64::AEK_I8MM,
                               AArch64::AEK_SVE,         AArch64::AEK_SVE2,
@@ -1361,7 +1366,7 @@ INSTANTIATE_TEST_SUITE_P(
                               AArch64::AEK_PROFILE,     AArch64::AEK_JSCVT,
                               AArch64::AEK_FCMA,        AArch64::AEK_PERFMON,
                               AArch64::AEK_ETE,         AArch64::AEK_SPE_EEF,
-                              AArch64::AEK_RME}),
+                              AArch64::AEK_RME,         AArch64::AEK_CCIDX}),
         AArch64CPUTestParams("neoverse-v3ae", "armv9.2-a",
                              {AArch64::AEK_BF16,        AArch64::AEK_I8MM,
                               AArch64::AEK_SVE,         AArch64::AEK_SVE2,
@@ -1379,7 +1384,7 @@ INSTANTIATE_TEST_SUITE_P(
                               AArch64::AEK_PROFILE,     AArch64::AEK_JSCVT,
                               AArch64::AEK_FCMA,        AArch64::AEK_PERFMON,
                               AArch64::AEK_ETE,         AArch64::AEK_SPE_EEF,
-                              AArch64::AEK_RME}),
+                              AArch64::AEK_RME,         AArch64::AEK_CCIDX}),
         AArch64CPUTestParams(
             "cortex-r82", "armv8-r",
             {AArch64::AEK_CRC, AArch64::AEK_RDM, AArch64::AEK_SSBS,
@@ -1427,7 +1432,7 @@ INSTANTIATE_TEST_SUITE_P(
                               AArch64::AEK_FP16FML,     AArch64::AEK_FLAGM,
                               AArch64::AEK_JSCVT,       AArch64::AEK_FCMA,
                               AArch64::AEK_PERFMON,     AArch64::AEK_AM,
-                              AArch64::AEK_ETE}),
+                              AArch64::AEK_ETE,         AArch64::AEK_CCIDX}),
         AArch64CPUTestParams("cortex-x3", "armv9-a",
                              {AArch64::AEK_CRC,     AArch64::AEK_FP,
                               AArch64::AEK_BF16,    AArch64::AEK_SIMD,
@@ -1442,7 +1447,8 @@ INSTANTIATE_TEST_SUITE_P(
                               AArch64::AEK_PREDRES, AArch64::AEK_FLAGM,
                               AArch64::AEK_SSBS,    AArch64::AEK_JSCVT,
                               AArch64::AEK_FCMA,    AArch64::AEK_PERFMON,
-                              AArch64::AEK_ETE,     AArch64::AEK_TRBE}),
+                              AArch64::AEK_ETE,     AArch64::AEK_TRBE,
+                              AArch64::AEK_CCIDX}),
         AArch64CPUTestParams("cortex-x4", "armv9.2-a",
                              {AArch64::AEK_BF16,        AArch64::AEK_I8MM,
                               AArch64::AEK_SVE,         AArch64::AEK_SVE2,
@@ -1458,7 +1464,7 @@ INSTANTIATE_TEST_SUITE_P(
                               AArch64::AEK_PROFILE,     AArch64::AEK_JSCVT,
                               AArch64::AEK_FCMA,        AArch64::AEK_PERFMON,
                               AArch64::AEK_ETE,         AArch64::AEK_SPE_EEF,
-                              AArch64::AEK_TRBE}),
+                              AArch64::AEK_TRBE,        AArch64::AEK_CCIDX}),
         AArch64CPUTestParams("cortex-x925", "armv9.2-a",
                              {AArch64::AEK_BF16,        AArch64::AEK_I8MM,
                               AArch64::AEK_SVE,         AArch64::AEK_SVE2,
@@ -1473,7 +1479,8 @@ INSTANTIATE_TEST_SUITE_P(
                               AArch64::AEK_PERFMON,     AArch64::AEK_PREDRES,
                               AArch64::AEK_PROFILE,     AArch64::AEK_JSCVT,
                               AArch64::AEK_FCMA,        AArch64::AEK_ETE,
-                              AArch64::AEK_SPE_EEF,     AArch64::AEK_TRBE}),
+                              AArch64::AEK_SPE_EEF,     AArch64::AEK_TRBE,
+                              AArch64::AEK_CCIDX}),
         AArch64CPUTestParams("cyclone", "armv8-a",
                              {AArch64::AEK_AES, AArch64::AEK_SHA2,
                               AArch64::AEK_FP, AArch64::AEK_SIMD,
@@ -1591,7 +1598,7 @@ INSTANTIATE_TEST_SUITE_P(
              AArch64::AEK_RCPC,    AArch64::AEK_DOTPROD, AArch64::AEK_FP16,
              AArch64::AEK_FP16FML, AArch64::AEK_SHA3,    AArch64::AEK_BF16,
              AArch64::AEK_I8MM,    AArch64::AEK_JSCVT,   AArch64::AEK_FCMA,
-             AArch64::AEK_PAUTH,   AArch64::AEK_PERFMON}),
+             AArch64::AEK_PAUTH,   AArch64::AEK_PERFMON, AArch64::AEK_SSBS}),
         AArch64CPUTestParams(
             "apple-m2", "armv8.6-a",
             {AArch64::AEK_CRC,     AArch64::AEK_AES,     AArch64::AEK_SHA2,
@@ -1600,7 +1607,7 @@ INSTANTIATE_TEST_SUITE_P(
              AArch64::AEK_RCPC,    AArch64::AEK_DOTPROD, AArch64::AEK_FP16,
              AArch64::AEK_FP16FML, AArch64::AEK_SHA3,    AArch64::AEK_BF16,
              AArch64::AEK_I8MM,    AArch64::AEK_JSCVT,   AArch64::AEK_FCMA,
-             AArch64::AEK_PAUTH,   AArch64::AEK_PERFMON}),
+             AArch64::AEK_PAUTH,   AArch64::AEK_PERFMON, AArch64::AEK_SSBS}),
         AArch64CPUTestParams(
             "apple-a16", "armv8.6-a",
             {AArch64::AEK_CRC,     AArch64::AEK_AES,     AArch64::AEK_SHA2,
@@ -1609,7 +1616,8 @@ INSTANTIATE_TEST_SUITE_P(
              AArch64::AEK_RCPC,    AArch64::AEK_DOTPROD, AArch64::AEK_FP16,
              AArch64::AEK_FP16FML, AArch64::AEK_SHA3,    AArch64::AEK_BF16,
              AArch64::AEK_I8MM,    AArch64::AEK_JSCVT,   AArch64::AEK_FCMA,
-             AArch64::AEK_PAUTH,   AArch64::AEK_PERFMON, AArch64::AEK_HCX}),
+             AArch64::AEK_PAUTH,   AArch64::AEK_PERFMON, AArch64::AEK_HCX,
+             AArch64::AEK_SSBS}),
         AArch64CPUTestParams(
             "apple-m3", "armv8.6-a",
             {AArch64::AEK_CRC,     AArch64::AEK_AES,     AArch64::AEK_SHA2,
@@ -1618,7 +1626,8 @@ INSTANTIATE_TEST_SUITE_P(
              AArch64::AEK_RCPC,    AArch64::AEK_DOTPROD, AArch64::AEK_FP16,
              AArch64::AEK_FP16FML, AArch64::AEK_SHA3,    AArch64::AEK_BF16,
              AArch64::AEK_I8MM,    AArch64::AEK_JSCVT,   AArch64::AEK_FCMA,
-             AArch64::AEK_PAUTH,   AArch64::AEK_PERFMON, AArch64::AEK_HCX}),
+             AArch64::AEK_PAUTH,   AArch64::AEK_PERFMON, AArch64::AEK_HCX,
+             AArch64::AEK_SSBS}),
         AArch64CPUTestParams(
             "apple-a17", "armv8.6-a",
             {AArch64::AEK_CRC,     AArch64::AEK_AES,     AArch64::AEK_SHA2,
@@ -1627,7 +1636,8 @@ INSTANTIATE_TEST_SUITE_P(
              AArch64::AEK_RCPC,    AArch64::AEK_DOTPROD, AArch64::AEK_FP16,
              AArch64::AEK_FP16FML, AArch64::AEK_SHA3,    AArch64::AEK_BF16,
              AArch64::AEK_I8MM,    AArch64::AEK_JSCVT,   AArch64::AEK_FCMA,
-             AArch64::AEK_PAUTH,   AArch64::AEK_PERFMON, AArch64::AEK_HCX}),
+             AArch64::AEK_PAUTH,   AArch64::AEK_PERFMON, AArch64::AEK_HCX,
+             AArch64::AEK_SSBS}),
         AArch64CPUTestParams("apple-m4", "armv9.2-a",
                              {AArch64::AEK_CRC,       AArch64::AEK_AES,
                               AArch64::AEK_SHA2,      AArch64::AEK_SHA3,
@@ -1692,7 +1702,8 @@ INSTANTIATE_TEST_SUITE_P(
                               AArch64::AEK_I8MM,        AArch64::AEK_JSCVT,
                               AArch64::AEK_FCMA,        AArch64::AEK_PAUTH,
                               AArch64::AEK_FP16FML,     AArch64::AEK_PERFMON,
-                              AArch64::AEK_ETE,         AArch64::AEK_TRBE}),
+                              AArch64::AEK_ETE,         AArch64::AEK_TRBE,
+                              AArch64::AEK_CCIDX}),
         AArch64CPUTestParams("neoverse-n3", "armv9.2-a",
                              {AArch64::AEK_BF16,    AArch64::AEK_I8MM,
                               AArch64::AEK_SVE,     AArch64::AEK_SVE2,
@@ -1708,7 +1719,8 @@ INSTANTIATE_TEST_SUITE_P(
                               AArch64::AEK_RAND,    AArch64::AEK_SVE2BITPERM,
                               AArch64::AEK_FP16FML, AArch64::AEK_PROFILE,
                               AArch64::AEK_JSCVT,   AArch64::AEK_PERFMON,
-                              AArch64::AEK_ETE,     AArch64::AEK_SPE_EEF}),
+                              AArch64::AEK_ETE,     AArch64::AEK_SPE_EEF,
+                              AArch64::AEK_CCIDX}),
         AArch64CPUTestParams(
             "ampere1", "armv8.6-a",
             {AArch64::AEK_CRC,  AArch64::AEK_FP,    AArch64::AEK_FP16,
@@ -1717,17 +1729,18 @@ INSTANTIATE_TEST_SUITE_P(
              AArch64::AEK_SHA3, AArch64::AEK_BF16,  AArch64::AEK_SHA2,
              AArch64::AEK_AES,  AArch64::AEK_I8MM,  AArch64::AEK_SSBS,
              AArch64::AEK_SB,   AArch64::AEK_RAND,  AArch64::AEK_JSCVT,
-             AArch64::AEK_FCMA, AArch64::AEK_PAUTH, AArch64::AEK_PERFMON}),
+             AArch64::AEK_FCMA, AArch64::AEK_PAUTH, AArch64::AEK_PERFMON,
+             AArch64::AEK_CCIDX}),
         AArch64CPUTestParams(
             "ampere1a", "armv8.6-a",
-            {AArch64::AEK_CRC,   AArch64::AEK_FP,     AArch64::AEK_FP16,
-             AArch64::AEK_SIMD,  AArch64::AEK_RAS,    AArch64::AEK_LSE,
-             AArch64::AEK_RDM,   AArch64::AEK_RCPC,   AArch64::AEK_DOTPROD,
-             AArch64::AEK_SM4,   AArch64::AEK_SHA3,   AArch64::AEK_BF16,
-             AArch64::AEK_SHA2,  AArch64::AEK_AES,    AArch64::AEK_I8MM,
-             AArch64::AEK_SSBS,  AArch64::AEK_SB,     AArch64::AEK_RAND,
-             AArch64::AEK_MTE,   AArch64::AEK_JSCVT,  AArch64::AEK_FCMA,
-             AArch64::AEK_PAUTH, AArch64::AEK_PERFMON}),
+            {AArch64::AEK_CRC,   AArch64::AEK_FP,      AArch64::AEK_FP16,
+             AArch64::AEK_SIMD,  AArch64::AEK_RAS,     AArch64::AEK_LSE,
+             AArch64::AEK_RDM,   AArch64::AEK_RCPC,    AArch64::AEK_DOTPROD,
+             AArch64::AEK_SM4,   AArch64::AEK_SHA3,    AArch64::AEK_BF16,
+             AArch64::AEK_SHA2,  AArch64::AEK_AES,     AArch64::AEK_I8MM,
+             AArch64::AEK_SSBS,  AArch64::AEK_SB,      AArch64::AEK_RAND,
+             AArch64::AEK_MTE,   AArch64::AEK_JSCVT,   AArch64::AEK_FCMA,
+             AArch64::AEK_PAUTH, AArch64::AEK_PERFMON, AArch64::AEK_CCIDX}),
         AArch64CPUTestParams(
             "ampere1b", "armv8.7-a",
             {AArch64::AEK_CRC,   AArch64::AEK_FP,    AArch64::AEK_FP16,
@@ -1738,7 +1751,7 @@ INSTANTIATE_TEST_SUITE_P(
              AArch64::AEK_SSBS,  AArch64::AEK_SB,    AArch64::AEK_RAND,
              AArch64::AEK_MTE,   AArch64::AEK_JSCVT, AArch64::AEK_FCMA,
              AArch64::AEK_PAUTH, AArch64::AEK_CSSC,  AArch64::AEK_PERFMON,
-             AArch64::AEK_WFXT}),
+             AArch64::AEK_WFXT,  AArch64::AEK_CCIDX}),
         AArch64CPUTestParams(
             "neoverse-512tvb", "armv8.4-a",
             {AArch64::AEK_RAS,     AArch64::AEK_SVE,     AArch64::AEK_SSBS,
@@ -1749,7 +1762,8 @@ INSTANTIATE_TEST_SUITE_P(
              AArch64::AEK_SM4,     AArch64::AEK_FP16,    AArch64::AEK_BF16,
              AArch64::AEK_PROFILE, AArch64::AEK_RAND,    AArch64::AEK_FP16FML,
              AArch64::AEK_I8MM,    AArch64::AEK_JSCVT,   AArch64::AEK_FCMA,
-             AArch64::AEK_PAUTH,   AArch64::AEK_PERFMON, AArch64::AEK_CCDP}),
+             AArch64::AEK_PAUTH,   AArch64::AEK_PERFMON, AArch64::AEK_CCDP,
+             AArch64::AEK_CCIDX}),
         AArch64CPUTestParams("thunderx2t99", "armv8.1-a",
                              {AArch64::AEK_CRC, AArch64::AEK_AES,
                               AArch64::AEK_SHA2, AArch64::AEK_LSE,
@@ -1762,7 +1776,7 @@ INSTANTIATE_TEST_SUITE_P(
                               AArch64::AEK_SIMD, AArch64::AEK_RAS,
                               AArch64::AEK_RCPC, AArch64::AEK_JSCVT,
                               AArch64::AEK_FCMA, AArch64::AEK_PAUTH,
-                              AArch64::AEK_PERFMON}),
+                              AArch64::AEK_PERFMON, AArch64::AEK_CCIDX}),
         AArch64CPUTestParams("thunderx", "armv8-a",
                              {AArch64::AEK_CRC, AArch64::AEK_AES,
                               AArch64::AEK_SHA2, AArch64::AEK_SIMD,
@@ -1805,7 +1819,7 @@ INSTANTIATE_TEST_SUITE_P(
             {AArch64::AEK_AES, AArch64::AEK_FP, AArch64::AEK_SIMD,
              AArch64::AEK_PERFMON, AArch64::AEK_SHA2, AArch64::AEK_PROFILE,
              AArch64::AEK_CRC, AArch64::AEK_LSE, AArch64::AEK_RDM,
-             AArch64::AEK_RAS, AArch64::AEK_RCPC}),
+             AArch64::AEK_RAS, AArch64::AEK_RCPC, AArch64::AEK_CCIDX}),
         AArch64CPUTestParams(
             "oryon-1", "armv8.6-a",
             {AArch64::AEK_CRC,     AArch64::AEK_FP,      AArch64::AEK_PAUTH,
@@ -1814,7 +1828,8 @@ INSTANTIATE_TEST_SUITE_P(
              AArch64::AEK_RCPC,    AArch64::AEK_DOTPROD, AArch64::AEK_SM4,
              AArch64::AEK_SHA3,    AArch64::AEK_BF16,    AArch64::AEK_SHA2,
              AArch64::AEK_AES,     AArch64::AEK_I8MM,    AArch64::AEK_RAND,
-             AArch64::AEK_PROFILE, AArch64::AEK_PERFMON})),
+             AArch64::AEK_PROFILE, AArch64::AEK_PERFMON, AArch64::AEK_CCIDX,
+             AArch64::AEK_SSBS})),
 
     AArch64CPUTestParams::PrintToStringParamName);
 
@@ -2248,9 +2263,9 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
 }
 
 TEST(TargetParserTest, AArch64PrintSupportedExtensions) {
-  std::string expected =
-      "All available -march extensions for AArch64\n\n"
-      "    Name                Architecture Feature(s)                                Description\n";
+  std::string expected = "All available -march extensions for AArch64\n\n"
+                         "    Name                Architecture Feature(s)      "
+                         "                          Description\n";
 
   outs().flush();
   testing::internal::CaptureStdout();
diff --git a/llvm/utils/TableGen/ARMTargetDefEmitter.cpp b/llvm/utils/TableGen/ARMTargetDefEmitter.cpp
index a4b25025b3c61..71ca331461c00 100644
--- a/llvm/utils/TableGen/ARMTargetDefEmitter.cpp
+++ b/llvm/utils/TableGen/ARMTargetDefEmitter.cpp
@@ -19,10 +19,38 @@
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
 #include <cstdint>
+#include <set>
 #include <string>
 
 using namespace llvm;
 
+/// Collect the full set of implied features for a SubtargetFeature.
+static void CollectImpliedFeatures(std::set<Record *> &SeenFeats, Record *Rec) {
+  assert(Rec->isSubClassOf("SubtargetFeature") &&
+         "Rec is not a SubtargetFeature");
+
+  SeenFeats.insert(Rec);
+  for (Record *Implied : Rec->getValueAsListOfDefs("Implies"))
+    CollectImpliedFeatures(SeenFeats, Implied);
+}
+
+static void CheckFeatureTree(Record *Root) {
+  std::set<Record *> SeenFeats;
+  CollectImpliedFeatures(SeenFeats, Root);
+
+  // Check that each of the mandatory (implied) features which is an
+  // ExtensionWithMArch is also enabled by default.
+  auto DefaultExtsVec = Root->getValueAsListOfDefs("DefaultExts");
+  std::set<Record *> DefaultExts{DefaultExtsVec.begin(), DefaultExtsVec.end()};
+  for (auto *Feat : SeenFeats) {
+    if (Feat->isSubClassOf("ExtensionWithMArch") && !DefaultExts.count(Feat))
+      PrintFatalError(Root->getLoc(),
+                      "ExtensionWithMArch " + Feat->getName() +
+                          " is implied (mandatory) as a SubtargetFeature, but "
+                          "is not present in DefaultExts");
+  }
+}
+
 static void EmitARMTargetDef(RecordKeeper &RK, raw_ostream &OS) {
   OS << "// Autogenerated by ARMTargetDefEmitter.cpp\n\n";
 
@@ -283,9 +311,7 @@ static void EmitARMTargetDef(RecordKeeper &RK, raw_ostream &OS) {
     auto Profile = Arch->getValueAsString("Profile");
     auto ArchInfo = ArchInfoName(Major, Minor, Profile);
 
-    // The apple-latest alias is backend only, do not expose it to -mcpu.
-    if (Name == "apple-latest")
-      continue;
+    CheckFeatureTree(Arch);
 
     OS << "  {\n"
        << "    \"" << Name << "\",\n"

From c1336c9e3bd6c0887ead386043c547b3a3ed76a9 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Mon, 19 Aug 2024 18:50:47 +0100
Subject: [PATCH 199/427] [GlobalISel] Bail out early for big-endian (#103310)

If we continue through the function we can currently hit crashes. We can
bail out early and fall back to SDAG.

Fixes #103032

(cherry picked from commit 05d17a1c705e1053f95b90aa37d91ce4f94a9287)
---
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  |  1 +
 llvm/lib/Target/ARM/ARMCallLowering.cpp       |  9 ++++++++
 llvm/lib/Target/ARM/ARMCallLowering.h         |  2 ++
 .../AArch64/GlobalISel/endian_fallback.ll     | 21 +++++++++++++++++++
 .../ARM/GlobalISel/arm-irtranslator.ll        |  2 +-
 .../ARM/GlobalISel/arm-param-lowering.ll      |  2 +-
 6 files changed, 35 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/endian_fallback.ll

diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 68a8a273a1b47..eb010afd41b6b 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3889,6 +3889,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
                                F.getSubprogram(), &F.getEntryBlock());
     R << "unable to translate in big endian mode";
     reportTranslationError(*MF, *TPC, *ORE, R);
+    return false;
   }
 
   // Release the per-function state when we return, whether we succeeded or not.
diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp
index 9cc162d041f48..883808ae981f5 100644
--- a/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -50,6 +50,13 @@
 
 using namespace llvm;
 
+// Whether Big-endian GISel is enabled, defaults to off, can be enabled for
+// testing.
+static cl::opt<bool>
+    EnableGISelBigEndian("enable-arm-gisel-bigendian", cl::Hidden,
+                         cl::init(false),
+                         cl::desc("Enable Global-ISel Big Endian Lowering"));
+
 ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI)
     : CallLowering(&TLI) {}
 
@@ -539,3 +546,5 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &
 
   return true;
 }
+
+bool ARMCallLowering::enableBigEndian() const { return EnableGISelBigEndian; }
\ No newline at end of file
diff --git a/llvm/lib/Target/ARM/ARMCallLowering.h b/llvm/lib/Target/ARM/ARMCallLowering.h
index 38095617fb4f3..32c95a044d7b7 100644
--- a/llvm/lib/Target/ARM/ARMCallLowering.h
+++ b/llvm/lib/Target/ARM/ARMCallLowering.h
@@ -42,6 +42,8 @@ class ARMCallLowering : public CallLowering {
   bool lowerCall(MachineIRBuilder &MIRBuilder,
                  CallLoweringInfo &Info) const override;
 
+  bool enableBigEndian() const override;
+
 private:
   bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
                       ArrayRef<Register> VRegs,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/endian_fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/endian_fallback.ll
new file mode 100644
index 0000000000000..6c27b4dd85f9b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/endian_fallback.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple aarch64-unknown-linux-musl -O0 -global-isel-abort=2 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LE
+; RUN: llc -mtriple aarch64_be-unknown-linux-musl -O0 -global-isel-abort=2 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-BE
+
+; Make sure we fall-back to SDAG for BE targets.
+
+; CHECK-LE-NOT: warning: Instruction selection used fallback path for foo
+; CHECK-BE: warning: Instruction selection used fallback path for foo
+
+define <4 x i6> @foo(float %0, <4 x i6> %1) {
+; CHECK-LE-LABEL: foo:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    fmov d0, d1
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: foo:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    fmov d0, d1
+; CHECK-BE-NEXT:    ret
+  ret <4 x i6> %1
+}
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
index 411cf78b621f8..dc1d4b289c2ab 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple arm-unknown -mattr=+vfp2,+v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE
-; RUN: llc -mtriple armeb-unknown -mattr=+vfp2,+v4t -global-isel -global-isel-abort=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG
+; RUN: llc -mtriple armeb-unknown -mattr=+vfp2,+v4t -global-isel -global-isel-abort=0 -enable-arm-gisel-bigendian -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG
 
 define void @test_void_return() {
 ; CHECK-LABEL: name: test_void_return
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll b/llvm/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll
index e8cd182196b62..65586f72c7c19 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -O0 -mtriple arm-unknown -mattr=+vfp2,+v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=ARM -check-prefix=LITTLE
-; RUN: llc -O0 -mtriple armeb-unknown -mattr=+vfp2,+v4t -global-isel -global-isel-abort=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=ARM -check-prefix=BIG
+; RUN: llc -O0 -mtriple armeb-unknown -mattr=+vfp2,+v4t -global-isel -global-isel-abort=0 -enable-arm-gisel-bigendian -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=ARM -check-prefix=BIG
 ; RUN: llc -O0 -mtriple thumb-unknown -mattr=+vfp2,+v6t2 -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE -check-prefix=THUMB
 
 declare arm_aapcscc ptr @simple_reg_params_target(i32, ptr)

From 6dbc0e236b3e3a651302d079d1c64934976bc0b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sun, 18 Aug 2024 00:44:16 +0300
Subject: [PATCH 200/427] [LLD] [MinGW] Recognize the -rpath option (#102886)

GNU ld silently accepts the -rpath option for Windows targets, as a
no-op.

This has lead to some build systems (and users) passing this option
while building for Windows/MinGW, even if Windows doesn't have any
concept like rpath.

Older versions of Conan did include -rpath in the pkg-config files it
generated, see e.g.

https://github.com/conan-io/conan/blob/17c58f0c61931f9de218ac571cd97a8e0befa68e/conans/client/generators/pkg_config.py#L104-L114
and
https://github.com/conan-io/conan/blob/17c58f0c61931f9de218ac571cd97a8e0befa68e/conans/client/build/compiler_flags.py#L26-L34
- and see https://github.com/mstorsjo/llvm-mingw/issues/300 for user
reports about this issue.

Recognize the option in LLD for MinGW targets, to improve drop-in
compatibility compared to GNU ld, but produce a warning to alert users
that the option really has no effect for these targets.

(cherry picked from commit 69f76c782b554a004078af6909c19a11e3846415)
---
 lld/MinGW/Driver.cpp       | 3 +++
 lld/MinGW/Options.td       | 3 +++
 lld/test/MinGW/driver.test | 6 ++++++
 3 files changed, 12 insertions(+)

diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp
index 35fd478a21905..c7d7b9cfca386 100644
--- a/lld/MinGW/Driver.cpp
+++ b/lld/MinGW/Driver.cpp
@@ -448,6 +448,9 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
       add("-errorlimit:" + s);
   }
 
+  if (auto *a = args.getLastArg(OPT_rpath))
+    warn("parameter " + a->getSpelling() + " has no effect on PE/COFF targets");
+
   for (auto *a : args.filtered(OPT_mllvm))
     add("-mllvm:" + StringRef(a->getValue()));
 
diff --git a/lld/MinGW/Options.td b/lld/MinGW/Options.td
index 56f67e3dd96c4..7bd5fb80749da 100644
--- a/lld/MinGW/Options.td
+++ b/lld/MinGW/Options.td
@@ -243,6 +243,9 @@ defm: EqNoHelp<"sysroot">;
 def: F<"sort-common">;
 def: F<"start-group">;
 
+// Ignored options, that produce warnings
+defm rpath: EqNoHelp<"rpath">;
+
 // Ignore GCC collect2 LTO plugin related options. Note that we don't support
 // GCC LTO, but GCC collect2 passes these options even in non-LTO mode.
 def: J<"plugin-opt=-fresolution=">;
diff --git a/lld/test/MinGW/driver.test b/lld/test/MinGW/driver.test
index cbccd09793c2f..0dab66b613c77 100644
--- a/lld/test/MinGW/driver.test
+++ b/lld/test/MinGW/driver.test
@@ -446,3 +446,9 @@ RUN: ld.lld -### foo.o -m i386pep --build-id=none 2>&1 | FileCheck -check-prefix
 RUN: ld.lld -### foo.o -m i386pep -s 2>&1 | FileCheck -check-prefix=NO_BUILD_ID %s
 RUN: ld.lld -### foo.o -m i386pep -S 2>&1 | FileCheck -check-prefix=NO_BUILD_ID %s
 NO_BUILD_ID: -build-id:no
+
+RUN: ld.lld -### foo.o -m i386pep -rpath foo 2>&1 | FileCheck -check-prefix=WARN_RPATH %s
+RUN: ld.lld -### foo.o -m i386pep --rpath foo 2>&1 | FileCheck -check-prefix=WARN_RPATH %s
+RUN: ld.lld -### foo.o -m i386pep -rpath=foo 2>&1 | FileCheck -check-prefix=WARN_RPATH %s
+RUN: ld.lld -### foo.o -m i386pep --rpath=foo 2>&1 | FileCheck -check-prefix=WARN_RPATH %s
+WARN_RPATH: warning: parameter -{{-?}}rpath has no effect on PE/COFF targets

From 3ffa5421ca657c04d4df170307c1f9a3c6293003 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron@aaronballman.com>
Date: Mon, 19 Aug 2024 16:54:12 -0400
Subject: [PATCH 201/427] [C++23] Fix infinite recursion (Clang 19.x
 regression) (#104829)

d469794d0cdfd2fea50a6ce0c0e33abb242d744c was fixing an issue with
triggering vtable instantiations, but it accidentally introduced
infinite recursion when the type to be checked is the same as the type
used in a base specifier or field declaration.

Fixes #104802

(cherry picked from commit 435cb0dc5eca08cdd8d9ed0d887fa1693cc2bf33)
---
 clang/lib/Sema/SemaDeclCXX.cpp  |  9 +++++++--
 clang/test/SemaCXX/gh102293.cpp | 27 ++++++++++++++++++++++++++-
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index ecf8754143a49..92c47be67339e 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -7056,11 +7056,16 @@ void Sema::CheckCompletedCXXClass(Scope *S, CXXRecordDecl *Record) {
         if (!RD->hasConstexprDestructor())
           return false;
 
+        QualType CanUnqualT = T.getCanonicalType().getUnqualifiedType();
         for (const CXXBaseSpecifier &B : RD->bases())
-          if (!Check(B.getType(), Check))
+          if (B.getType().getCanonicalType().getUnqualifiedType() !=
+                  CanUnqualT &&
+              !Check(B.getType(), Check))
             return false;
         for (const FieldDecl *FD : RD->fields())
-          if (!Check(FD->getType(), Check))
+          if (FD->getType().getCanonicalType().getUnqualifiedType() !=
+                  CanUnqualT &&
+              !Check(FD->getType(), Check))
             return false;
         return true;
       };
diff --git a/clang/test/SemaCXX/gh102293.cpp b/clang/test/SemaCXX/gh102293.cpp
index 30629fc03bf6a..d4218cc13dcec 100644
--- a/clang/test/SemaCXX/gh102293.cpp
+++ b/clang/test/SemaCXX/gh102293.cpp
@@ -1,5 +1,4 @@
 // RUN: %clang_cc1 -std=c++23 -fsyntax-only -verify %s
-// expected-no-diagnostics
 
 template <typename T> static void destroy() {
     T t;
@@ -20,3 +19,29 @@ struct S : HasVT {
   HasD<> v;
 };
 
+// Ensure we don't get infinite recursion from the check, however. See GH104802
+namespace GH104802 {
+class foo {       // expected-note {{definition of 'GH104802::foo' is not complete until the closing '}'}}
+  foo a;          // expected-error {{field has incomplete type 'foo'}}
+
+  virtual int c();
+};
+
+class bar {       // expected-note {{definition of 'GH104802::bar' is not complete until the closing '}'}}
+  const bar a;    // expected-error {{field has incomplete type 'const bar'}}
+
+  virtual int c();
+};
+
+class baz {       // expected-note {{definition of 'GH104802::baz' is not complete until the closing '}'}}
+  typedef class baz blech;
+  blech a;        // expected-error {{field has incomplete type 'blech' (aka 'GH104802::baz')}}
+
+  virtual int c();
+};
+
+class quux : quux { // expected-error {{base class has incomplete type}} \
+                     expected-note {{definition of 'GH104802::quux' is not complete until the closing '}'}}
+  virtual int c();
+};
+}

From 64b8514e6c1a663660fbb93ec7f623b3e40a2020 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Thu, 8 Aug 2024 13:14:09 +0800
Subject: [PATCH 202/427] =?UTF-8?q?Reland=20[C++20]=20[Modules]=20[Itanium?=
 =?UTF-8?q?=20ABI]=20Generate=20the=20vtable=20in=20the=20mod=E2=80=A6=20(?=
 =?UTF-8?q?#102287)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reland https://github.com/llvm/llvm-project/pull/75912

The differences of this PR between
https://github.com/llvm/llvm-project/pull/75912 are:

- Fixed a regression in `Decl::isInAnotherModuleUnit()` in DeclBase.cpp
pointed by @mizvekov and add the corresponding test.
- Fixed the regression in windows
https://github.com/llvm/llvm-project/issues/97447. The changes are in
`CodeGenModule::getVTableLinkage` from
`clang/lib/CodeGen/CGVTables.cpp`. According to the feedbacks from MSVC
devs, the linkage of vtables won't affected by modules. So I simply
skipped the case for MSVC.

Given this is more or less fundamental to the use of modules. I hope we
can backport this to 19.x.

(cherry picked from commit 847f9cb0e868c8ec34f9aa86fdf846f8c4e0388b)
---
 clang/include/clang/AST/DeclBase.h            |   7 ++
 .../include/clang/Serialization/ASTBitCodes.h |   3 +
 clang/include/clang/Serialization/ASTReader.h |   6 +
 clang/include/clang/Serialization/ASTWriter.h |   7 ++
 clang/lib/AST/ASTContext.cpp                  |   3 +-
 clang/lib/AST/DeclBase.cpp                    |  34 +++--
 clang/lib/CodeGen/CGVTables.cpp               |  56 +++++----
 clang/lib/CodeGen/ItaniumCXXABI.cpp           |   3 +
 clang/lib/Sema/SemaDecl.cpp                   |   9 ++
 clang/lib/Sema/SemaDeclCXX.cpp                |  14 ++-
 clang/lib/Serialization/ASTReader.cpp         |  11 ++
 clang/lib/Serialization/ASTReaderDecl.cpp     |   7 ++
 clang/lib/Serialization/ASTWriter.cpp         |  33 ++++-
 clang/lib/Serialization/ASTWriterDecl.cpp     |   6 +
 clang/test/CodeGenCXX/modules-vtable.cppm     |  31 +++--
 clang/test/CodeGenCXX/pr70585.cppm            |  47 +++++++
 clang/test/Modules/pr97313.cppm               | 118 ++++++++++++++++++
 .../test/Modules/static-func-in-private.cppm  |   8 ++
 clang/test/Modules/vtable-windows.cppm        |  26 ++++
 19 files changed, 374 insertions(+), 55 deletions(-)
 create mode 100644 clang/test/CodeGenCXX/pr70585.cppm
 create mode 100644 clang/test/Modules/pr97313.cppm
 create mode 100644 clang/test/Modules/static-func-in-private.cppm
 create mode 100644 clang/test/Modules/vtable-windows.cppm

diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h
index 40f01abf384e9..2a4bd0f9c2fda 100644
--- a/clang/include/clang/AST/DeclBase.h
+++ b/clang/include/clang/AST/DeclBase.h
@@ -670,6 +670,13 @@ class alignas(8) Decl {
   /// Whether this declaration comes from another module unit.
   bool isInAnotherModuleUnit() const;
 
+  /// Whether this declaration comes from the same module unit being compiled.
+  bool isInCurrentModuleUnit() const;
+
+  /// Whether the definition of the declaration should be emitted in external
+  /// sources.
+  bool shouldEmitInExternalSource() const;
+
   /// Whether this declaration comes from explicit global module.
   bool isFromExplicitGlobalModule() const;
 
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 5dd0ba33f8a9c..9b7e3af0e449b 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -721,6 +721,9 @@ enum ASTRecordTypes {
 
   /// Record code for \#pragma clang unsafe_buffer_usage begin/end
   PP_UNSAFE_BUFFER_USAGE = 69,
+
+  /// Record code for vtables to emit.
+  VTABLES_TO_EMIT = 70,
 };
 
 /// Record types used within a source manager block.
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 76e51ac7ab979..671520a3602b3 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -790,6 +790,11 @@ class ASTReader
   /// the consumer eagerly.
   SmallVector<GlobalDeclID, 16> EagerlyDeserializedDecls;
 
+  /// The IDs of all vtables to emit. The referenced declarations are passed
+  /// to the consumers' HandleVTable eagerly after passing
+  /// EagerlyDeserializedDecls.
+  SmallVector<GlobalDeclID, 16> VTablesToEmit;
+
   /// The IDs of all tentative definitions stored in the chain.
   ///
   /// Sema keeps track of all tentative definitions in a TU because it has to
@@ -1500,6 +1505,7 @@ class ASTReader
   bool isConsumerInterestedIn(Decl *D);
   void PassInterestingDeclsToConsumer();
   void PassInterestingDeclToConsumer(Decl *D);
+  void PassVTableToConsumer(CXXRecordDecl *RD);
 
   void finishPendingActions();
   void diagnoseOdrViolations();
diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index a0e475ec9f862..71a7c28047e31 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -500,6 +500,10 @@ class ASTWriter : public ASTDeserializationListener,
   std::vector<SourceRange> NonAffectingRanges;
   std::vector<SourceLocation::UIntTy> NonAffectingOffsetAdjustments;
 
+  /// A list of classes which need to emit the VTable in the corresponding
+  /// object file.
+  llvm::SmallVector<CXXRecordDecl *> PendingEmittingVTables;
+
   /// Computes input files that didn't affect compilation of the current module,
   /// and initializes data structures necessary for leaving those files out
   /// during \c SourceManager serialization.
@@ -857,6 +861,8 @@ class ASTWriter : public ASTDeserializationListener,
     return PredefinedDecls.count(D);
   }
 
+  void handleVTable(CXXRecordDecl *RD);
+
 private:
   // ASTDeserializationListener implementation
   void ReaderInitialized(ASTReader *Reader) override;
@@ -951,6 +957,7 @@ class PCHGenerator : public SemaConsumer {
 
   void InitializeSema(Sema &S) override { SemaPtr = &S; }
   void HandleTranslationUnit(ASTContext &Ctx) override;
+  void HandleVTable(CXXRecordDecl *RD) override { Writer.handleVTable(RD); }
   ASTMutationListener *GetASTMutationListener() override;
   ASTDeserializationListener *GetASTDeserializationListener() override;
   bool hasEmittedPCH() const { return Buffer->IsComplete; }
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 7af9ea7105bb0..3da5e888f2517 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -12405,8 +12405,7 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) {
       !isMSStaticDataMemberInlineDefinition(VD))
     return false;
 
-  // Variables in other module units shouldn't be forced to be emitted.
-  if (VD->isInAnotherModuleUnit())
+  if (VD->shouldEmitInExternalSource())
     return false;
 
   // Variables that can be needed in other TUs are required.
diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp
index bc5a9206c0db2..b59f118380ca4 100644
--- a/clang/lib/AST/DeclBase.cpp
+++ b/clang/lib/AST/DeclBase.cpp
@@ -1125,20 +1125,36 @@ bool Decl::isInAnotherModuleUnit() const {
   if (!M)
     return false;
 
+  // FIXME or NOTE: maybe we need to be clear about the semantics
+  // of clang header modules. e.g., if this lives in a clang header
+  // module included by the current unit, should we return false
+  // here?
+  //
+  // This is clear for header units as the specification says the
+  // header units live in a synthesised translation unit. So we
+  // can return false here.
   M = M->getTopLevelModule();
-  // FIXME: It is problematic if the header module lives in another module
-  // unit. Consider to fix this by techniques like
-  // ExternalASTSource::hasExternalDefinitions.
-  if (M->isHeaderLikeModule())
+  if (!M->isNamedModule())
     return false;
 
-  // A global module without parent implies that we're parsing the global
-  // module. So it can't be in another module unit.
-  if (M->isGlobalModule())
+  return M != getASTContext().getCurrentNamedModule();
+}
+
+bool Decl::isInCurrentModuleUnit() const {
+  auto *M = getOwningModule();
+
+  if (!M || !M->isNamedModule())
     return false;
 
-  assert(M->isNamedModule() && "New module kind?");
-  return M != getASTContext().getCurrentNamedModule();
+  return M == getASTContext().getCurrentNamedModule();
+}
+
+bool Decl::shouldEmitInExternalSource() const {
+  ExternalASTSource *Source = getASTContext().getExternalSource();
+  if (!Source)
+    return false;
+
+  return Source->hasExternalDefinitions(this) == ExternalASTSource::EK_Always;
 }
 
 bool Decl::isFromExplicitGlobalModule() const {
diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp
index 7f729d359b82b..267bdf0982970 100644
--- a/clang/lib/CodeGen/CGVTables.cpp
+++ b/clang/lib/CodeGen/CGVTables.cpp
@@ -1078,29 +1078,41 @@ llvm::GlobalVariable::LinkageTypes
 CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) {
   if (!RD->isExternallyVisible())
     return llvm::GlobalVariable::InternalLinkage;
-
-  // We're at the end of the translation unit, so the current key
-  // function is fully correct.
-  const CXXMethodDecl *keyFunction = Context.getCurrentKeyFunction(RD);
-  if (keyFunction && !RD->hasAttr<DLLImportAttr>()) {
+  
+  // In windows, the linkage of vtable is not related to modules.
+  bool IsInNamedModule = !getTarget().getCXXABI().isMicrosoft() &&
+        RD->isInNamedModule();
+  // If the CXXRecordDecl is not in a module unit, we need to get
+  // its key function. We're at the end of the translation unit, so the current
+  // key function is fully correct.
+  const CXXMethodDecl *keyFunction =
+      IsInNamedModule ? nullptr : Context.getCurrentKeyFunction(RD);
+  if (IsInNamedModule || (keyFunction && !RD->hasAttr<DLLImportAttr>())) {
     // If this class has a key function, use that to determine the
     // linkage of the vtable.
     const FunctionDecl *def = nullptr;
-    if (keyFunction->hasBody(def))
+    if (keyFunction && keyFunction->hasBody(def))
       keyFunction = cast<CXXMethodDecl>(def);
 
-    switch (keyFunction->getTemplateSpecializationKind()) {
-      case TSK_Undeclared:
-      case TSK_ExplicitSpecialization:
+    bool IsExternalDefinition =
+        IsInNamedModule ? RD->shouldEmitInExternalSource() : !def;
+
+    TemplateSpecializationKind Kind =
+        IsInNamedModule ? RD->getTemplateSpecializationKind()
+                        : keyFunction->getTemplateSpecializationKind();
+
+    switch (Kind) {
+    case TSK_Undeclared:
+    case TSK_ExplicitSpecialization:
       assert(
-          (def || CodeGenOpts.OptimizationLevel > 0 ||
+          (IsInNamedModule || def || CodeGenOpts.OptimizationLevel > 0 ||
            CodeGenOpts.getDebugInfo() != llvm::codegenoptions::NoDebugInfo) &&
-          "Shouldn't query vtable linkage without key function, "
-          "optimizations, or debug info");
-      if (!def && CodeGenOpts.OptimizationLevel > 0)
+          "Shouldn't query vtable linkage without the class in module units, "
+          "key function, optimizations, or debug info");
+      if (IsExternalDefinition && CodeGenOpts.OptimizationLevel > 0)
         return llvm::GlobalVariable::AvailableExternallyLinkage;
 
-      if (keyFunction->isInlined())
+      if (keyFunction && keyFunction->isInlined())
         return !Context.getLangOpts().AppleKext
                    ? llvm::GlobalVariable::LinkOnceODRLinkage
                    : llvm::Function::InternalLinkage;
@@ -1119,7 +1131,7 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) {
 
       case TSK_ExplicitInstantiationDeclaration:
         llvm_unreachable("Should not have been asked to emit this");
-    }
+      }
   }
 
   // -fapple-kext mode does not support weak linkage, so we must use
@@ -1213,22 +1225,20 @@ bool CodeGenVTables::isVTableExternal(const CXXRecordDecl *RD) {
       TSK == TSK_ExplicitInstantiationDefinition)
     return false;
 
+  // Otherwise, if the class is attached to a module, the tables are uniquely
+  // emitted in the object for the module unit in which it is defined.
+  if (RD->isInNamedModule())
+    return RD->shouldEmitInExternalSource();
+
   // Otherwise, if the class doesn't have a key function (possibly
   // anymore), the vtable must be defined here.
   const CXXMethodDecl *keyFunction = CGM.getContext().getCurrentKeyFunction(RD);
   if (!keyFunction)
     return false;
 
-  const FunctionDecl *Def;
   // Otherwise, if we don't have a definition of the key function, the
   // vtable must be defined somewhere else.
-  if (!keyFunction->hasBody(Def))
-    return true;
-
-  assert(Def && "The body of the key function is not assigned to Def?");
-  // If the non-inline key function comes from another module unit, the vtable
-  // must be defined there.
-  return Def->isInAnotherModuleUnit() && !Def->isInlineSpecified();
+  return !keyFunction->hasBody();
 }
 
 /// Given that we're currently at the end of the translation unit, and
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index cd76f8406e7b7..0be92fb2e2757 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -2315,6 +2315,9 @@ bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const {
   if (!canSpeculativelyEmitVTableAsBaseClass(RD))
     return false;
 
+  if (RD->shouldEmitInExternalSource())
+    return false;
+
   // For a complete-object vtable (or more specifically, for the VTT), we need
   // to be able to speculatively emit the vtables of all dynamic virtual bases.
   for (const auto &B : RD->vbases()) {
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index fa4bfe13053d8..d608dd92a4b47 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -18073,6 +18073,15 @@ void Sema::ActOnTagFinishDefinition(Scope *S, Decl *TagD,
       if (NumInitMethods > 1 || !Def->hasInitMethod())
         Diag(RD->getLocation(), diag::err_sycl_special_type_num_init_method);
     }
+
+    // If we're defining a dynamic class in a module interface unit, we always
+    // need to produce the vtable for it, even if the vtable is not used in the
+    // current TU.
+    //
+    // The case where the current class is not dynamic is handled in
+    // MarkVTableUsed.
+    if (getCurrentModule() && getCurrentModule()->isInterfaceOrPartition())
+      MarkVTableUsed(RD->getLocation(), RD, /*DefinitionRequired=*/true);
   }
 
   // Exit this scope of this tag's definition.
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 92c47be67339e..4e4f91de8cd5a 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -18517,11 +18517,15 @@ bool Sema::DefineUsedVTables() {
 
     bool DefineVTable = true;
 
-    // If this class has a key function, but that key function is
-    // defined in another translation unit, we don't need to emit the
-    // vtable even though we're using it.
     const CXXMethodDecl *KeyFunction = Context.getCurrentKeyFunction(Class);
-    if (KeyFunction && !KeyFunction->hasBody()) {
+    // V-tables for non-template classes with an owning module are always
+    // uniquely emitted in that module.
+    if (Class->isInCurrentModuleUnit()) {
+      DefineVTable = true;
+    } else if (KeyFunction && !KeyFunction->hasBody()) {
+      // If this class has a key function, but that key function is
+      // defined in another translation unit, we don't need to emit the
+      // vtable even though we're using it.
       // The key function is in another translation unit.
       DefineVTable = false;
       TemplateSpecializationKind TSK =
@@ -18566,7 +18570,7 @@ bool Sema::DefineUsedVTables() {
     DefinedAnything = true;
     MarkVirtualMembersReferenced(Loc, Class);
     CXXRecordDecl *Canonical = Class->getCanonicalDecl();
-    if (VTablesUsed[Canonical])
+    if (VTablesUsed[Canonical] && !Class->shouldEmitInExternalSource())
       Consumer.HandleVTable(Class);
 
     // Warn if we're emitting a weak vtable. The vtable will be weak if there is
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 3cb96df12e4da..29aec144aec1a 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3921,6 +3921,13 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       }
       break;
 
+    case VTABLES_TO_EMIT:
+      if (F.Kind == MK_MainFile ||
+          getContext().getLangOpts().BuildingPCHWithObjectFile)
+        for (unsigned I = 0, N = Record.size(); I != N;)
+          VTablesToEmit.push_back(ReadDeclID(F, Record, I));
+      break;
+
     case IMPORTED_MODULES:
       if (!F.isModule()) {
         // If we aren't loading a module (which has its own exports), make
@@ -8110,6 +8117,10 @@ void ASTReader::PassInterestingDeclToConsumer(Decl *D) {
     Consumer->HandleInterestingDecl(DeclGroupRef(D));
 }
 
+void ASTReader::PassVTableToConsumer(CXXRecordDecl *RD) {
+  Consumer->HandleVTable(RD);
+}
+
 void ASTReader::StartTranslationUnit(ASTConsumer *Consumer) {
   this->Consumer = Consumer;
 
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index ccc97f65526d6..c118f3818467d 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -4242,6 +4242,13 @@ void ASTReader::PassInterestingDeclsToConsumer() {
 
   // If we add any new potential interesting decl in the last call, consume it.
   ConsumingPotentialInterestingDecls();
+
+  for (GlobalDeclID ID : VTablesToEmit) {
+    auto *RD = cast<CXXRecordDecl>(GetDecl(ID));
+    assert(!RD->shouldEmitInExternalSource());
+    PassVTableToConsumer(RD);
+  }
+  VTablesToEmit.clear();
 }
 
 void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) {
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index c78d8943d6d92..7c0636962459e 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -927,6 +927,7 @@ void ASTWriter::WriteBlockInfoBlock() {
   RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS);
   RECORD(PP_ASSUME_NONNULL_LOC);
   RECORD(PP_UNSAFE_BUFFER_USAGE);
+  RECORD(VTABLES_TO_EMIT);
 
   // SourceManager Block.
   BLOCK(SOURCE_MANAGER_BLOCK);
@@ -3961,6 +3962,10 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
     Stream.EmitRecord(INTERESTING_IDENTIFIERS, InterestingIdents);
 }
 
+void ASTWriter::handleVTable(CXXRecordDecl *RD) {
+  PendingEmittingVTables.push_back(RD);
+}
+
 //===----------------------------------------------------------------------===//
 // DeclContext's Name Lookup Table Serialization
 //===----------------------------------------------------------------------===//
@@ -5163,6 +5168,13 @@ void ASTWriter::PrepareWritingSpecialDecls(Sema &SemaRef) {
   // Write all of the DeclsToCheckForDeferredDiags.
   for (auto *D : SemaRef.DeclsToCheckForDeferredDiags)
     GetDeclRef(D);
+
+  // Write all classes that need to emit the vtable definitions if required.
+  if (isWritingStdCXXNamedModules())
+    for (CXXRecordDecl *RD : PendingEmittingVTables)
+      GetDeclRef(RD);
+  else
+    PendingEmittingVTables.clear();
 }
 
 void ASTWriter::WriteSpecialDeclRecords(Sema &SemaRef) {
@@ -5317,6 +5329,17 @@ void ASTWriter::WriteSpecialDeclRecords(Sema &SemaRef) {
   }
   if (!DeleteExprsToAnalyze.empty())
     Stream.EmitRecord(DELETE_EXPRS_TO_ANALYZE, DeleteExprsToAnalyze);
+
+  RecordData VTablesToEmit;
+  for (CXXRecordDecl *RD : PendingEmittingVTables) {
+    if (!wasDeclEmitted(RD))
+      continue;
+
+    AddDeclRef(RD, VTablesToEmit);
+  }
+
+  if (!VTablesToEmit.empty())
+    Stream.EmitRecord(VTABLES_TO_EMIT, VTablesToEmit);
 }
 
 ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
@@ -6559,10 +6582,12 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) {
   // computed.
   Record->push_back(D->getODRHash());
 
-  bool ModulesDebugInfo =
-      Writer->Context->getLangOpts().ModulesDebugInfo && !D->isDependentType();
-  Record->push_back(ModulesDebugInfo);
-  if (ModulesDebugInfo)
+  bool ModulesCodegen =
+      !D->isDependentType() &&
+     (Writer->Context->getLangOpts().ModulesDebugInfo ||
+      D->isInNamedModule());
+  Record->push_back(ModulesCodegen);
+  if (ModulesCodegen)
     Writer->AddDeclRef(D, Writer->ModularCodegenDecls);
 
   // IsLambda bit is already saved.
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index 17c774038571e..8a4ca54349e38 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -1529,8 +1529,14 @@ void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) {
   if (D->isThisDeclarationADefinition())
     Record.AddCXXDefinitionData(D);
 
+  if (D->isCompleteDefinition() && D->isInNamedModule())
+    Writer.AddDeclRef(D, Writer.ModularCodegenDecls);
+
   // Store (what we currently believe to be) the key function to avoid
   // deserializing every method so we can compute it.
+  //
+  // FIXME: Avoid adding the key function if the class is defined in
+  // module purview since in that case the key function is meaningless.
   if (D->isCompleteDefinition())
     Record.AddDeclRef(Context.getCurrentKeyFunction(D));
 
diff --git a/clang/test/CodeGenCXX/modules-vtable.cppm b/clang/test/CodeGenCXX/modules-vtable.cppm
index fb179b1de4880..5cc3504d72628 100644
--- a/clang/test/CodeGenCXX/modules-vtable.cppm
+++ b/clang/test/CodeGenCXX/modules-vtable.cppm
@@ -24,6 +24,8 @@
 // RUN:     %t/M-A.cppm -o %t/M-A.pcm
 // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 -fmodule-file=M:A=%t/M-A.pcm \
 // RUN:     %t/M-B.cppm  -emit-llvm -o - | FileCheck %t/M-B.cppm
+// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 \
+// RUN:     %t/M-A.pcm  -emit-llvm -o - | FileCheck %t/M-A.cppm
 
 //--- Mod.cppm
 export module Mod;
@@ -41,9 +43,10 @@ Base::~Base() {}
 // CHECK: @_ZTSW3Mod4Base = constant
 // CHECK: @_ZTIW3Mod4Base = constant
 
-// CHECK-INLINE: @_ZTVW3Mod4Base = linkonce_odr {{.*}}unnamed_addr constant
-// CHECK-INLINE: @_ZTSW3Mod4Base = linkonce_odr {{.*}}constant
-// CHECK-INLINE: @_ZTIW3Mod4Base = linkonce_odr {{.*}}constant
+// With the new Itanium C++ ABI, the linkage of vtables in modules don't need to be linkonce ODR.
+// CHECK-INLINE: @_ZTVW3Mod4Base = {{.*}}unnamed_addr constant
+// CHECK-INLINE: @_ZTSW3Mod4Base = {{.*}}constant
+// CHECK-INLINE: @_ZTIW3Mod4Base = {{.*}}constant
 
 module :private;
 int private_use() {
@@ -58,13 +61,13 @@ int use() {
     return 43;
 }
 
-// CHECK-NOT: @_ZTSW3Mod4Base = constant
-// CHECK-NOT: @_ZTIW3Mod4Base = constant
-// CHECK: @_ZTVW3Mod4Base = external unnamed_addr
+// CHECK-NOT: @_ZTSW3Mod4Base
+// CHECK-NOT: @_ZTIW3Mod4Base
+// CHECK: @_ZTVW3Mod4Base = external
 
-// CHECK-INLINE: @_ZTVW3Mod4Base = linkonce_odr {{.*}}unnamed_addr constant
-// CHECK-INLINE: @_ZTSW3Mod4Base = linkonce_odr {{.*}}constant
-// CHECK-INLINE: @_ZTIW3Mod4Base = linkonce_odr {{.*}}constant
+// CHECK-INLINE-NOT: @_ZTSW3Mod4Base
+// CHECK-INLINE-NOT: @_ZTIW3Mod4Base
+// CHECK-INLINE: @_ZTVW3Mod4Base = external
 
 // Check the case that the declaration of the key function comes from another
 // module unit but the definition of the key function comes from the current
@@ -82,6 +85,10 @@ int a_use() {
     return 43;
 }
 
+// CHECK: @_ZTVW1M1C = unnamed_addr constant
+// CHECK: @_ZTSW1M1C = constant
+// CHECK: @_ZTIW1M1C = constant
+
 //--- M-B.cppm
 export module M:B;
 import :A;
@@ -93,6 +100,6 @@ int b_use() {
     return 43;
 }
 
-// CHECK: @_ZTVW1M1C = unnamed_addr constant
-// CHECK: @_ZTSW1M1C = constant
-// CHECK: @_ZTIW1M1C = constant
+// CHECK: @_ZTVW1M1C = external
+// CHECK-NOT: @_ZTSW1M1C
+// CHECK-NOT: @_ZTIW1M1C
diff --git a/clang/test/CodeGenCXX/pr70585.cppm b/clang/test/CodeGenCXX/pr70585.cppm
new file mode 100644
index 0000000000000..ad4e13589d86e
--- /dev/null
+++ b/clang/test/CodeGenCXX/pr70585.cppm
@@ -0,0 +1,47 @@
+// REQUIRES: !system-windows
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: cd %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/layer1.cppm -triple %itanium_abi_triple \
+// RUN:     -emit-module-interface -o %t/foo-layer1.pcm
+// RUN: %clang_cc1 -std=c++20 %t/layer2.cppm -triple %itanium_abi_triple  \
+// RUN:     -emit-module-interface -fmodule-file=foo:layer1=%t/foo-layer1.pcm \
+// RUN:     -o %t/foo-layer2.pcm
+// RUN: %clang_cc1 -std=c++20 %t/foo-layer1.pcm -emit-llvm -o - | FileCheck %t/layer1.cppm
+// RUN: %clang_cc1 -std=c++20 %t/foo-layer2.pcm -emit-llvm -o - \
+// RUN:     -fmodule-file=foo:layer1=%t/foo-layer1.pcm | FileCheck %t/layer2.cppm
+//
+// Check the case about emitting object files from sources directly.
+// RUN: %clang_cc1 -std=c++20 %t/layer1.cppm -triple %itanium_abi_triple \
+// RUN:     -emit-llvm -o - | FileCheck %t/layer1.cppm
+// RUN: %clang_cc1 -std=c++20 %t/layer2.cppm -triple %itanium_abi_triple -emit-llvm  \
+// RUN:     -fmodule-file=foo:layer1=%t/foo-layer1.pcm -o - | FileCheck %t/layer2.cppm
+
+//--- layer1.cppm
+export module foo:layer1;
+struct Fruit {
+    virtual ~Fruit() = default;
+    virtual void eval();
+};
+
+// CHECK-DAG: @_ZTVW3foo5Fruit = unnamed_addr constant
+// CHECK-DAG: @_ZTSW3foo5Fruit = constant
+// CHECK-DAG: @_ZTIW3foo5Fruit = constant
+
+// Testing that:
+// (1) The use of virtual functions won't produce the vtable.
+// (2) The definition of key functions won't produce the vtable.
+//
+//--- layer2.cppm
+export module foo:layer2;
+import :layer1;
+export void layer2_fun() {
+    Fruit *b = new Fruit();
+    b->eval();
+}
+void Fruit::eval() {}
+// CHECK: @_ZTVW3foo5Fruit = external unnamed_addr constant
+// CHECK-NOT: @_ZTSW3foo5Fruit
+// CHECK-NOT: @_ZTIW3foo5Fruit
diff --git a/clang/test/Modules/pr97313.cppm b/clang/test/Modules/pr97313.cppm
new file mode 100644
index 0000000000000..ebbd0ee4e2c65
--- /dev/null
+++ b/clang/test/Modules/pr97313.cppm
@@ -0,0 +1,118 @@
+// REQUIRES: !system-windows
+//
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Base.cppm \
+// RUN:     -emit-module-interface -o %t/Base.pcm
+// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Sub.cppm \
+// RUN:     -emit-module-interface -o %t/Sub.pcm -fprebuilt-module-path=%t
+// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Sub.pcm \
+// RUN:     -emit-llvm -o %t/Sub.pcm -o - -fprebuilt-module-path=%t | \
+// RUN:     FileCheck %t/Sub.cppm
+// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/main.cpp \
+// RUN:     -emit-llvm -fprebuilt-module-path=%t -o - | FileCheck %t/main.cpp
+//
+// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Mod.cppm \
+// RUN:     -emit-module-interface -o %t/Mod.pcm
+// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Mod.pcm \
+// RUN:     -emit-llvm -o - | FileCheck %t/Mod.cppm
+// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Use.cpp \
+// RUN:     -emit-llvm -fprebuilt-module-path=%t -o - | \
+// RUN:     FileCheck %t/Use.cpp
+
+//--- Base.cppm
+export module Base;
+
+export template <class>
+class Base
+{
+public:
+    constexpr Base();
+    constexpr virtual ~Base();
+};
+
+template <class X>
+constexpr Base<X>::Base() = default;
+
+template <class X>
+constexpr Base<X>::~Base() = default;
+
+//--- Sub.cppm
+export module Sub;
+export import Base;
+
+export class Sub : public Base<int>
+{
+};
+
+// CHECK: @_ZTIW4Base4BaseIiE = {{.*}}linkonce_odr
+
+//--- main.cpp
+import Sub;
+
+int main()
+{
+    Base<int> *b = new Sub();
+    delete b;
+}
+
+// CHECK: @_ZTIW4Base4BaseIiE = {{.*}}linkonce_odr
+
+//--- Mod.cppm
+export module Mod;
+
+export class NonTemplate {
+public:
+    virtual ~NonTemplate();
+};
+
+// CHECK: @_ZTIW3Mod11NonTemplate = {{.*}}constant
+
+export template <class C>
+class Template {
+public:
+    virtual ~Template();
+};
+
+export template<>
+class Template<char> {
+public:
+    virtual ~Template();
+};
+
+// CHECK: @_ZTIW3Mod8TemplateIcE = {{.*}}constant
+
+export template class Template<unsigned>;
+
+// CHECK: @_ZTIW3Mod8TemplateIjE = {{.*}}weak_odr
+
+export extern template class Template<double>;
+
+auto v = new Template<signed int>();
+
+// CHECK: @_ZTIW3Mod8TemplateIiE = {{.*}}linkonce_odr
+
+//--- Use.cpp
+import Mod;
+
+auto v1 = new NonTemplate();
+auto v2 = new Template<char>();
+auto v3 = new Template<unsigned>();
+auto v4 = new Template<double>();
+auto v5 = new Template<signed int>();
+auto v6 = new Template<NonTemplate>();
+
+// CHECK: @_ZTVW3Mod11NonTemplate = {{.*}}external
+// CHECK: @_ZTVW3Mod8TemplateIcE = {{.*}}external
+// CHECK: @_ZTVW3Mod8TemplateIjE = {{.*}}weak_odr
+// CHECK: @_ZTSW3Mod8TemplateIjE = {{.*}}weak_odr
+// CHECK: @_ZTIW3Mod8TemplateIjE = {{.*}}weak_odr
+// CHECK: @_ZTVW3Mod8TemplateIdE = {{.*}}external
+// CHECK: @_ZTVW3Mod8TemplateIiE = {{.*}}linkonce_odr
+// CHECK: @_ZTSW3Mod8TemplateIiE = {{.*}}linkonce_odr
+// CHECK: @_ZTIW3Mod8TemplateIiE = {{.*}}linkonce_odr
+// CHECK: @_ZTVW3Mod8TemplateIS_11NonTemplateE = {{.*}}linkonce_odr
+// CHECK: @_ZTSW3Mod8TemplateIS_11NonTemplateE = {{.*}}linkonce_odr
+// CHECK: @_ZTIW3Mod8TemplateIS_11NonTemplateE = {{.*}}linkonce_odr
diff --git a/clang/test/Modules/static-func-in-private.cppm b/clang/test/Modules/static-func-in-private.cppm
new file mode 100644
index 0000000000000..d7ce663f16d52
--- /dev/null
+++ b/clang/test/Modules/static-func-in-private.cppm
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -std=c++20 %s -verify -fsyntax-only
+// expected-no-diagnostics
+export module a;
+module :private;
+static void f() {}
+void g() {
+  f();
+}
diff --git a/clang/test/Modules/vtable-windows.cppm b/clang/test/Modules/vtable-windows.cppm
new file mode 100644
index 0000000000000..dbde24c8a9bdd
--- /dev/null
+++ b/clang/test/Modules/vtable-windows.cppm
@@ -0,0 +1,26 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 -triple i686-pc-windows-msvc %t/foo.cppm -emit-module-interface \
+// RUN:   -o %t/foo.pcm
+// RUN: %clang_cc1 -std=c++20 -triple i686-pc-windows-msvc %t/user.cc -fmodule-file=foo=%t/foo.pcm \
+// RUN:   -emit-llvm -o - -disable-llvm-passes | FileCheck %t/user.cc
+
+//--- foo.cppm
+export module foo;
+export struct Fruit {
+    virtual ~Fruit() = default;
+    virtual void eval();
+};
+
+//--- user.cc
+import foo;
+void test() {
+  Fruit *f = new Fruit();
+  f->eval();
+}
+
+// Check that the virtual table is an unnamed_addr constant in comdat that can
+// be merged with the virtual table with other TUs.
+// CHECK: unnamed_addr constant {{.*}}[ptr @"??_R4Fruit@@6B@", ptr @"??_GFruit@@UAEPAXI@Z", ptr @"?eval@Fruit@@UAEXXZ"{{.*}}comdat($"??_7Fruit@@6B@")

From 7e7e8125cfabf7daf5de63612e6f2c646dd8cad3 Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn@arm.com>
Date: Sun, 4 Aug 2024 13:27:12 +0100
Subject: [PATCH 203/427] [libunwind] Add GCS support for AArch64 (#99335)

AArch64 GCS (Guarded Control Stack) is similar enough to CET that we can
re-use the existing code that is guarded by _LIBUNWIND_USE_CET, so long
as we also add defines to locate the GCS stack and pop the entries from
it. We also need the jumpto function to exit using br instead of ret, to
prevent it from popping the GCS stack.

GCS support is enabled using the LIBUNWIND_ENABLE_GCS cmake option. This
enables -mbranch-protection=standard, which enables GCS. For the places
we need to use GCS instructions we use the target attribute, as there's
not a command-line option to enable a specific architecture extension.

(cherry picked from commit b32aac4358c1f6639de7c453656cd74fbab75d71)
---
 libunwind/CMakeLists.txt                      |  8 +++++
 libunwind/src/Registers.hpp                   |  7 +++++
 libunwind/src/UnwindCursor.hpp                |  6 ++--
 libunwind/src/UnwindLevel1.c                  | 31 +++++++++++++++++--
 libunwind/src/UnwindRegistersRestore.S        |  2 +-
 libunwind/src/cet_unwind.h                    | 18 +++++++++++
 libunwind/test/CMakeLists.txt                 |  1 +
 .../test/configs/llvm-libunwind-merged.cfg.in |  3 ++
 .../test/configs/llvm-libunwind-shared.cfg.in |  3 ++
 .../test/configs/llvm-libunwind-static.cfg.in |  3 ++
 10 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt
index b22ade0a7d71e..28d67b0fef92c 100644
--- a/libunwind/CMakeLists.txt
+++ b/libunwind/CMakeLists.txt
@@ -37,6 +37,7 @@ if (LIBUNWIND_BUILD_32_BITS)
 endif()
 
 option(LIBUNWIND_ENABLE_CET "Build libunwind with CET enabled." OFF)
+option(LIBUNWIND_ENABLE_GCS "Build libunwind with GCS enabled." OFF)
 option(LIBUNWIND_ENABLE_ASSERTIONS "Enable assertions independent of build mode." ON)
 option(LIBUNWIND_ENABLE_PEDANTIC "Compile with pedantic enabled." ON)
 option(LIBUNWIND_ENABLE_WERROR "Fail and stop if a warning is triggered." OFF)
@@ -188,6 +189,13 @@ if (LIBUNWIND_ENABLE_CET)
   endif()
 endif()
 
+if (LIBUNWIND_ENABLE_GCS)
+  add_compile_flags_if_supported(-mbranch-protection=standard)
+  if (NOT CXX_SUPPORTS_MBRANCH_PROTECTION_EQ_STANDARD_FLAG)
+    message(SEND_ERROR "Compiler doesn't support GCS -mbranch-protection option!")
+  endif()
+endif()
+
 if (WIN32)
   # The headers lack matching dllexport attributes (_LIBUNWIND_EXPORT);
   # silence the warning instead of cluttering the headers (which aren't
diff --git a/libunwind/src/Registers.hpp b/libunwind/src/Registers.hpp
index d11ddb3426d52..861e6b5f6f2c5 100644
--- a/libunwind/src/Registers.hpp
+++ b/libunwind/src/Registers.hpp
@@ -1815,6 +1815,13 @@ inline const char *Registers_ppc64::getRegisterName(int regNum) {
 /// process.
 class _LIBUNWIND_HIDDEN Registers_arm64;
 extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *);
+
+#if defined(_LIBUNWIND_USE_GCS)
+extern "C" void *__libunwind_cet_get_jump_target() {
+  return reinterpret_cast<void *>(&__libunwind_Registers_arm64_jumpto);
+}
+#endif
+
 class _LIBUNWIND_HIDDEN Registers_arm64 {
 public:
   Registers_arm64();
diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
index 758557337899e..06e654197351d 100644
--- a/libunwind/src/UnwindCursor.hpp
+++ b/libunwind/src/UnwindCursor.hpp
@@ -471,7 +471,7 @@ class _LIBUNWIND_HIDDEN AbstractUnwindCursor {
   }
 #endif
 
-#if defined(_LIBUNWIND_USE_CET)
+#if defined(_LIBUNWIND_USE_CET) || defined(_LIBUNWIND_USE_GCS)
   virtual void *get_registers() {
     _LIBUNWIND_ABORT("get_registers not implemented");
   }
@@ -954,7 +954,7 @@ class UnwindCursor : public AbstractUnwindCursor{
   virtual uintptr_t getDataRelBase();
 #endif
 
-#if defined(_LIBUNWIND_USE_CET)
+#if defined(_LIBUNWIND_USE_CET) || defined(_LIBUNWIND_USE_GCS)
   virtual void *get_registers() { return &_registers; }
 #endif
 
@@ -3005,7 +3005,7 @@ bool UnwindCursor<A, R>::isReadableAddr(const pint_t addr) const {
 }
 #endif
 
-#if defined(_LIBUNWIND_USE_CET)
+#if defined(_LIBUNWIND_USE_CET) || defined(_LIBUNWIND_USE_GCS)
 extern "C" void *__libunwind_cet_get_registers(unw_cursor_t *cursor) {
   AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
   return co->get_registers();
diff --git a/libunwind/src/UnwindLevel1.c b/libunwind/src/UnwindLevel1.c
index 48e7bc3b9e00e..7e785f4d31e71 100644
--- a/libunwind/src/UnwindLevel1.c
+++ b/libunwind/src/UnwindLevel1.c
@@ -44,7 +44,7 @@
 // _LIBUNWIND_POP_CET_SSP is used to adjust CET shadow stack pointer and we
 // directly jump to __libunwind_Registers_x86/x86_64_jumpto instead of using
 // a regular function call to avoid pushing to CET shadow stack again.
-#if !defined(_LIBUNWIND_USE_CET)
+#if !defined(_LIBUNWIND_USE_CET) && !defined(_LIBUNWIND_USE_GCS)
 #define __unw_phase2_resume(cursor, fn)                                        \
   do {                                                                         \
     (void)fn;                                                                  \
@@ -72,6 +72,19 @@
     __asm__ volatile("jmpq *%%rdx\n\t" :: "D"(cetRegContext),                  \
                      "d"(cetJumpAddress));                                     \
   } while (0)
+#elif defined(_LIBUNWIND_TARGET_AARCH64)
+#define __cet_ss_step_size 8
+#define __unw_phase2_resume(cursor, fn)                                        \
+  do {                                                                         \
+    _LIBUNWIND_POP_CET_SSP((fn));                                              \
+    void *cetRegContext = __libunwind_cet_get_registers((cursor));             \
+    void *cetJumpAddress = __libunwind_cet_get_jump_target();                  \
+    __asm__ volatile("mov x0, %0\n\t"                                          \
+                     "br %1\n\t"                                               \
+                     :                                                         \
+                     : "r"(cetRegContext), "r"(cetJumpAddress)                 \
+                     : "x0");                                                  \
+  } while (0)
 #endif
 
 static _Unwind_Reason_Code
@@ -170,6 +183,10 @@ unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except
 }
 extern int __unw_step_stage2(unw_cursor_t *);
 
+#if defined(_LIBUNWIND_USE_GCS)
+// Enable the GCS target feature to permit gcspop instructions to be used.
+__attribute__((target("gcs")))
+#endif
 static _Unwind_Reason_Code
 unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) {
   __unw_init_local(cursor, uc);
@@ -180,8 +197,12 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except
   // uc is initialized by __unw_getcontext in the parent frame. The first stack
   // frame walked is unwind_phase2.
   unsigned framesWalked = 1;
-#ifdef _LIBUNWIND_USE_CET
+#if defined(_LIBUNWIND_USE_CET)
   unsigned long shadowStackTop = _get_ssp();
+#elif defined(_LIBUNWIND_USE_GCS)
+  unsigned long shadowStackTop = 0;
+  if (__chkfeat(_CHKFEAT_GCS))
+    shadowStackTop = (unsigned long)__gcspr();
 #endif
   // Walk each frame until we reach where search phase said to stop.
   while (true) {
@@ -238,7 +259,7 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except
 // against return address stored in CET shadow stack, if the 2 addresses don't
 // match, it means return address in normal stack has been corrupted, we return
 // _URC_FATAL_PHASE2_ERROR.
-#ifdef _LIBUNWIND_USE_CET
+#if defined(_LIBUNWIND_USE_CET) || defined(_LIBUNWIND_USE_GCS)
     if (shadowStackTop != 0) {
       unw_word_t retInNormalStack;
       __unw_get_reg(cursor, UNW_REG_IP, &retInNormalStack);
@@ -306,6 +327,10 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except
   return _URC_FATAL_PHASE2_ERROR;
 }
 
+#if defined(_LIBUNWIND_USE_GCS)
+// Enable the GCS target feature to permit gcspop instructions to be used.
+__attribute__((target("gcs")))
+#endif
 static _Unwind_Reason_Code
 unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor,
                      _Unwind_Exception *exception_object,
diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S
index 67d9e05711898..e1d6e17549880 100644
--- a/libunwind/src/UnwindRegistersRestore.S
+++ b/libunwind/src/UnwindRegistersRestore.S
@@ -680,7 +680,7 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto)
   ldr    x16,     [x0, #0x0F8]
   ldp    x0, x1,  [x0, #0x000]  // restore x0,x1
   mov    sp,x16                 // restore sp
-  ret    x30                    // jump to pc
+  br     x30                    // jump to pc
 
 #elif defined(__arm__) && !defined(__APPLE__)
 
diff --git a/libunwind/src/cet_unwind.h b/libunwind/src/cet_unwind.h
index c364ed3e12feb..45c11973cb7fa 100644
--- a/libunwind/src/cet_unwind.h
+++ b/libunwind/src/cet_unwind.h
@@ -35,6 +35,24 @@
   } while (0)
 #endif
 
+// On AArch64 we use _LIBUNWIND_USE_GCS to indicate that GCS is supported. We
+// need to guard any use of GCS instructions with __chkfeat though, as GCS may
+// not be enabled.
+#if defined(_LIBUNWIND_TARGET_AARCH64) && defined(__ARM_FEATURE_GCS_DEFAULT)
+#define _LIBUNWIND_USE_GCS 1
+#include <arm_acle.h>
+
+#define _LIBUNWIND_POP_CET_SSP(x)                                              \
+  do {                                                                         \
+    if (__chkfeat(_CHKFEAT_GCS)) {                                             \
+      unsigned tmp = (x);                                                      \
+      while (tmp--)                                                            \
+        __gcspopm();                                                           \
+    }                                                                          \
+  } while (0)
+
+#endif
+
 extern void *__libunwind_cet_get_registers(unw_cursor_t *);
 extern void *__libunwind_cet_get_jump_target(void);
 
diff --git a/libunwind/test/CMakeLists.txt b/libunwind/test/CMakeLists.txt
index 19f055f6f93ff..c7b1b3d01d8c7 100644
--- a/libunwind/test/CMakeLists.txt
+++ b/libunwind/test/CMakeLists.txt
@@ -9,6 +9,7 @@ macro(pythonize_bool var)
 endmacro()
 
 pythonize_bool(LIBUNWIND_ENABLE_CET)
+pythonize_bool(LIBUNWIND_ENABLE_GCS)
 pythonize_bool(LIBUNWIND_ENABLE_THREADS)
 pythonize_bool(LIBUNWIND_USES_ARM_EHABI)
 
diff --git a/libunwind/test/configs/llvm-libunwind-merged.cfg.in b/libunwind/test/configs/llvm-libunwind-merged.cfg.in
index 38b79840c9fe2..fafe12962b428 100644
--- a/libunwind/test/configs/llvm-libunwind-merged.cfg.in
+++ b/libunwind/test/configs/llvm-libunwind-merged.cfg.in
@@ -11,6 +11,9 @@ link_flags = []
 if @LIBUNWIND_ENABLE_CET@:
     compile_flags.append('-fcf-protection=full')
 
+if @LIBUNWIND_ENABLE_GCS@:
+    compile_flags.append('-mbranch-protection=standard')
+
 # On ELF platforms, link tests with -Wl,--export-dynamic if supported by the linker.
 if len('@CMAKE_EXE_EXPORTS_CXX_FLAG@'):
     link_flags.append('@CMAKE_EXE_EXPORTS_CXX_FLAG@')
diff --git a/libunwind/test/configs/llvm-libunwind-shared.cfg.in b/libunwind/test/configs/llvm-libunwind-shared.cfg.in
index 13896aeb13bc4..f3e40928b525d 100644
--- a/libunwind/test/configs/llvm-libunwind-shared.cfg.in
+++ b/libunwind/test/configs/llvm-libunwind-shared.cfg.in
@@ -10,6 +10,9 @@ link_flags = []
 if @LIBUNWIND_ENABLE_CET@:
     compile_flags.append('-fcf-protection=full')
 
+if @LIBUNWIND_ENABLE_GCS@:
+    compile_flags.append('-mbranch-protection=standard')
+
 # On ELF platforms, link tests with -Wl,--export-dynamic if supported by the linker.
 if len('@CMAKE_EXE_EXPORTS_CXX_FLAG@'):
     link_flags.append('@CMAKE_EXE_EXPORTS_CXX_FLAG@')
diff --git a/libunwind/test/configs/llvm-libunwind-static.cfg.in b/libunwind/test/configs/llvm-libunwind-static.cfg.in
index 50b64dc665a5a..a3a65ae82591b 100644
--- a/libunwind/test/configs/llvm-libunwind-static.cfg.in
+++ b/libunwind/test/configs/llvm-libunwind-static.cfg.in
@@ -13,6 +13,9 @@ if @LIBUNWIND_ENABLE_THREADS@:
 if @LIBUNWIND_ENABLE_CET@:
     compile_flags.append('-fcf-protection=full')
 
+if @LIBUNWIND_ENABLE_GCS@:
+    compile_flags.append('-mbranch-protection=standard')
+
 # On ELF platforms, link tests with -Wl,--export-dynamic if supported by the linker.
 if len('@CMAKE_EXE_EXPORTS_CXX_FLAG@'):
     link_flags.append('@CMAKE_EXE_EXPORTS_CXX_FLAG@')

From c3da16b094511e42022e534b5eb665dbc3f8db0f Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn@arm.com>
Date: Mon, 5 Aug 2024 18:54:05 +0100
Subject: [PATCH 204/427] [libunwind] Be more careful about enabling GCS
 (#101973)

We need both GCS to be enabled by the compiler (which we do by checking
if __ARM_FEATURE_GCS_DEFAULT is defined) and for arm_acle.h to define
the GCS intrinsics. Check the latter by checking if _CHKFEAT_GCS is
defined.

(cherry picked from commit c649194a71b47431f2eb2e041435d564e3b51072)
---
 libunwind/src/cet_unwind.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/libunwind/src/cet_unwind.h b/libunwind/src/cet_unwind.h
index 45c11973cb7fa..47d7616a7322c 100644
--- a/libunwind/src/cet_unwind.h
+++ b/libunwind/src/cet_unwind.h
@@ -39,9 +39,13 @@
 // need to guard any use of GCS instructions with __chkfeat though, as GCS may
 // not be enabled.
 #if defined(_LIBUNWIND_TARGET_AARCH64) && defined(__ARM_FEATURE_GCS_DEFAULT)
-#define _LIBUNWIND_USE_GCS 1
 #include <arm_acle.h>
 
+// We can only use GCS if arm_acle.h defines the GCS intrinsics.
+#ifdef _CHKFEAT_GCS
+#define _LIBUNWIND_USE_GCS 1
+#endif
+
 #define _LIBUNWIND_POP_CET_SSP(x)                                              \
   do {                                                                         \
     if (__chkfeat(_CHKFEAT_GCS)) {                                             \

From 72d2932da5a7c70885a1fdfaa809ff1ede0984ff Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn@arm.com>
Date: Thu, 8 Aug 2024 11:20:09 +0100
Subject: [PATCH 205/427] [libunwind] Fix problems caused by combining BTI and
 GCS (#102322)

The libunwind assembly files need adjustment in order to work correctly
when both BTI and GCS are both enabled (which will be the case when
using -mbranch-protection=standard):
* __libunwind_Registers_arm64_jumpto can't use br to jump to the return
location, instead we need to use gcspush then ret.
* Because we indirectly call __libunwind_Registers_arm64_jumpto it needs
to start with bti jc.
 * We need to set the GCS GNU property bit when it's enabled.

---------

Co-authored-by: Daniel Kiss <daniel.kristof.kiss@gmail.com>
(cherry picked from commit 39529107b46032ef0875ac5b809ab5b60cd15a40)
---
 libunwind/src/UnwindRegistersRestore.S | 16 +++++++++++++++-
 libunwind/src/assembly.h               | 25 ++++++++++++++++++++-----
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S
index e1d6e17549880..9d34c7909ed37 100644
--- a/libunwind/src/UnwindRegistersRestore.S
+++ b/libunwind/src/UnwindRegistersRestore.S
@@ -629,6 +629,10 @@ Lnovec:
 
 #elif defined(__aarch64__)
 
+#if defined(__ARM_FEATURE_GCS_DEFAULT)
+.arch_extension gcs
+#endif
+
 //
 // extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *);
 //
@@ -680,7 +684,17 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto)
   ldr    x16,     [x0, #0x0F8]
   ldp    x0, x1,  [x0, #0x000]  // restore x0,x1
   mov    sp,x16                 // restore sp
-  br     x30                    // jump to pc
+#if defined(__ARM_FEATURE_GCS_DEFAULT)
+  // If GCS is enabled we need to push the address we're returning to onto the
+  // GCS stack. We can't just return using br, as there won't be a BTI landing
+  // pad instruction at the destination.
+  mov      x16, #1
+  chkfeat  x16
+  cbnz     x16, Lnogcs
+  gcspushm x30
+Lnogcs:
+#endif
+  ret    x30                    // jump to pc
 
 #elif defined(__arm__) && !defined(__APPLE__)
 
diff --git a/libunwind/src/assembly.h b/libunwind/src/assembly.h
index fb07d04071af3..f8e83e138eff5 100644
--- a/libunwind/src/assembly.h
+++ b/libunwind/src/assembly.h
@@ -82,7 +82,22 @@
 #define PPC64_OPD2
 #endif
 
-#if defined(__aarch64__) && defined(__ARM_FEATURE_BTI_DEFAULT)
+#if defined(__aarch64__)
+#if defined(__ARM_FEATURE_GCS_DEFAULT) && defined(__ARM_FEATURE_BTI_DEFAULT)
+// Set BTI, PAC, and GCS gnu property bits
+#define GNU_PROPERTY 7
+// We indirectly branch to __libunwind_Registers_arm64_jumpto from
+// __unw_phase2_resume, so we need to use bti jc.
+#define AARCH64_BTI bti jc
+#elif defined(__ARM_FEATURE_GCS_DEFAULT)
+// Set GCS gnu property bit
+#define GNU_PROPERTY 4
+#elif defined(__ARM_FEATURE_BTI_DEFAULT)
+// Set BTI and PAC gnu property bits
+#define GNU_PROPERTY 3
+#define AARCH64_BTI bti c
+#endif
+#ifdef GNU_PROPERTY
   .pushsection ".note.gnu.property", "a" SEPARATOR                             \
   .balign 8 SEPARATOR                                                          \
   .long 4 SEPARATOR                                                            \
@@ -91,12 +106,12 @@
   .asciz "GNU" SEPARATOR                                                       \
   .long 0xc0000000 SEPARATOR /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */          \
   .long 4 SEPARATOR                                                            \
-  .long 3 SEPARATOR /* GNU_PROPERTY_AARCH64_FEATURE_1_BTI AND */               \
-                    /* GNU_PROPERTY_AARCH64_FEATURE_1_PAC */                   \
+  .long GNU_PROPERTY SEPARATOR                                                 \
   .long 0 SEPARATOR                                                            \
   .popsection SEPARATOR
-#define AARCH64_BTI  bti c
-#else
+#endif
+#endif
+#if !defined(AARCH64_BTI)
 #define AARCH64_BTI
 #endif
 

From 437434df21d839becb453f6821564662e9824f02 Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@hieta.se>
Date: Tue, 20 Aug 2024 10:06:55 +0200
Subject: [PATCH 206/427] Bump version to 19.1.0-rc3

---
 cmake/Modules/LLVMVersion.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake
index 7d5d292957dbe..e6042983c05cb 100644
--- a/cmake/Modules/LLVMVersion.cmake
+++ b/cmake/Modules/LLVMVersion.cmake
@@ -10,6 +10,6 @@ if(NOT DEFINED LLVM_VERSION_PATCH)
   set(LLVM_VERSION_PATCH 0)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
-  set(LLVM_VERSION_SUFFIX -rc2)
+  set(LLVM_VERSION_SUFFIX -rc3)
 endif()
 

From 9301cd5b57c09214256edf19753e2e047a5b5f91 Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Tue, 30 Jul 2024 10:06:45 +0200
Subject: [PATCH 207/427] [sanitizer_common] Make sanitizer_linux.cpp
 kernel_stat* handling Linux-specific

fcd6bd5587cc376cd8f43b60d1c7d61fdfe0f535 broke the Solaris/sparcv9 buildbot:
```
compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp:39:14: fatal error: 'asm/unistd.h' file not found
   39 | #    include <asm/unistd.h>
      |              ^~~~~~~~~~~~~~
```
That section should have been Linux-specific in the first place, which is
what this patch does.

Tested on sparcv9-sun-solaris2.11.

(cherry picked from commit 16e9bb9cd7f50ae2ec7f29a80bc3b95f528bfdbf)
---
 compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 8d375ffcd079c..648df0c4e5a76 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -35,7 +35,7 @@
 // access stat from asm/stat.h, without conflicting with definition in
 // sys/stat.h, we use this trick.  sparc64 is similar, using
 // syscall(__NR_stat64) and struct kernel_stat64.
-#  if SANITIZER_MIPS64 || SANITIZER_SPARC64
+#  if SANITIZER_LINUX && (SANITIZER_MIPS64 || SANITIZER_SPARC64)
 #    include <asm/unistd.h>
 #    include <sys/types.h>
 #    define stat kernel_stat

From 9dc4bdf9fd1e4be051fe19998d64230d999b777d Mon Sep 17 00:00:00 2001
From: Ian Anderson <iana@apple.com>
Date: Tue, 20 Aug 2024 03:29:11 -0700
Subject: [PATCH 208/427] [clang][modules] Built-in modules are not correctly
 enabled for Mac Catalyst (#104872)

Mac Catalyst is the iOS platform, but it builds against the macOS SDK
and so it needs to be checking the macOS SDK version instead of the iOS
one. Add tests against a greater-than SDK version just to make sure this
works beyond the initially supporting SDKs.

(cherry picked from commit b9864387d9d00e1d4888181460d05dbc92364d75)
---
 clang/lib/Driver/ToolChains/Darwin.cpp                 | 10 +++++++++-
 .../test/Driver/Inputs/MacOSX15.0.sdk/SDKSettings.json |  2 +-
 .../test/Driver/Inputs/MacOSX15.1.sdk/SDKSettings.json |  1 +
 clang/test/Driver/darwin-builtin-modules.c             |  3 +++
 4 files changed, 14 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/Driver/Inputs/MacOSX15.1.sdk/SDKSettings.json

diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp
index 17d57b2f7eeda..e576efaf5ca88 100644
--- a/clang/lib/Driver/ToolChains/Darwin.cpp
+++ b/clang/lib/Driver/ToolChains/Darwin.cpp
@@ -2953,7 +2953,15 @@ static bool sdkSupportsBuiltinModules(
   case Darwin::MacOS:
     return SDKVersion >= VersionTuple(15U);
   case Darwin::IPhoneOS:
-    return SDKVersion >= VersionTuple(18U);
+    switch (TargetEnvironment) {
+    case Darwin::MacCatalyst:
+      // Mac Catalyst uses `-target arm64-apple-ios18.0-macabi` so the platform
+      // is iOS, but it builds with the macOS SDK, so it's the macOS SDK version
+      // that's relevant.
+      return SDKVersion >= VersionTuple(15U);
+    default:
+      return SDKVersion >= VersionTuple(18U);
+    }
   case Darwin::TvOS:
     return SDKVersion >= VersionTuple(18U);
   case Darwin::WatchOS:
diff --git a/clang/test/Driver/Inputs/MacOSX15.0.sdk/SDKSettings.json b/clang/test/Driver/Inputs/MacOSX15.0.sdk/SDKSettings.json
index 77b70e1a83c19..ced45d5c21996 100644
--- a/clang/test/Driver/Inputs/MacOSX15.0.sdk/SDKSettings.json
+++ b/clang/test/Driver/Inputs/MacOSX15.0.sdk/SDKSettings.json
@@ -1 +1 @@
-{"Version":"990.0", "MaximumDeploymentTarget": "99.0.99"}
+{"Version":"15.0", "MaximumDeploymentTarget": "15.0.99"}
diff --git a/clang/test/Driver/Inputs/MacOSX15.1.sdk/SDKSettings.json b/clang/test/Driver/Inputs/MacOSX15.1.sdk/SDKSettings.json
new file mode 100644
index 0000000000000..d46295b2ab5a1
--- /dev/null
+++ b/clang/test/Driver/Inputs/MacOSX15.1.sdk/SDKSettings.json
@@ -0,0 +1 @@
+{"Version":"15.1", "MaximumDeploymentTarget": "15.1.99"}
diff --git a/clang/test/Driver/darwin-builtin-modules.c b/clang/test/Driver/darwin-builtin-modules.c
index ec515133be8ab..4564d7317d7ab 100644
--- a/clang/test/Driver/darwin-builtin-modules.c
+++ b/clang/test/Driver/darwin-builtin-modules.c
@@ -8,5 +8,8 @@
 
 // RUN: %clang -isysroot %S/Inputs/MacOSX15.0.sdk -target x86_64-apple-macos14.0 -### %s 2>&1 | FileCheck --check-prefix=CHECK_FUTURE %s
 // RUN: %clang -isysroot %S/Inputs/MacOSX15.0.sdk -target x86_64-apple-macos15.0 -### %s 2>&1 | FileCheck --check-prefix=CHECK_FUTURE %s
+// RUN: %clang -isysroot %S/Inputs/MacOSX15.0.sdk -target x86_64-apple-ios18.0-macabi -### %s 2>&1 | FileCheck --check-prefix=CHECK_FUTURE %s
+// RUN: %clang -isysroot %S/Inputs/MacOSX15.1.sdk -target x86_64-apple-macos15.1 -darwin-target-variant x86_64-apple-ios18.1-macabi -### %s 2>&1 | FileCheck --check-prefix=CHECK_FUTURE %s
+// RUN: %clang -isysroot %S/Inputs/MacOSX15.1.sdk -target x86_64-apple-ios18.1-macabi -darwin-target-variant x86_64-apple-macos15.1 -### %s 2>&1 | FileCheck --check-prefix=CHECK_FUTURE %s
 // RUN: %clang -isysroot %S/Inputs/DriverKit23.0.sdk -target arm64-apple-driverkit23.0 -### %s 2>&1 | FileCheck --check-prefix=CHECK_FUTURE %s
 // CHECK_FUTURE-NOT: -fbuiltin-headers-in-system-modules

From 8ea372d8b628b0a11016f5282d47c372e3843b93 Mon Sep 17 00:00:00 2001
From: Koakuma <koachan@protonmail.com>
Date: Tue, 20 Aug 2024 20:05:06 +0700
Subject: [PATCH 209/427] [SPARC] Remove assertions in printOperand for inline
 asm operands (#104692)

Inline asm operands could contain any kind of relocation, so remove the
checks.

Fixes https://github.com/llvm/llvm-project/issues/103493

(cherry picked from commit 576b7a781aac6b1d60a72248894b50e565e9185a)
---
 llvm/lib/Target/Sparc/SparcAsmPrinter.cpp | 51 -----------------------
 llvm/test/CodeGen/SPARC/inlineasm.ll      | 10 +++++
 2 files changed, 10 insertions(+), 51 deletions(-)

diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index 6855471840e9d..71ec01aeb011c 100644
--- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -314,57 +314,6 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
   const MachineOperand &MO = MI->getOperand (opNum);
   SparcMCExpr::VariantKind TF = (SparcMCExpr::VariantKind) MO.getTargetFlags();
 
-#ifndef NDEBUG
-  // Verify the target flags.
-  if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) {
-    if (MI->getOpcode() == SP::CALL)
-      assert(TF == SparcMCExpr::VK_Sparc_None &&
-             "Cannot handle target flags on call address");
-    else if (MI->getOpcode() == SP::SETHIi)
-      assert((TF == SparcMCExpr::VK_Sparc_HI
-              || TF == SparcMCExpr::VK_Sparc_H44
-              || TF == SparcMCExpr::VK_Sparc_HH
-              || TF == SparcMCExpr::VK_Sparc_LM
-              || TF == SparcMCExpr::VK_Sparc_TLS_GD_HI22
-              || TF == SparcMCExpr::VK_Sparc_TLS_LDM_HI22
-              || TF == SparcMCExpr::VK_Sparc_TLS_LDO_HIX22
-              || TF == SparcMCExpr::VK_Sparc_TLS_IE_HI22
-              || TF == SparcMCExpr::VK_Sparc_TLS_LE_HIX22) &&
-             "Invalid target flags for address operand on sethi");
-    else if (MI->getOpcode() == SP::TLS_CALL)
-      assert((TF == SparcMCExpr::VK_Sparc_None
-              || TF == SparcMCExpr::VK_Sparc_TLS_GD_CALL
-              || TF == SparcMCExpr::VK_Sparc_TLS_LDM_CALL) &&
-             "Cannot handle target flags on tls call address");
-    else if (MI->getOpcode() == SP::TLS_ADDrr)
-      assert((TF == SparcMCExpr::VK_Sparc_TLS_GD_ADD
-              || TF == SparcMCExpr::VK_Sparc_TLS_LDM_ADD
-              || TF == SparcMCExpr::VK_Sparc_TLS_LDO_ADD
-              || TF == SparcMCExpr::VK_Sparc_TLS_IE_ADD) &&
-             "Cannot handle target flags on add for TLS");
-    else if (MI->getOpcode() == SP::TLS_LDrr)
-      assert(TF == SparcMCExpr::VK_Sparc_TLS_IE_LD &&
-             "Cannot handle target flags on ld for TLS");
-    else if (MI->getOpcode() == SP::TLS_LDXrr)
-      assert(TF == SparcMCExpr::VK_Sparc_TLS_IE_LDX &&
-             "Cannot handle target flags on ldx for TLS");
-    else if (MI->getOpcode() == SP::XORri)
-      assert((TF == SparcMCExpr::VK_Sparc_TLS_LDO_LOX10
-              || TF == SparcMCExpr::VK_Sparc_TLS_LE_LOX10) &&
-             "Cannot handle target flags on xor for TLS");
-    else
-      assert((TF == SparcMCExpr::VK_Sparc_LO
-              || TF == SparcMCExpr::VK_Sparc_M44
-              || TF == SparcMCExpr::VK_Sparc_L44
-              || TF == SparcMCExpr::VK_Sparc_HM
-              || TF == SparcMCExpr::VK_Sparc_TLS_GD_LO10
-              || TF == SparcMCExpr::VK_Sparc_TLS_LDM_LO10
-              || TF == SparcMCExpr::VK_Sparc_TLS_IE_LO10 ) &&
-             "Invalid target flags for small address operand");
-  }
-#endif
-
-
   bool CloseParen = SparcMCExpr::printVariantKind(O, TF);
 
   switch (MO.getType()) {
diff --git a/llvm/test/CodeGen/SPARC/inlineasm.ll b/llvm/test/CodeGen/SPARC/inlineasm.ll
index 14ea0a2a12602..e2853f03a002e 100644
--- a/llvm/test/CodeGen/SPARC/inlineasm.ll
+++ b/llvm/test/CodeGen/SPARC/inlineasm.ll
@@ -152,3 +152,13 @@ define i64 @test_twinword(){
   %1 = tail call i64 asm sideeffect "rd %asr5, ${0:L} \0A\09 srlx ${0:L}, 32, ${0:H}", "={i0}"()
   ret i64 %1
 }
+
+; CHECK-LABEL: test_symbol:
+; CHECK: ba,a brtarget
+define void @test_symbol() {
+Entry:
+  call void asm sideeffect "ba,a ${0}", "X"(ptr @brtarget)
+  unreachable
+}
+
+declare void @brtarget()

From 6420a2ea06b6fc21547907eb447035be3e2b6b16 Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1@ibm.com>
Date: Tue, 20 Aug 2024 10:30:09 -0500
Subject: [PATCH 210/427] Add AIX/PPC Clang/LLVM release notes for LLVM 19.

---
 clang/docs/ReleaseNotes.rst | 17 +++++++++++++++++
 llvm/docs/ReleaseNotes.rst  | 14 ++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 17ddbfe910f87..b68b823ae6761 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1276,6 +1276,14 @@ RISC-V Support
   accesses may be created. ``-m[no-]strict-align`` applies to both scalar and
   vector.
 
+PowerPC Support
+^^^^^^^^^^^^^^^
+
+- Clang now emits errors for impossible ``__attribute__((musttail))``.
+- Added support for ``-mcpu=[pwr11 | power11]`` and ``-mtune=[pwr11 | power11]``.
+- Added support for ``builtin_cpu_supports`` on AIX, along with a subset of
+  features that can be queried.
+
 CUDA/HIP Language Changes
 ^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -1294,6 +1302,14 @@ AIX Support
   base is encoded as an immediate operand.
   This access sequence is not used for TLS variables larger than 32KB, and is
   currently only supported on 64-bit mode.
+- Introduced the options ``-mtocdata/-mno-tocdata`` to enable/disable TOC data
+  transformations for the listed suitable variables.
+- Introduced the ``-maix-shared-lib-tls-model-opt`` option to enable the tuning
+  of changing local-dynamic mode access(es) to initial-exec access(es) at the
+  function level on 64-bit mode.
+- Clang now emits errors for ``-gdwarf-5``.
+- Added the support of the OpenMP runtime libomp on AIX. OpenMP applications can be
+  compiled with ``-fopenmp`` and execute on AIX.
 
 NetBSD Support
 ^^^^^^^^^^^^^^
@@ -1451,6 +1467,7 @@ OpenMP Support
 --------------
 
 - Added support for the `[[omp::assume]]` attribute.
+- AIX added an include directory for ``omp.h`` at ``/opt/IBM/openxlCSDK/include/openmp``.
 
 Additional Information
 ======================
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 60b6c6e786df8..ac7bdf723a168 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -113,6 +113,8 @@ Changes to TableGen
 Changes to Interprocedural Optimizations
 ----------------------------------------
 
+* Hot cold region splitting analysis improvements for overlapping cold regions.
+
 Changes to the AArch64 Backend
 ------------------------------
 
@@ -194,6 +196,16 @@ Changes to the MIPS Backend
 Changes to the PowerPC Backend
 ------------------------------
 
+* PPC big-endian Linux now supports ``-fpatchable-function-entry``.
+* PPC AIX now supports local-dynamic TLS mode.
+* PPC AIX saves the Git revision in binaries when built with LLVM_APPEND_VC_REV=ON.
+* PPC AIX now supports toc-data attribute for large code model.
+* PPC AIX now supports passing arguments by value having greater alignment than
+  the pointer size. Currently only compatible with the IBM XL C compiler.
+* Add support for the per global code model attribute on AIX.
+* Support spilling non-volatile registers for traceback table accuracy on AIX.
+* Codegen improvements and bug fixes.
+
 Changes to the RISC-V Backend
 -----------------------------
 
@@ -436,6 +448,8 @@ Changes to the LLVM tools
   be disabled by ``--no-verify-note-sections``. (`#90458
   <https://github.com/llvm/llvm-project/pull/90458>`).
 
+* llvm-objdump now supports the ``--file-headers`` option for XCOFF object files.
+
 Changes to LLDB
 ---------------------------------
 

From 38f3dbefab0a4965abad99aa23eced96d5d8dc16 Mon Sep 17 00:00:00 2001
From: Bryce Kahle <bryce.kahle@datadoghq.com>
Date: Tue, 20 Aug 2024 12:25:33 -0700
Subject: [PATCH 211/427] use default intrinsic attrs for BPF packet loads

The BPF packet load intrinsics lost attribute WillReturn due to 0b20c30. The attribute loss causes excessive bitshifting, resulting in previously working programs failing the BPF verifier due to instruction/complexity limits.

cherry picked only the BPF changes from 99a10f1

Signed-off-by: Bryce Kahle <bryce.kahle@datadoghq.com>
---
 llvm/include/llvm/IR/IntrinsicsBPF.td      |  6 ++--
 llvm/test/CodeGen/BPF/sockex2.ll           |  2 +-
 llvm/test/Transforms/DCE/intrinsics-bpf.ll | 33 ++++++++++++++++++++++
 3 files changed, 37 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Transforms/DCE/intrinsics-bpf.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsBPF.td b/llvm/include/llvm/IR/IntrinsicsBPF.td
index c7ec0916f1d1f..d02eaa6d0dff6 100644
--- a/llvm/include/llvm/IR/IntrinsicsBPF.td
+++ b/llvm/include/llvm/IR/IntrinsicsBPF.td
@@ -13,11 +13,11 @@
 // Specialized loads from packet
 let TargetPrefix = "bpf" in {  // All intrinsics start with "llvm.bpf."
   def int_bpf_load_byte : ClangBuiltin<"__builtin_bpf_load_byte">,
-              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
+              DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
   def int_bpf_load_half : ClangBuiltin<"__builtin_bpf_load_half">,
-              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
+              DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
   def int_bpf_load_word : ClangBuiltin<"__builtin_bpf_load_word">,
-              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
+              DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>;
   def int_bpf_pseudo : ClangBuiltin<"__builtin_bpf_pseudo">,
               Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]>;
   def int_bpf_preserve_field_info : ClangBuiltin<"__builtin_bpf_preserve_field_info">,
diff --git a/llvm/test/CodeGen/BPF/sockex2.ll b/llvm/test/CodeGen/BPF/sockex2.ll
index 4131d9dac31d8..b1264099f64c6 100644
--- a/llvm/test/CodeGen/BPF/sockex2.ll
+++ b/llvm/test/CodeGen/BPF/sockex2.ll
@@ -311,7 +311,7 @@ flow_dissector.exit.thread:                       ; preds = %86, %12, %196, %199
 ; CHECK-LABEL: bpf_prog2:
 ; CHECK: r0 = *(u16 *)skb[12] # encoding: [0x28,0x00,0x00,0x00,0x0c,0x00,0x00,0x00]
 ; CHECK: r0 = *(u16 *)skb[16] # encoding: [0x28,0x00,0x00,0x00,0x10,0x00,0x00,0x00]
-; CHECK: implicit-def: $r8
+; CHECK: implicit-def: $r7
 ; CHECK: r1 =
 ; CHECK: call 1 # encoding: [0x85,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
 ; CHECK: call 2 # encoding: [0x85,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
diff --git a/llvm/test/Transforms/DCE/intrinsics-bpf.ll b/llvm/test/Transforms/DCE/intrinsics-bpf.ll
new file mode 100644
index 0000000000000..135588ba21cbb
--- /dev/null
+++ b/llvm/test/Transforms/DCE/intrinsics-bpf.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S < %s -passes=dce  | FileCheck %s
+
+declare i64 @llvm.bpf.load.half(ptr, i64)
+declare i64 @llvm.bpf.load.word(ptr, i64)
+declare i64 @llvm.bpf.load.byte(ptr, i64)
+
+define void @test_bpf_load_half(ptr %a, i64 %b) {
+; CHECK-LABEL: define void @test_bpf_load_half(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i64 @llvm.bpf.load.half(ptr %a, i64 %b)
+  ret void
+}
+
+define void @test_bpf_load_word(ptr %a, i64 %b) {
+; CHECK-LABEL: define void @test_bpf_load_word(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i64 @llvm.bpf.load.word(ptr %a, i64 %b)
+  ret void
+}
+
+define void @test_bpf_load_byte(ptr %a, i64 %b) {
+; CHECK-LABEL: define void @test_bpf_load_byte(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  %v = call i64 @llvm.bpf.load.byte(ptr %a, i64 %b)
+  ret void
+}

From 43b455b2d2e5107e19d7d47e77ba513d1f9f5e2f Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson@amd.com>
Date: Sat, 17 Aug 2024 16:52:38 +0900
Subject: [PATCH 212/427] [AMDGPU] Disable inline constants for pseudo scalar
 transcendentals (#104395)

Prevent operand folding from inlining constants into pseudo scalar
transcendental f16 instructions.
However still allow literal constants.

(cherry picked from commit fc6300a5f7ef430e4ec86d16be0b146de7fbd16b)
---
 llvm/lib/Target/AMDGPU/GCNSubtarget.h         |   6 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |   4 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.h          |   8 ++
 .../AMDGPU/pseudo-scalar-transcendental.mir   | 120 ++++++++++++++++++
 4 files changed, 138 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir

diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index def89c785b855..902f51ae358d5 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1289,6 +1289,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
   bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
 
+  /// \returns true if inline constants are not supported for F16 pseudo
+  /// scalar transcendentals.
+  bool hasNoF16PseudoScalarTransInlineConstants() const {
+    return getGeneration() == GFX12;
+  }
+
   /// \returns The maximum number of instructions that can be enclosed in an
   /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
   /// instruction.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 463737f645d45..27b8c1b17422a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5768,6 +5768,10 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
           return false;
       }
     }
+  } else if (ST.hasNoF16PseudoScalarTransInlineConstants() && !MO->isReg() &&
+             isF16PseudoScalarTrans(MI.getOpcode()) &&
+             isInlineConstant(*MO, OpInfo)) {
+    return false;
   }
 
   if (MO->isReg()) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 1712dfe8d406c..91855fb14f6f3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -946,6 +946,14 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
            Opcode == AMDGPU::DS_GWS_BARRIER;
   }
 
+  static bool isF16PseudoScalarTrans(unsigned Opcode) {
+    return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
+           Opcode == AMDGPU::V_S_LOG_F16_e64 ||
+           Opcode == AMDGPU::V_S_RCP_F16_e64 ||
+           Opcode == AMDGPU::V_S_RSQ_F16_e64 ||
+           Opcode == AMDGPU::V_S_SQRT_F16_e64;
+  }
+
   static bool doesNotReadTiedSource(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir
new file mode 100644
index 0000000000000..17bed38bd046d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.mir
@@ -0,0 +1,120 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
+
+# Do not use inline constants for f16 pseudo scalar transcendentals.
+# But allow literal constants.
+
+---
+name: exp_f16_imm
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: exp_f16_imm
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+    ; GCN-NEXT: [[V_S_EXP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_EXP_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 15360
+    %1:sgpr_32 = V_S_EXP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: exp_f16_literal
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: exp_f16_literal
+    ; GCN: [[V_S_EXP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_EXP_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 16960
+    %1:sgpr_32 = V_S_EXP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: log_f16_imm
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: log_f16_imm
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+    ; GCN-NEXT: [[V_S_LOG_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_LOG_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 15360
+    %1:sgpr_32 = V_S_LOG_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: log_f16_literal
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: log_f16_literal
+    ; GCN: [[V_S_LOG_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_LOG_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 16960
+    %1:sgpr_32 = V_S_LOG_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: rcp_f16_imm
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: rcp_f16_imm
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+    ; GCN-NEXT: [[V_S_RCP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RCP_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 15360
+    %1:sgpr_32 = V_S_RCP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: rcp_f16_literal
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: rcp_f16_literal
+    ; GCN: [[V_S_RCP_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RCP_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 16960
+    %1:sgpr_32 = V_S_RCP_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: rsq_f16_imm
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: rsq_f16_imm
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+    ; GCN-NEXT: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RSQ_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 15360
+    %1:sgpr_32 = V_S_RSQ_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: rsq_f16_literal
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: rsq_f16_literal
+    ; GCN: [[V_S_RSQ_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_RSQ_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 16960
+    %1:sgpr_32 = V_S_RSQ_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: sqrt_f16_imm
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: sqrt_f16_imm
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 15360
+    ; GCN-NEXT: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_SQRT_F16_e64 1, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 15360
+    %1:sgpr_32 = V_S_SQRT_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...
+
+---
+name: sqrt_f16_literal
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: sqrt_f16_literal
+    ; GCN: [[V_S_SQRT_F16_e64_:%[0-9]+]]:sgpr_32 = V_S_SQRT_F16_e64 1, 16960, 0, 0, implicit $mode, implicit $exec
+    %0:sgpr_32 = S_MOV_B32 16960
+    %1:sgpr_32 = V_S_SQRT_F16_e64 1, %0:sgpr_32, 0, 0, implicit $mode, implicit $exec
+...

From b6a562d90fa08543171bafbb9c897c03f6cf691f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Pettersson?= <bjorn.a.pettersson@ericsson.com>
Date: Wed, 21 Aug 2024 17:56:27 +0200
Subject: [PATCH 213/427] [DAGCombiner] Fix ReplaceAllUsesOfValueWith mutation
 bug in visitFREEZE (#104924)

In visitFREEZE we have been collecting a set/vector of
MaybePoisonOperands that later was iterated over, applying a freeze to
those operands. However, C-level fuzzy testing has discovered that the
recursiveness of ReplaceAllUsesOfValueWith may cause later operands in
the MaybePoisonOperands vector to be replaced when replacing an earlier
operand. That would then turn up as
   Assertion `N1.getOpcode() != ISD::DELETED_NODE &&
              "Operand is DELETED_NODE!"' failed.
failures when trying to freeze those later operands.

So we need to make sure that the vector with MaybePoisonOperands is
mutated as well when needed. Or as the solution used in this patch, make
sure to keep track of operand numbers that should be frozen instead of
having a vector of SDValues. And then we can refetch the operands while
iterating over operand numbers.

The problem was seen after adding SELECT_CC to the set of operations
including in "AllowMultipleMaybePoisonOperands". I'm not sure, but I
guess that this could happen for other operations as well for which we
allow multiple maybe poison operands.

(cherry picked from commit 278fc8efdf004a1959a31bb4c208df5ee733d5c8)
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 22 ++++++++++---
 .../CodeGen/AArch64/dag-combine-freeze.ll     | 31 +++++++++++++++++++
 2 files changed, 49 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/dag-combine-freeze.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aa9032ea2574c..71cdec91e5f67 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15680,13 +15680,16 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
     }
   }
 
-  SmallSetVector<SDValue, 8> MaybePoisonOperands;
-  for (SDValue Op : N0->ops()) {
+  SmallSet<SDValue, 8> MaybePoisonOperands;
+  SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
+  for (auto [OpNo, Op] : enumerate(N0->ops())) {
     if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
                                              /*Depth*/ 1))
       continue;
     bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
-    bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op);
+    bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
+    if (IsNewMaybePoisonOperand)
+      MaybePoisonOperandNumbers.push_back(OpNo);
     if (!HadMaybePoisonOperands)
       continue;
     if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
@@ -15698,7 +15701,18 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
   // it could create undef or poison due to it's poison-generating flags.
   // So not finding any maybe-poison operands is fine.
 
-  for (SDValue MaybePoisonOperand : MaybePoisonOperands) {
+  for (unsigned OpNo : MaybePoisonOperandNumbers) {
+    // N0 can mutate during iteration, so make sure to refetch the maybe poison
+    // operands via the operand numbers. The typical scenario is that we have
+    // something like this
+    //   t262: i32 = freeze t181
+    //   t150: i32 = ctlz_zero_undef t262
+    //   t184: i32 = ctlz_zero_undef t181
+    //   t268: i32 = select_cc t181, Constant:i32<0>, t184, t186, setne:ch
+    // When freezing the t181 operand we get t262 back, and then the
+    // ReplaceAllUsesOfValueWith call will not only replace t181 by t262, but
+    // also recursively replace t184 by t150.
+    SDValue MaybePoisonOperand = N->getOperand(0).getOperand(OpNo);
     // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
     if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
       continue;
diff --git a/llvm/test/CodeGen/AArch64/dag-combine-freeze.ll b/llvm/test/CodeGen/AArch64/dag-combine-freeze.ll
new file mode 100644
index 0000000000000..4f0c3d0ce1800
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/dag-combine-freeze.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple aarch64 -o /dev/null %s
+
+; This used to fail with:
+;    Assertion `N1.getOpcode() != ISD::DELETED_NODE &&
+;               "Operand is DELETED_NODE!"' failed.
+; Just make sure we do not crash here.
+define void @test_fold_freeze_over_select_cc(i15 %a, ptr %p1, ptr %p2) {
+entry:
+  %a2 = add nsw i15 %a, 1
+  %sext = sext i15 %a2 to i32
+  %ashr = ashr i32 %sext, 31
+  %lshr = lshr i32 %ashr, 7
+  ; Setup an already frozen input to ctlz.
+  %freeze = freeze i32 %lshr
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %freeze, i1 true)
+  store i32 %ctlz, ptr %p1, align 1
+  ; Here is another ctlz, which is used by a frozen select.
+  ; DAGCombiner::visitFREEZE will to try to fold the freeze over a SELECT_CC,
+  ; and when dealing with the condition operand the other SELECT_CC operands
+  ; will be replaced/simplified as well. So the SELECT_CC is mutated while
+  ; freezing the "maybe poison operands". This needs to be handled by
+  ; DAGCombiner::visitFREEZE, as it can't store the list of SDValues that
+  ; should be frozen in a separate data structure that isn't updated when the
+  ; SELECT_CC is mutated.
+  %ctlz1 = call i32 @llvm.ctlz.i32(i32 %lshr, i1 true)
+  %icmp = icmp ne i32 %lshr, 0
+  %select = select i1 %icmp, i32 %ctlz1, i32 0
+  %freeze1 = freeze i32 %select
+  store i32 %freeze1, ptr %p2, align 1
+  ret void
+}

From 1503d18171e569996bf3e107364b1f0fd5f750e9 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 20 Aug 2024 11:11:33 +0100
Subject: [PATCH 214/427] [X86] Use correct fp immediate types in _mm_set_ss/sd

Avoids implicit sint_to_fp which wasn't occurring on strict fp codegen

Fixes #104848

(cherry picked from commit 6dcce422ca06601f2b00e85cc18c745ede245ca6)
---
 clang/lib/Headers/emmintrin.h              |  2 +-
 clang/lib/Headers/xmmintrin.h              |  2 +-
 clang/test/CodeGen/X86/strictfp_patterns.c | 26 ++++++++++++++++++++++
 3 files changed, 28 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CodeGen/X86/strictfp_patterns.c

diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index e85bfc47aa5cc..4dff6421350c0 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -1771,7 +1771,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void) {
 ///    lower 64 bits contain the value of the parameter. The upper 64 bits are
 ///    set to zero.
 static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w) {
-  return __extension__(__m128d){__w, 0};
+  return __extension__(__m128d){__w, 0.0};
 }
 
 /// Constructs a 128-bit floating-point vector of [2 x double], with each
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index 1ef89de9c9f56..6fb27297af927 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -1910,7 +1910,7 @@ _mm_undefined_ps(void)
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_set_ss(float __w)
 {
-  return __extension__ (__m128){ __w, 0, 0, 0 };
+  return __extension__ (__m128){ __w, 0.0f, 0.0f, 0.0f };
 }
 
 /// Constructs a 128-bit floating-point vector of [4 x float], with each
diff --git a/clang/test/CodeGen/X86/strictfp_patterns.c b/clang/test/CodeGen/X86/strictfp_patterns.c
new file mode 100644
index 0000000000000..55d85f22c3ba3
--- /dev/null
+++ b/clang/test/CodeGen/X86/strictfp_patterns.c
@@ -0,0 +1,26 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 %s -O2 -emit-llvm -o - -triple x86_64-unknown-unknown -ffreestanding -ffp-exception-behavior=maytrap -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+// PR104848 - ensure the _mm_set_ss/d headers don't implicity promote any integer/fp values.
+
+// CHECK-LABEL: @test_mm_set_ss(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[NUM:%.*]], i64 0
+// CHECK-NEXT:    ret <4 x float> [[VECINIT3_I]]
+//
+__m128 test_mm_set_ss(float num)
+{
+    return _mm_set_ss(num);
+}
+
+// CHECK-LABEL: @test_mm_set_sd(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <2 x double> <double poison, double 0.000000e+00>, double [[NUM:%.*]], i64 0
+// CHECK-NEXT:    ret <2 x double> [[VECINIT1_I]]
+//
+__m128d test_mm_set_sd(double num)
+{
+    return _mm_set_sd(num);
+}

From 1241c762c165972690c4edfb82ec7421c1e64658 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Thu, 22 Aug 2024 20:02:48 -0700
Subject: [PATCH 215/427] [clang-format] Don't insert a space between :: and *
 (#105043)

Also, don't insert a space after ::* for method pointers.

See
https://github.com/llvm/llvm-project/pull/86253#issuecomment-2298404887.

Fixes #100841.

(cherry picked from commit 714033a6bf3a81b1350f969ddd83bcd9fbb703e8)
---
 clang/lib/Format/TokenAnnotator.cpp           | 16 +++++----
 clang/unittests/Format/FormatTest.cpp         | 36 +++++++++----------
 clang/unittests/Format/QualifierFixerTest.cpp | 36 +++++++++----------
 3 files changed, 45 insertions(+), 43 deletions(-)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 6b9253613788c..851f79895ac5a 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -4467,10 +4467,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
   }
   if (Left.is(tok::colon))
     return Left.isNot(TT_ObjCMethodExpr);
-  if (Left.is(tok::coloncolon)) {
-    return Right.is(tok::star) && Right.is(TT_PointerOrReference) &&
-           Style.PointerAlignment != FormatStyle::PAS_Left;
-  }
+  if (Left.is(tok::coloncolon))
+    return false;
   if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) {
     if (Style.Language == FormatStyle::LK_TextProto ||
         (Style.Language == FormatStyle::LK_Proto &&
@@ -4580,8 +4578,14 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
     if (!BeforeLeft)
       return false;
     if (BeforeLeft->is(tok::coloncolon)) {
-      return Left.is(tok::star) &&
-             Style.PointerAlignment != FormatStyle::PAS_Right;
+      if (Left.isNot(tok::star))
+        return false;
+      assert(Style.PointerAlignment != FormatStyle::PAS_Right);
+      if (!Right.startsSequence(tok::identifier, tok::r_paren))
+        return true;
+      assert(Right.Next);
+      const auto *LParen = Right.Next->MatchingParen;
+      return !LParen || LParen->isNot(TT_FunctionTypeLParen);
     }
     return !BeforeLeft->isOneOf(tok::l_paren, tok::l_square);
   }
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 39fcbab3447a7..82af149e048c9 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -3646,8 +3646,8 @@ TEST_F(FormatTest, FormatsClasses) {
                "    : public aaaaaaaaaaaaaaaaaaa<aaaaaaaaaaaaaaaaaaaaa,\n"
                "                                 aaaaaaaaaaaaaaaaaaaaaa> {};");
   verifyFormat("template <class R, class C>\n"
-               "struct Aaaaaaaaaaaaaaaaa<R (C:: *)(int) const>\n"
-               "    : Aaaaaaaaaaaaaaaaa<R (C:: *)(int)> {};");
+               "struct Aaaaaaaaaaaaaaaaa<R (C::*)(int) const>\n"
+               "    : Aaaaaaaaaaaaaaaaa<R (C::*)(int)> {};");
   verifyFormat("class ::A::B {};");
 }
 
@@ -11141,10 +11141,10 @@ TEST_F(FormatTest, UnderstandsBinaryOperators) {
 }
 
 TEST_F(FormatTest, UnderstandsPointersToMembers) {
-  verifyFormat("int A:: *x;");
-  verifyFormat("int (S:: *func)(void *);");
-  verifyFormat("void f() { int (S:: *func)(void *); }");
-  verifyFormat("typedef bool *(Class:: *Member)() const;");
+  verifyFormat("int A::*x;");
+  verifyFormat("int (S::*func)(void *);");
+  verifyFormat("void f() { int (S::*func)(void *); }");
+  verifyFormat("typedef bool *(Class::*Member)() const;");
   verifyFormat("void f() {\n"
                "  (a->*f)();\n"
                "  a->*x;\n"
@@ -11162,16 +11162,16 @@ TEST_F(FormatTest, UnderstandsPointersToMembers) {
 
   FormatStyle Style = getLLVMStyle();
   EXPECT_EQ(Style.PointerAlignment, FormatStyle::PAS_Right);
-  verifyFormat("typedef bool *(Class:: *Member)() const;", Style);
-  verifyFormat("void f(int A:: *p) { int A:: *v = &A::B; }", Style);
+  verifyFormat("typedef bool *(Class::*Member)() const;", Style);
+  verifyFormat("void f(int A::*p) { int A::*v = &A::B; }", Style);
 
   Style.PointerAlignment = FormatStyle::PAS_Left;
-  verifyFormat("typedef bool* (Class::* Member)() const;", Style);
+  verifyFormat("typedef bool* (Class::*Member)() const;", Style);
   verifyFormat("void f(int A::* p) { int A::* v = &A::B; }", Style);
 
   Style.PointerAlignment = FormatStyle::PAS_Middle;
-  verifyFormat("typedef bool * (Class:: * Member)() const;", Style);
-  verifyFormat("void f(int A:: * p) { int A:: * v = &A::B; }", Style);
+  verifyFormat("typedef bool * (Class::*Member)() const;", Style);
+  verifyFormat("void f(int A::* p) { int A::* v = &A::B; }", Style);
 }
 
 TEST_F(FormatTest, UnderstandsUnaryOperators) {
@@ -12514,7 +12514,7 @@ TEST_F(FormatTest, FormatsFunctionTypes) {
   verifyFormat("int (*func)(void *);");
   verifyFormat("void f() { int (*func)(void *); }");
   verifyFormat("template <class CallbackClass>\n"
-               "using Callback = void (CallbackClass:: *)(SomeObject *Data);");
+               "using MyCallback = void (CallbackClass::*)(SomeObject *Data);");
 
   verifyGoogleFormat("A<void*(int*, SomeType*)>;");
   verifyGoogleFormat("void* (*a)(int);");
@@ -19437,13 +19437,13 @@ TEST_F(FormatTest, AlignConsecutiveDeclarations) {
                "int   bbbbbbb = 0;",
                Alignment);
   // http://llvm.org/PR68079
-  verifyFormat("using Fn   = int (A:: *)();\n"
-               "using RFn  = int (A:: *)() &;\n"
-               "using RRFn = int (A:: *)() &&;",
+  verifyFormat("using Fn   = int (A::*)();\n"
+               "using RFn  = int (A::*)() &;\n"
+               "using RRFn = int (A::*)() &&;",
                Alignment);
-  verifyFormat("using Fn   = int (A:: *)();\n"
-               "using RFn  = int *(A:: *)() &;\n"
-               "using RRFn = double (A:: *)() &&;",
+  verifyFormat("using Fn   = int (A::*)();\n"
+               "using RFn  = int *(A::*)() &;\n"
+               "using RRFn = double (A::*)() &&;",
                Alignment);
 
   // PAS_Right
diff --git a/clang/unittests/Format/QualifierFixerTest.cpp b/clang/unittests/Format/QualifierFixerTest.cpp
index 3a5f63e5de65b..f9255c6e4c708 100644
--- a/clang/unittests/Format/QualifierFixerTest.cpp
+++ b/clang/unittests/Format/QualifierFixerTest.cpp
@@ -305,7 +305,7 @@ TEST_F(QualifierFixerTest, RightQualifier) {
   verifyFormat("Foo inline static const;", "Foo inline const static;", Style);
   verifyFormat("Foo inline static const;", Style);
 
-  verifyFormat("Foo<T volatile>::Bar<Type const, 5> const volatile A:: *;",
+  verifyFormat("Foo<T volatile>::Bar<Type const, 5> const volatile A::*;",
                "volatile const Foo<volatile T>::Bar<const Type, 5> A::*;",
                Style);
 
@@ -523,15 +523,14 @@ TEST_F(QualifierFixerTest, RightQualifier) {
   verifyFormat("const INTPTR a;", Style);
 
   // Pointers to members
-  verifyFormat("int S:: *a;", Style);
-  verifyFormat("int const S:: *a;", "const int S:: *a;", Style);
-  verifyFormat("int const S:: *const a;", "const int S::* const a;", Style);
-  verifyFormat("int A:: *const A:: *p1;", Style);
-  verifyFormat("float (C:: *p)(int);", Style);
-  verifyFormat("float (C:: *const p)(int);", Style);
-  verifyFormat("float (C:: *p)(int) const;", Style);
-  verifyFormat("float const (C:: *p)(int);", "const float (C::*p)(int);",
-               Style);
+  verifyFormat("int S::*a;", Style);
+  verifyFormat("int const S::*a;", "const int S::*a;", Style);
+  verifyFormat("int const S::*const a;", "const int S::* const a;", Style);
+  verifyFormat("int A::*const A::*p1;", Style);
+  verifyFormat("float (C::*p)(int);", Style);
+  verifyFormat("float (C::*const p)(int);", Style);
+  verifyFormat("float (C::*p)(int) const;", Style);
+  verifyFormat("float const (C::*p)(int);", "const float (C::*p)(int);", Style);
 }
 
 TEST_F(QualifierFixerTest, LeftQualifier) {
@@ -831,15 +830,14 @@ TEST_F(QualifierFixerTest, LeftQualifier) {
   verifyFormat("INTPTR const a;", Style);
 
   // Pointers to members
-  verifyFormat("int S:: *a;", Style);
-  verifyFormat("const int S:: *a;", "int const S:: *a;", Style);
-  verifyFormat("const int S:: *const a;", "int const S::* const a;", Style);
-  verifyFormat("int A:: *const A:: *p1;", Style);
-  verifyFormat("float (C:: *p)(int);", Style);
-  verifyFormat("float (C:: *const p)(int);", Style);
-  verifyFormat("float (C:: *p)(int) const;", Style);
-  verifyFormat("const float (C:: *p)(int);", "float const (C::*p)(int);",
-               Style);
+  verifyFormat("int S::*a;", Style);
+  verifyFormat("const int S::*a;", "int const S::*a;", Style);
+  verifyFormat("const int S::*const a;", "int const S::*const a;", Style);
+  verifyFormat("int A::*const A::*p1;", Style);
+  verifyFormat("float (C::*p)(int);", Style);
+  verifyFormat("float (C::*const p)(int);", Style);
+  verifyFormat("float (C::*p)(int) const;", Style);
+  verifyFormat("const float (C::*p)(int);", "float const (C::*p)(int);", Style);
 }
 
 TEST_F(QualifierFixerTest, ConstVolatileQualifiersOrder) {

From 3ff9d92aae0945daa85ec6f85f05a3aeaaa9f962 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333@gmail.com>
Date: Fri, 23 Aug 2024 16:06:00 +0800
Subject: [PATCH 216/427] [ConstraintElim] Fix miscompilation caused by PR97974
 (#105790)

Fixes https://github.com/llvm/llvm-project/issues/105785.

(cherry picked from commit 85b6aac7c25f9d2a976a76045ace1e7afebb5965)
---
 .../Scalar/ConstraintElimination.cpp          |  2 +-
 .../ConstraintElimination/pr105785.ll         | 46 +++++++++++++++++++
 2 files changed, 47 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/ConstraintElimination/pr105785.ll

diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index c31173879af1e..37022104d0a9b 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1464,7 +1464,7 @@ static bool checkAndReplaceCmp(CmpIntrinsic *I, ConstraintInfo &Info,
     ToRemove.push_back(I);
     return true;
   }
-  if (checkCondition(ICmpInst::ICMP_EQ, LHS, RHS, I, Info)) {
+  if (checkCondition(ICmpInst::ICMP_EQ, LHS, RHS, I, Info).value_or(false)) {
     I->replaceAllUsesWith(ConstantInt::get(I->getType(), 0));
     ToRemove.push_back(I);
     return true;
diff --git a/llvm/test/Transforms/ConstraintElimination/pr105785.ll b/llvm/test/Transforms/ConstraintElimination/pr105785.ll
new file mode 100644
index 0000000000000..6c340a11dd2e2
--- /dev/null
+++ b/llvm/test/Transforms/ConstraintElimination/pr105785.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=constraint-elimination -S %s | FileCheck %s
+
+define void @pr105785(ptr %p) {
+; CHECK-LABEL: define void @pr105785(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[FOR_COND:.*]]
+; CHECK:       [[FOR_COND]]:
+; CHECK-NEXT:    [[FOR_IND:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 1, %[[FOR_COND1:.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[FOR_IND]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label %[[FOR_COND1]], label %[[FOR_END6:.*]]
+; CHECK:       [[FOR_COND1]]:
+; CHECK-NEXT:    [[FOR_IND2:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY3:.*]] ], [ 0, %[[FOR_COND]] ]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[FOR_IND2]], 3
+; CHECK-NEXT:    br i1 [[CMP2]], label %[[FOR_BODY3]], label %[[FOR_COND]]
+; CHECK:       [[FOR_BODY3]]:
+; CHECK-NEXT:    [[SCMP:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[FOR_IND]], i32 1)
+; CHECK-NEXT:    store i32 [[SCMP]], ptr [[P]], align 4
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[FOR_IND2]], 1
+; CHECK-NEXT:    br label %[[FOR_COND1]]
+; CHECK:       [[FOR_END6]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond1, %entry
+  %for.ind = phi i32 [ 0, %entry ], [ 1, %for.cond1 ]
+  %cmp = icmp eq i32 %for.ind, 0
+  br i1 %cmp, label %for.cond1, label %for.end6
+
+for.cond1:                                        ; preds = %for.cond, %for.body3
+  %for.ind2 = phi i32 [ %inc, %for.body3 ], [ 0, %for.cond ]
+  %cmp2 = icmp ult i32 %for.ind2, 3
+  br i1 %cmp2, label %for.body3, label %for.cond
+
+for.body3:                                        ; preds = %for.cond1
+  %scmp = call i32 @llvm.scmp.i32.i32(i32 %for.ind, i32 1)
+  store i32 %scmp, ptr %p, align 4
+  %inc = add nuw nsw i32 %for.ind2, 1
+  br label %for.cond1
+
+for.end6:
+  ret void
+}

From cfe8eb89cbb8b8d873579123555a5238d9ad502c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 1 Aug 2024 16:08:33 +0100
Subject: [PATCH 217/427] [MCA][X86] Add missing 512-bit
 vpscatterqd/vscatterqps schedule data (REAPPLIED)

This doesn't match uops.info yet - but it matches the existing vpscatterdq/vscatterqpd entries like uops.info says it should

Reapplied with codegen fix for scatter-schedule.ll

Fixes #105675

(cherry picked from commit cf6cd1fd67356ca0c2972992928592d2430043d2)
---
 llvm/lib/Target/X86/X86SchedIceLake.td        |  2 +
 llvm/lib/Target/X86/X86SchedSkylakeServer.td  |  2 +
 llvm/test/CodeGen/X86/scatter-schedule.ll     |  4 +-
 .../llvm-mca/X86/Generic/resources-avx512.s   | 35 +++++++++++-
 .../llvm-mca/X86/Generic/resources-avx512vl.s | 54 ++++++++++++++++++-
 .../X86/IceLakeServer/resources-avx512.s      | 35 +++++++++++-
 .../X86/IceLakeServer/resources-avx512vl.s    | 54 ++++++++++++++++++-
 .../X86/SapphireRapids/resources-avx512.s     | 35 +++++++++++-
 .../X86/SapphireRapids/resources-avx512vl.s   | 54 ++++++++++++++++++-
 .../X86/SkylakeServer/resources-avx512.s      | 35 +++++++++++-
 .../X86/SkylakeServer/resources-avx512vl.s    | 54 ++++++++++++++++++-
 .../llvm-mca/X86/Znver4/resources-avx512.s    | 35 +++++++++++-
 .../llvm-mca/X86/Znver4/resources-avx512vl.s  | 54 ++++++++++++++++++-
 13 files changed, 441 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
index 186d4d84c2510..b68be9be6d473 100644
--- a/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -1510,8 +1510,10 @@ def ICXWriteResGroup113 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort015
   let ReleaseAtCycles = [1,8,8,2];
 }
 def: InstRW<[ICXWriteResGroup113], (instrs VPSCATTERDQZmr,
+                                           VPSCATTERQDZmr,
                                            VPSCATTERQQZmr,
                                            VSCATTERDPDZmr,
+                                           VSCATTERQPSZmr,
                                            VSCATTERQPDZmr)>;
 
 def ICXWriteResGroup114 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 4fded44085e89..2423602d06c47 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -1499,8 +1499,10 @@ def SKXWriteResGroup113 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort015
   let ReleaseAtCycles = [1,8,8,2];
 }
 def: InstRW<[SKXWriteResGroup113], (instrs VPSCATTERDQZmr,
+                                           VPSCATTERQDZmr,
                                            VPSCATTERQQZmr,
                                            VSCATTERDPDZmr,
+                                           VSCATTERQPSZmr,
                                            VSCATTERQPDZmr)>;
 
 def SKXWriteResGroup114 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> {
diff --git a/llvm/test/CodeGen/X86/scatter-schedule.ll b/llvm/test/CodeGen/X86/scatter-schedule.ll
index c841e23eab76b..762a050247a87 100644
--- a/llvm/test/CodeGen/X86/scatter-schedule.ll
+++ b/llvm/test/CodeGen/X86/scatter-schedule.ll
@@ -10,8 +10,8 @@ define void @test(i64 %x272, <16 x ptr> %x335, <16 x i32> %x270) {
 ; CHECK-LABEL: test:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    kxnorw %k0, %k0, %k1
-; CHECK-NEXT:    kxnorw %k0, %k0, %k2
-; CHECK-NEXT:    vpscatterqd %ymm2, (,%zmm0) {%k2}
+; CHECK-NEXT:    vpscatterqd %ymm2, (,%zmm0) {%k1}
+; CHECK-NEXT:    kxnorw %k0, %k0, %k1
 ; CHECK-NEXT:    vextracti64x4 $1, %zmm2, %ymm0
 ; CHECK-NEXT:    vpscatterqd %ymm0, (,%zmm1) {%k1}
 ; CHECK-NEXT:    vzeroupper
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
index a8937f7dcfd11..c3453d890d76d 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
@@ -298,6 +298,9 @@ vdivps            %zmm16, %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax), %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
+{evex} vextractps $1, %xmm0, %rcx
+{evex} vextractps $1, %xmm0, (%rax)
+
 vfmadd132pd       %zmm16, %zmm17, %zmm19
 vfmadd132pd       (%rax), %zmm17, %zmm19
 vfmadd132pd       (%rax){1to8}, %zmm17, %zmm19
@@ -811,6 +814,11 @@ vpermq            %zmm16, %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax), %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vpscatterdd       %zmm1, (%rdx,%zmm0,4) {%k1}
+vpscatterdq       %zmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqd       %ymm1, (%rdx,%zmm0,4) {%k1}
+vpscatterqq       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vpshufd           $0, %zmm16, %zmm19
 vpshufd           $0, (%rax), %zmm19
 vpshufd           $0, (%rax){1to16}, %zmm19
@@ -881,6 +889,11 @@ vpunpcklqdq       %zmm16, %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax), %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vscatterdps       %zmm1, (%rdx,%zmm0,4) {%k1}
+vscatterdpd       %zmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqps       %ymm1, (%rdx,%zmm0,4) {%k1}
+vscatterqpd       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vshuff32x4        $0, %zmm16, %zmm17, %zmm19
 vshuff32x4        $0, (%rax), %zmm17, %zmm19
 vshuff32x4        $0, (%rax){1to16}, %zmm17, %zmm19
@@ -1334,6 +1347,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  3      29    28.00                       vdivps	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  4      36    28.00   *                   vdivps	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  4      36    28.00   *                   vdivps	(%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      3     1.00                        {evex}	vextractps	$1, %xmm0, %ecx
+# CHECK-NEXT:  3      5     1.00           *            {evex}	vextractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  1      5     0.50                        vfmadd132pd	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      10    0.50    *                   vfmadd132pd	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      10    0.50    *                   vfmadd132pd	(%rax){1to8}, %zmm17, %zmm19
@@ -1787,6 +1802,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        vpermq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpermq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpermq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdd	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdq	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqd	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqq	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      1     1.00                        vpshufd	$0, %zmm16, %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vpshufd	$0, (%rax), %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vpshufd	$0, (%rax){1to16}, %zmm19
@@ -1850,6 +1869,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        vpunpcklqdq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpcklqdq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpcklqdq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdps	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdpd	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqps	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqpd	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      1     1.00                        vshuff32x4	$0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vshuff32x4	$0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vshuff32x4	$0, (%rax){1to16}, %zmm17, %zmm19
@@ -2027,7 +2050,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     1506.00 197.00 334.00 16.00 522.00 299.50 299.50
+# CHECK-NEXT:  -     1506.00 198.00 335.00 25.00 523.00 304.00 304.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -2290,6 +2313,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -     28.00  2.50    -      -     0.50    -      -     vdivps	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -     28.00  2.50    -      -     0.50   0.50   0.50   vdivps	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -     28.00  2.50    -      -     0.50   0.50   0.50   vdivps	(%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.00   0.50    -     0.50    -      -     {evex}	vextractps	$1, %xmm0, %ecx
+# CHECK-NEXT:  -      -      -     0.50   1.00   0.50   0.50   0.50   {evex}	vextractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -     vfmadd132pd	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.50   0.50    -      -     0.50   0.50   vfmadd132pd	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.50   0.50    -      -     0.50   0.50   vfmadd132pd	(%rax){1to8}, %zmm17, %zmm19
@@ -2743,6 +2768,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vpermq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vpermq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vpermq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterdd	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterdq	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterqd	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterqq	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vpshufd	$0, %zmm16, %zmm19
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vpshufd	$0, (%rax), %zmm19
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vpshufd	$0, (%rax){1to16}, %zmm19
@@ -2806,6 +2835,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vpunpcklqdq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vpunpcklqdq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vpunpcklqdq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterdps	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterdpd	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterqps	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterqpd	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vshuff32x4	$0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vshuff32x4	$0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vshuff32x4	$0, (%rax){1to16}, %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s
index e8e7a80f690bf..4a4f77826437b 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s
@@ -1344,6 +1344,16 @@ vpmulld           %ymm16, %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax), %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vpscatterdd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterdq       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqq       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vpscatterdd       %ymm1, (%rdx,%ymm0,4) {%k1}
+vpscatterdq       %ymm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqq       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vpshufd           $0, %xmm16, %xmm19
 vpshufd           $0, (%rax), %xmm19
 vpshufd           $0, (%rax){1to4}, %xmm19
@@ -1500,6 +1510,16 @@ vpunpckldq        %ymm16, %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax), %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vscatterdps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterdpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vscatterdps       %ymm1, (%rdx,%ymm0,4) {%k1}
+vscatterdpd       %ymm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqpd       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vshuff32x4        $0, %ymm16, %ymm17, %ymm19
 vshuff32x4        $0, (%rax), %ymm17, %ymm19
 vshuff32x4        $0, (%rax){1to8}, %ymm17, %ymm19
@@ -2897,6 +2917,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      5     1.00                        vpmulld	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      12    1.00    *                   vpmulld	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      12    1.00    *                   vpmulld	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdd	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdq	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqd	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqq	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      1     0.50                        vpshufd	$0, %xmm16, %xmm19
 # CHECK-NEXT:  2      7     0.50    *                   vpshufd	$0, (%rax), %xmm19
 # CHECK-NEXT:  2      7     0.50    *                   vpshufd	$0, (%rax){1to4}, %xmm19
@@ -3035,6 +3063,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        vpunpckldq	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpckldq	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpckldq	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdps	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdpd	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqps	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqpd	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      1     1.00                        vshuff32x4	$0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  2      8     1.00    *                   vshuff32x4	$0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  2      8     1.00    *                   vshuff32x4	$0, (%rax){1to8}, %ymm17, %ymm19
@@ -3228,7 +3264,7 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     1935.00 278.00 579.50 32.00 738.50 486.50 486.50
+# CHECK-NEXT:  -     1935.00 278.00 579.50 48.00 738.50 494.50 494.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -4420,6 +4456,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vpmulld	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vpmulld	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vpmulld	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterdd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterdq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterqd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterqq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterdd	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterdq	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterqd	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vpscatterqq	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -      -      -     0.50    -     0.50    -      -     vpshufd	$0, %xmm16, %xmm19
 # CHECK-NEXT:  -      -      -     0.50    -     0.50   0.50   0.50   vpshufd	$0, (%rax), %xmm19
 # CHECK-NEXT:  -      -      -     0.50    -     0.50   0.50   0.50   vpshufd	$0, (%rax){1to4}, %xmm19
@@ -4558,6 +4602,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vpunpckldq	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vpunpckldq	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vpunpckldq	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterdps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterdpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterqps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterqpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterdps	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterdpd	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterqps	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vscatterqpd	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vshuff32x4	$0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vshuff32x4	$0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vshuff32x4	$0, (%rax){1to8}, %ymm17, %ymm19
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
index 5c12c520b04af..c509e766540b1 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
@@ -298,6 +298,9 @@ vdivps            %zmm16, %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax), %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
+{evex} vextractps $1, %xmm0, %rcx
+{evex} vextractps $1, %xmm0, (%rax)
+
 vfmadd132pd       %zmm16, %zmm17, %zmm19
 vfmadd132pd       (%rax), %zmm17, %zmm19
 vfmadd132pd       (%rax){1to8}, %zmm17, %zmm19
@@ -811,6 +814,11 @@ vpermq            %zmm16, %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax), %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vpscatterdd       %zmm1, (%rdx,%zmm0,4) {%k1}
+vpscatterdq       %zmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqd       %ymm1, (%rdx,%zmm0,4) {%k1}
+vpscatterqq       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vpshufd           $0, %zmm16, %zmm19
 vpshufd           $0, (%rax), %zmm19
 vpshufd           $0, (%rax){1to16}, %zmm19
@@ -881,6 +889,11 @@ vpunpcklqdq       %zmm16, %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax), %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vscatterdps       %zmm1, (%rdx,%zmm0,4) {%k1}
+vscatterdpd       %zmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqps       %ymm1, (%rdx,%zmm0,4) {%k1}
+vscatterqpd       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vshuff32x4        $0, %zmm16, %zmm17, %zmm19
 vshuff32x4        $0, (%rax), %zmm17, %zmm19
 vshuff32x4        $0, (%rax){1to16}, %zmm17, %zmm19
@@ -1334,6 +1347,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  3      18    10.00                       vdivps	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  4      25    10.00   *                   vdivps	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  4      25    10.00   *                   vdivps	(%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      3     1.00                        {evex}	vextractps	$1, %xmm0, %ecx
+# CHECK-NEXT:  3      2     1.00           *            {evex}	vextractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  1      4     1.00                        vfmadd132pd	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      11    1.00    *                   vfmadd132pd	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      11    1.00    *                   vfmadd132pd	(%rax){1to8}, %zmm17, %zmm19
@@ -1787,6 +1802,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      3     1.00                        vpermq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   vpermq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   vpermq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  36     8     8.00           *            vpscatterdd	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     4.00           *            vpscatterdq	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  19     7     4.00           *            vpscatterqd	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     4.00           *            vpscatterqq	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      1     1.00                        vpshufd	$0, %zmm16, %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vpshufd	$0, (%rax), %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vpshufd	$0, (%rax){1to16}, %zmm19
@@ -1850,6 +1869,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        vpunpcklqdq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpcklqdq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpcklqdq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  36     7     8.00           *            vscatterdps	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     4.00           *            vscatterdpd	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  19     7     4.00           *            vscatterqps	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     4.00           *            vscatterqpd	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      3     1.00                        vshuff32x4	$0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      10    1.00    *                   vshuff32x4	$0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      10    1.00    *                   vshuff32x4	$0, (%rax){1to16}, %zmm17, %zmm19
@@ -2031,7 +2054,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT:  -     612.00 398.17 99.67  327.50 327.50 8.00   585.17 2.00   8.00   8.00   8.00
+# CHECK-NEXT:  -     612.00 411.17 103.67 327.50 327.50 48.50  593.17 6.00   48.50  48.50  48.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -2294,6 +2317,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -     10.00  2.00    -      -      -      -     1.00    -      -      -      -     vdivps	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -     10.00  2.00    -     0.50   0.50    -     1.00    -      -      -      -     vdivps	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -     10.00  2.00    -     0.50   0.50    -     1.00    -      -      -      -     vdivps	(%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -      -      -     {evex}	vextractps	$1, %xmm0, %ecx
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   1.00    -     0.50   0.50   0.50   {evex}	vextractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -     vfmadd132pd	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -      -      -     vfmadd132pd	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -      -      -     vfmadd132pd	(%rax){1to8}, %zmm17, %zmm19
@@ -2747,6 +2772,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vpermq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vpermq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vpermq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     8.00   1.50   0.50   8.00   8.00   8.00   vpscatterdd	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   0.50   0.50   4.00   4.00   4.00   vpscatterdq	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   0.50   0.50   4.00   4.00   4.00   vpscatterqd	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   0.50   0.50   4.00   4.00   4.00   vpscatterqq	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vpshufd	$0, %zmm16, %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vpshufd	$0, (%rax), %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vpshufd	$0, (%rax){1to16}, %zmm19
@@ -2810,6 +2839,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vpunpcklqdq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vpunpcklqdq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vpunpcklqdq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     8.00   1.50   0.50   8.00   8.00   8.00   vscatterdps	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   0.50   0.50   4.00   4.00   4.00   vscatterdpd	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   0.50   0.50   4.00   4.00   4.00   vscatterqps	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   0.50   0.50   4.00   4.00   4.00   vscatterqpd	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vshuff32x4	$0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vshuff32x4	$0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vshuff32x4	$0, (%rax){1to16}, %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s
index 375087ae0cfe4..00e5c3b03f6f5 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s
@@ -1344,6 +1344,16 @@ vpmulld           %ymm16, %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax), %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vpscatterdd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterdq       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqq       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vpscatterdd       %ymm1, (%rdx,%ymm0,4) {%k1}
+vpscatterdq       %ymm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqq       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vpshufd           $0, %xmm16, %xmm19
 vpshufd           $0, (%rax), %xmm19
 vpshufd           $0, (%rax){1to4}, %xmm19
@@ -1500,6 +1510,16 @@ vpunpckldq        %ymm16, %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax), %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vscatterdps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterdpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vscatterdps       %ymm1, (%rdx,%ymm0,4) {%k1}
+vscatterdpd       %ymm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqpd       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vshuff32x4        $0, %ymm16, %ymm17, %ymm19
 vshuff32x4        $0, (%rax), %ymm17, %ymm19
 vshuff32x4        $0, (%rax){1to8}, %ymm17, %ymm19
@@ -2897,6 +2917,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  2      10    1.00                        vpmulld	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  3      17    1.00    *                   vpmulld	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  3      17    1.00    *                   vpmulld	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  12     8     2.00           *            vpscatterdd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      7     1.00           *            vpscatterdq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  8      8     1.00           *            vpscatterqd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      7     1.00           *            vpscatterqq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  20     8     4.00           *            vpscatterdd	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     7     2.00           *            vpscatterdq	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  8      8     1.00           *            vpscatterqd	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     7     2.00           *            vpscatterqq	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      1     0.50                        vpshufd	$0, %xmm16, %xmm19
 # CHECK-NEXT:  2      7     0.50    *                   vpshufd	$0, (%rax), %xmm19
 # CHECK-NEXT:  2      7     0.50    *                   vpshufd	$0, (%rax){1to4}, %xmm19
@@ -3035,6 +3063,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      1     0.50                        vpunpckldq	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vpunpckldq	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vpunpckldq	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  12     8     2.00           *            vscatterdps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      7     1.00           *            vscatterdpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  8      8     1.00           *            vscatterqps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      7     1.00           *            vscatterqpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  20     8     4.00           *            vscatterdps	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     7     2.00           *            vscatterdpd	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  8      8     1.00           *            vscatterqps	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     7     2.00           *            vscatterqpd	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      3     1.00                        vshuff32x4	$0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  2      10    1.00    *                   vshuff32x4	$0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  2      10    1.00    *                   vshuff32x4	$0, (%rax){1to8}, %ymm17, %ymm19
@@ -3232,7 +3268,7 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT:  -     423.00 438.33 413.33 492.50 492.50 16.00  722.33 4.00   16.00  16.00  16.00
+# CHECK-NEXT:  -     423.00 462.33 421.33 492.50 492.50 44.00  738.33 12.00  44.00  44.00  44.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -4424,6 +4460,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -      -      -      -      -     vpmulld	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -      -      -     vpmulld	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -      -      -     vpmulld	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     2.00   1.50   0.50   2.00   2.00   2.00   vpscatterdd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     1.00   0.50   0.50   1.00   1.00   1.00   vpscatterdq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     1.00   1.50   0.50   1.00   1.00   1.00   vpscatterqd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     1.00   0.50   0.50   1.00   1.00   1.00   vpscatterqq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   1.50   0.50   4.00   4.00   4.00   vpscatterdd	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     2.00   0.50   0.50   2.00   2.00   2.00   vpscatterdq	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     1.00   1.50   0.50   1.00   1.00   1.00   vpscatterqd	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     2.00   0.50   0.50   2.00   2.00   2.00   vpscatterqq	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -      -      -     0.50    -      -      -     0.50    -      -      -      -     vpshufd	$0, %xmm16, %xmm19
 # CHECK-NEXT:  -      -      -     0.50   0.50   0.50    -     0.50    -      -      -      -     vpshufd	$0, (%rax), %xmm19
 # CHECK-NEXT:  -      -      -     0.50   0.50   0.50    -     0.50    -      -      -      -     vpshufd	$0, (%rax){1to4}, %xmm19
@@ -4562,6 +4606,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -      -     0.50    -      -      -     0.50    -      -      -      -     vpunpckldq	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -     0.50   0.50   0.50    -     0.50    -      -      -      -     vpunpckldq	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -     0.50   0.50   0.50    -     0.50    -      -      -      -     vpunpckldq	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     2.00   1.50   0.50   2.00   2.00   2.00   vscatterdps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     1.00   0.50   0.50   1.00   1.00   1.00   vscatterdpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     1.00   1.50   0.50   1.00   1.00   1.00   vscatterqps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     1.00   0.50   0.50   1.00   1.00   1.00   vscatterqpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     4.00   1.50   0.50   4.00   4.00   4.00   vscatterdps	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     2.00   0.50   0.50   2.00   2.00   2.00   vscatterdpd	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     1.00   1.50   0.50   1.00   1.00   1.00   vscatterqps	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50    -      -     2.00   0.50   0.50   2.00   2.00   2.00   vscatterqpd	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vshuff32x4	$0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vshuff32x4	$0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     vshuff32x4	$0, (%rax){1to8}, %ymm17, %ymm19
diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s
index b34ccaacc11a3..b2fde3929106a 100644
--- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512.s
@@ -298,6 +298,9 @@ vdivps            %zmm16, %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax), %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
+{evex} vextractps $1, %xmm0, %rcx
+{evex} vextractps $1, %xmm0, (%rax)
+
 vfmadd132pd       %zmm16, %zmm17, %zmm19
 vfmadd132pd       (%rax), %zmm17, %zmm19
 vfmadd132pd       (%rax){1to8}, %zmm17, %zmm19
@@ -811,6 +814,11 @@ vpermq            %zmm16, %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax), %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vpscatterdd       %zmm1, (%rdx,%zmm0,4) {%k1}
+vpscatterdq       %zmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqd       %ymm1, (%rdx,%zmm0,4) {%k1}
+vpscatterqq       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vpshufd           $0, %zmm16, %zmm19
 vpshufd           $0, (%rax), %zmm19
 vpshufd           $0, (%rax){1to16}, %zmm19
@@ -881,6 +889,11 @@ vpunpcklqdq       %zmm16, %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax), %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vscatterdps       %zmm1, (%rdx,%zmm0,4) {%k1}
+vscatterdpd       %zmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqps       %ymm1, (%rdx,%zmm0,4) {%k1}
+vscatterqpd       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vshuff32x4        $0, %zmm16, %zmm17, %zmm19
 vshuff32x4        $0, (%rax), %zmm17, %zmm19
 vshuff32x4        $0, (%rax){1to16}, %zmm17, %zmm19
@@ -1334,6 +1347,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  3      18    2.00                        vdivps	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  4      25    2.00    *                   vdivps	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  4      25    2.00    *                   vdivps	(%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      4     1.00                        {evex}	vextractps	$1, %xmm0, %ecx
+# CHECK-NEXT:  3      12    1.00           *            {evex}	vextractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  1      4     1.00                        vfmadd132pd	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      12    1.00    *                   vfmadd132pd	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      12    1.00    *                   vfmadd132pd	(%rax){1to8}, %zmm17, %zmm19
@@ -1787,6 +1802,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      3     1.00                        vpermq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      11    1.00    *                   vpermq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      11    1.00    *                   vpermq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  35     19    8.00           *            vpscatterdd	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     12    4.00           *            vpscatterdq	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  19     12    4.00           *            vpscatterqd	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     12    4.00           *            vpscatterqq	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      1     1.00                        vpshufd	$0, %zmm16, %zmm19
 # CHECK-NEXT:  2      9     1.00    *                   vpshufd	$0, (%rax), %zmm19
 # CHECK-NEXT:  2      9     1.00    *                   vpshufd	$0, (%rax){1to16}, %zmm19
@@ -1850,6 +1869,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        vpunpcklqdq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      9     1.00    *                   vpunpcklqdq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      9     1.00    *                   vpunpcklqdq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  35     19    8.00           *            vscatterdps	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     12    4.00           *            vscatterdpd	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  19     12    4.00           *            vscatterqps	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     12    4.00           *            vscatterqpd	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      3     1.00                        vshuff32x4	$0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      11    1.00    *                   vshuff32x4	$0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      11    1.00    *                   vshuff32x4	$0, (%rax){1to16}, %zmm17, %zmm19
@@ -2032,7 +2055,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]
-# CHECK-NEXT: 490.00 12.00  218.33 218.33 8.00   575.00  -     8.00   8.00   8.00    -     218.33  -
+# CHECK-NEXT: 508.60 13.60  218.33 218.33 48.50  578.60 1.60   48.50  48.50  48.50  1.60   218.33  -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   Instructions:
@@ -2295,6 +2318,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT: 2.50    -      -      -      -     0.50    -      -      -      -      -      -      -     vdivps	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT: 2.50    -     0.33   0.33    -     0.50    -      -      -      -      -     0.33    -     vdivps	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT: 2.50    -     0.33   0.33    -     0.50    -      -      -      -      -     0.33    -     vdivps	(%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 1.00    -      -      -      -     1.00    -      -      -      -      -      -      -     {evex}	vextractps	$1, %xmm0, %ecx
+# CHECK-NEXT:  -      -      -      -     0.50   1.00    -     0.50   0.50   0.50    -      -      -     {evex}	vextractps	$1, %xmm0, (%rax)
 # CHECK-NEXT: 1.00    -      -      -      -      -      -      -      -      -      -      -      -     vfmadd132pd	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT: 1.00    -     0.33   0.33    -      -      -      -      -      -      -     0.33    -     vfmadd132pd	(%rax), %zmm17, %zmm19
 # CHECK-NEXT: 1.00    -     0.33   0.33    -      -      -      -      -      -      -     0.33    -     vfmadd132pd	(%rax){1to8}, %zmm17, %zmm19
@@ -2748,6 +2773,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     vpermq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vpermq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vpermq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 2.20   0.20    -      -     8.00   0.20   0.20   8.00   8.00   8.00   0.20    -      -     vpscatterdd	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 2.20   0.20    -      -     4.00   0.20   0.20   4.00   4.00   4.00   0.20    -      -     vpscatterdq	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 2.20   0.20    -      -     4.00   0.20   0.20   4.00   4.00   4.00   0.20    -      -     vpscatterqd	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 2.20   0.20    -      -     4.00   0.20   0.20   4.00   4.00   4.00   0.20    -      -     vpscatterqq	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     vpshufd	$0, %zmm16, %zmm19
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vpshufd	$0, (%rax), %zmm19
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vpshufd	$0, (%rax){1to16}, %zmm19
@@ -2811,6 +2840,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     vpunpcklqdq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vpunpcklqdq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vpunpcklqdq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 2.20   0.20    -      -     8.00   0.20   0.20   8.00   8.00   8.00   0.20    -      -     vscatterdps	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 2.20   0.20    -      -     4.00   0.20   0.20   4.00   4.00   4.00   0.20    -      -     vscatterdpd	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 2.20   0.20    -      -     4.00   0.20   0.20   4.00   4.00   4.00   0.20    -      -     vscatterqps	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 2.20   0.20    -      -     4.00   0.20   0.20   4.00   4.00   4.00   0.20    -      -     vscatterqpd	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     vshuff32x4	$0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vshuff32x4	$0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vshuff32x4	$0, (%rax){1to16}, %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s
index 3ad66f1c3d712..d8c76832d38d3 100644
--- a/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s
+++ b/llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avx512vl.s
@@ -1344,6 +1344,16 @@ vpmulld           %ymm16, %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax), %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vpscatterdd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterdq       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqq       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vpscatterdd       %ymm1, (%rdx,%ymm0,4) {%k1}
+vpscatterdq       %ymm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqq       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vpshufd           $0, %xmm16, %xmm19
 vpshufd           $0, (%rax), %xmm19
 vpshufd           $0, (%rax){1to4}, %xmm19
@@ -1500,6 +1510,16 @@ vpunpckldq        %ymm16, %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax), %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vscatterdps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterdpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vscatterdps       %ymm1, (%rdx,%ymm0,4) {%k1}
+vscatterdpd       %ymm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqpd       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vshuff32x4        $0, %ymm16, %ymm17, %ymm19
 vshuff32x4        $0, (%rax), %ymm17, %ymm19
 vshuff32x4        $0, (%rax){1to8}, %ymm17, %ymm19
@@ -2897,6 +2917,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  2      10    1.00                        vpmulld	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  3      18    1.00    *                   vpmulld	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  3      18    1.00    *                   vpmulld	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  11     12    2.00           *            vpscatterdd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      12    1.00           *            vpscatterdq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      12    1.00           *            vpscatterqd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      12    1.00           *            vpscatterqq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  19     12    4.00           *            vpscatterdd	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     12    2.00           *            vpscatterdq	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  11     12    2.00           *            vpscatterqd	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     12    2.00           *            vpscatterqq	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      1     0.50                        vpshufd	$0, %xmm16, %xmm19
 # CHECK-NEXT:  2      8     0.50    *                   vpshufd	$0, (%rax), %xmm19
 # CHECK-NEXT:  2      8     0.50    *                   vpshufd	$0, (%rax){1to4}, %xmm19
@@ -3035,6 +3063,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      1     0.50                        vpunpckldq	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      9     0.50    *                   vpunpckldq	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      9     0.50    *                   vpunpckldq	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  11     12    2.00           *            vscatterdps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      12    1.00           *            vscatterdpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      12    1.00           *            vscatterqps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      12    1.00           *            vscatterqpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  19     12    4.00           *            vscatterdps	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     12    2.00           *            vscatterdpd	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  11     12    2.00           *            vscatterqps	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     12    2.00           *            vscatterqpd	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      3     1.00                        vshuff32x4	$0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  2      11    1.00    *                   vshuff32x4	$0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  2      11    1.00    *                   vshuff32x4	$0, (%rax){1to8}, %ymm17, %ymm19
@@ -3233,7 +3269,7 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]
-# CHECK-NEXT: 377.33 401.33 328.33 328.33 16.00  794.33  -     16.00  16.00  16.00   -     328.33  -
+# CHECK-NEXT: 404.53 412.53 328.33 328.33 46.00  797.53 3.20   46.00  46.00  46.00  3.20   328.33  -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   Instructions:
@@ -4425,6 +4461,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT: 1.00   1.00    -      -      -      -      -      -      -      -      -      -      -     vpmulld	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT: 1.00   1.00   0.33   0.33    -      -      -      -      -      -      -     0.33    -     vpmulld	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT: 1.00   1.00   0.33   0.33    -      -      -      -      -      -      -     0.33    -     vpmulld	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 1.70   0.70    -      -     2.00   0.20   0.20   2.00   2.00   2.00   0.20    -      -     vpscatterdd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     1.00   0.20   0.20   1.00   1.00   1.00   0.20    -      -     vpscatterdq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     1.00   0.20   0.20   1.00   1.00   1.00   0.20    -      -     vpscatterqd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     1.00   0.20   0.20   1.00   1.00   1.00   0.20    -      -     vpscatterqq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     4.00   0.20   0.20   4.00   4.00   4.00   0.20    -      -     vpscatterdd	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     2.00   0.20   0.20   2.00   2.00   2.00   0.20    -      -     vpscatterdq	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     2.00   0.20   0.20   2.00   2.00   2.00   0.20    -      -     vpscatterqd	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     2.00   0.20   0.20   2.00   2.00   2.00   0.20    -      -     vpscatterqq	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -     0.50    -      -      -     0.50    -      -      -      -      -      -      -     vpshufd	$0, %xmm16, %xmm19
 # CHECK-NEXT:  -     0.50   0.33   0.33    -     0.50    -      -      -      -      -     0.33    -     vpshufd	$0, (%rax), %xmm19
 # CHECK-NEXT:  -     0.50   0.33   0.33    -     0.50    -      -      -      -      -     0.33    -     vpshufd	$0, (%rax){1to4}, %xmm19
@@ -4563,6 +4607,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -     0.50    -      -      -     0.50    -      -      -      -      -      -      -     vpunpckldq	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -     0.50   0.33   0.33    -     0.50    -      -      -      -      -     0.33    -     vpunpckldq	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -     0.50   0.33   0.33    -     0.50    -      -      -      -      -     0.33    -     vpunpckldq	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 1.70   0.70    -      -     2.00   0.20   0.20   2.00   2.00   2.00   0.20    -      -     vscatterdps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     1.00   0.20   0.20   1.00   1.00   1.00   0.20    -      -     vscatterdpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     1.00   0.20   0.20   1.00   1.00   1.00   0.20    -      -     vscatterqps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     1.00   0.20   0.20   1.00   1.00   1.00   0.20    -      -     vscatterqpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     4.00   0.20   0.20   4.00   4.00   4.00   0.20    -      -     vscatterdps	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     2.00   0.20   0.20   2.00   2.00   2.00   0.20    -      -     vscatterdpd	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     2.00   0.20   0.20   2.00   2.00   2.00   0.20    -      -     vscatterqps	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 1.70   0.70    -      -     2.00   0.20   0.20   2.00   2.00   2.00   0.20    -      -     vscatterqpd	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -      -      -      -      -      -     vshuff32x4	$0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vshuff32x4	$0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  -      -     0.33   0.33    -     1.00    -      -      -      -      -     0.33    -     vshuff32x4	$0, (%rax){1to8}, %ymm17, %ymm19
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
index b1bfd7a9ec448..9c006d4ebb077 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
@@ -298,6 +298,9 @@ vdivps            %zmm16, %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax), %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
+{evex} vextractps $1, %xmm0, %rcx
+{evex} vextractps $1, %xmm0, (%rax)
+
 vfmadd132pd       %zmm16, %zmm17, %zmm19
 vfmadd132pd       (%rax), %zmm17, %zmm19
 vfmadd132pd       (%rax){1to8}, %zmm17, %zmm19
@@ -811,6 +814,11 @@ vpermq            %zmm16, %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax), %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vpscatterdd       %zmm1, (%rdx,%zmm0,4) {%k1}
+vpscatterdq       %zmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqd       %ymm1, (%rdx,%zmm0,4) {%k1}
+vpscatterqq       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vpshufd           $0, %zmm16, %zmm19
 vpshufd           $0, (%rax), %zmm19
 vpshufd           $0, (%rax){1to16}, %zmm19
@@ -881,6 +889,11 @@ vpunpcklqdq       %zmm16, %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax), %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vscatterdps       %zmm1, (%rdx,%zmm0,4) {%k1}
+vscatterdpd       %zmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqps       %ymm1, (%rdx,%zmm0,4) {%k1}
+vscatterqpd       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vshuff32x4        $0, %zmm16, %zmm17, %zmm19
 vshuff32x4        $0, (%rax), %zmm17, %zmm19
 vshuff32x4        $0, (%rax){1to16}, %zmm17, %zmm19
@@ -1334,6 +1347,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  3      18    10.00                       vdivps	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  4      25    10.00   *                   vdivps	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  4      25    10.00   *                   vdivps	(%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      3     1.00                        {evex}	vextractps	$1, %xmm0, %ecx
+# CHECK-NEXT:  3      2     1.00           *            {evex}	vextractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  1      4     0.50                        vfmadd132pd	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      11    0.50    *                   vfmadd132pd	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      11    0.50    *                   vfmadd132pd	(%rax){1to8}, %zmm17, %zmm19
@@ -1787,6 +1802,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      3     1.00                        vpermq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   vpermq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   vpermq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  36     8     16.00          *            vpscatterdd	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     8.00           *            vpscatterdq	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  19     7     8.00           *            vpscatterqd	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     8.00           *            vpscatterqq	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      1     1.00                        vpshufd	$0, %zmm16, %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vpshufd	$0, (%rax), %zmm19
 # CHECK-NEXT:  2      8     1.00    *                   vpshufd	$0, (%rax){1to16}, %zmm19
@@ -1850,6 +1869,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        vpunpcklqdq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpcklqdq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpcklqdq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  36     7     16.00          *            vscatterdps	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     8.00           *            vscatterdpd	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  19     7     8.00           *            vscatterqps	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  19     7     8.00           *            vscatterqpd	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      3     1.00                        vshuff32x4	$0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      10    1.00    *                   vshuff32x4	$0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      10    1.00    *                   vshuff32x4	$0, (%rax){1to16}, %zmm17, %zmm19
@@ -2029,7 +2052,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     612.00 339.67 99.67  332.83 332.83 16.00  643.67 2.00   5.33
+# CHECK-NEXT:  -     612.00 352.67 103.67 359.83 359.83 97.00  651.67 6.00   32.33
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -2292,6 +2315,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -     10.00  2.00    -      -      -      -     1.00    -      -     vdivps	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -     10.00  2.00    -     0.50   0.50    -     1.00    -      -     vdivps	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -     10.00  2.00    -     0.50   0.50    -     1.00    -      -     vdivps	(%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     {evex}	vextractps	$1, %xmm0, %ecx
+# CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00   1.00    -     0.33   {evex}	vextractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -     0.50    -      -      -      -     0.50    -      -     vfmadd132pd	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -     vfmadd132pd	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -     vfmadd132pd	(%rax){1to8}, %zmm17, %zmm19
@@ -2745,6 +2770,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpermq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpermq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50   0.50   5.33   5.33   16.00  1.50   0.50   5.33   vpscatterdd	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   0.50   0.50   2.67   vpscatterdq	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   0.50   0.50   2.67   vpscatterqd	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   0.50   0.50   2.67   vpscatterqq	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpshufd	$0, %zmm16, %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpshufd	$0, (%rax), %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpshufd	$0, (%rax){1to16}, %zmm19
@@ -2808,6 +2837,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpunpcklqdq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpunpcklqdq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpunpcklqdq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50   0.50   5.33   5.33   16.00  1.50   0.50   5.33   vscatterdps	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   0.50   0.50   2.67   vscatterdpd	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   0.50   0.50   2.67   vscatterqps	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   0.50   0.50   2.67   vscatterqpd	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vshuff32x4	$0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vshuff32x4	$0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vshuff32x4	$0, (%rax){1to16}, %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s
index 2ad91ea514aa2..b4b18101a67b8 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s
@@ -1344,6 +1344,16 @@ vpmulld           %ymm16, %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax), %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vpscatterdd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterdq       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqq       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vpscatterdd       %ymm1, (%rdx,%ymm0,4) {%k1}
+vpscatterdq       %ymm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqq       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vpshufd           $0, %xmm16, %xmm19
 vpshufd           $0, (%rax), %xmm19
 vpshufd           $0, (%rax){1to4}, %xmm19
@@ -1500,6 +1510,16 @@ vpunpckldq        %ymm16, %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax), %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vscatterdps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterdpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vscatterdps       %ymm1, (%rdx,%ymm0,4) {%k1}
+vscatterdpd       %ymm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqpd       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vshuff32x4        $0, %ymm16, %ymm17, %ymm19
 vshuff32x4        $0, (%rax), %ymm17, %ymm19
 vshuff32x4        $0, (%rax){1to8}, %ymm17, %ymm19
@@ -2897,6 +2917,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  2      10    1.00                        vpmulld	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  3      17    1.00    *                   vpmulld	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  3      17    1.00    *                   vpmulld	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  12     8     4.00           *            vpscatterdd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      7     2.00           *            vpscatterdq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  8      8     2.00           *            vpscatterqd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      7     2.00           *            vpscatterqq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  20     8     8.00           *            vpscatterdd	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     7     4.00           *            vpscatterdq	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  8      8     2.00           *            vpscatterqd	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     7     4.00           *            vpscatterqq	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      1     1.00                        vpshufd	$0, %xmm16, %xmm19
 # CHECK-NEXT:  2      7     1.00    *                   vpshufd	$0, (%rax), %xmm19
 # CHECK-NEXT:  2      7     1.00    *                   vpshufd	$0, (%rax){1to4}, %xmm19
@@ -3035,6 +3063,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        vpunpckldq	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpckldq	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpunpckldq	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  12     8     4.00           *            vscatterdps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      7     2.00           *            vscatterdpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  8      8     2.00           *            vscatterqps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  7      7     2.00           *            vscatterqpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  20     8     8.00           *            vscatterdps	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     7     4.00           *            vscatterdpd	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  8      8     2.00           *            vscatterqps	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  11     7     4.00           *            vscatterqpd	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      3     1.00                        vshuff32x4	$0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  2      10    1.00    *                   vshuff32x4	$0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  2      10    1.00    *                   vshuff32x4	$0, (%rax){1to8}, %ymm17, %ymm19
@@ -3230,7 +3266,7 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     423.00 438.33 350.33 503.17 503.17 32.00  785.33 4.00   10.67
+# CHECK-NEXT:  -     423.00 462.33 358.33 521.83 521.83 88.00  801.33 12.00  29.33
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -4422,6 +4458,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -      -      -     vpmulld	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     vpmulld	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     vpmulld	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50   0.50   1.33   1.33   4.00   1.50   0.50   1.33   vpscatterdd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   0.67   0.67   2.00   0.50   0.50   0.67   vpscatterdq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   0.67   0.67   2.00   1.50   0.50   0.67   vpscatterqd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   0.67   0.67   2.00   0.50   0.50   0.67   vpscatterqq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   1.50   0.50   2.67   vpscatterdd	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   1.33   1.33   4.00   0.50   0.50   1.33   vpscatterdq	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   0.67   0.67   2.00   1.50   0.50   0.67   vpscatterqd	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   1.33   1.33   4.00   0.50   0.50   1.33   vpscatterqq	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpshufd	$0, %xmm16, %xmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpshufd	$0, (%rax), %xmm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpshufd	$0, (%rax){1to4}, %xmm19
@@ -4560,6 +4604,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpunpckldq	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpunpckldq	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpunpckldq	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.50   0.50   1.33   1.33   4.00   1.50   0.50   1.33   vscatterdps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   0.67   0.67   2.00   0.50   0.50   0.67   vscatterdpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   0.67   0.67   2.00   1.50   0.50   0.67   vscatterqps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   0.67   0.67   2.00   0.50   0.50   0.67   vscatterqpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   2.67   2.67   8.00   1.50   0.50   2.67   vscatterdps	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   1.33   1.33   4.00   0.50   0.50   1.33   vscatterdpd	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   0.67   0.67   2.00   1.50   0.50   0.67   vscatterqps	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  -      -     1.50   0.50   1.33   1.33   4.00   0.50   0.50   1.33   vscatterqpd	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vshuff32x4	$0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vshuff32x4	$0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vshuff32x4	$0, (%rax){1to8}, %ymm17, %ymm19
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
index 6742cfccb2d00..6e52eddd9a8f5 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
@@ -298,6 +298,9 @@ vdivps            %zmm16, %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax), %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
+{evex} vextractps $1, %xmm0, %rcx
+{evex} vextractps $1, %xmm0, (%rax)
+
 vfmadd132pd       %zmm16, %zmm17, %zmm19
 vfmadd132pd       (%rax), %zmm17, %zmm19
 vfmadd132pd       (%rax){1to8}, %zmm17, %zmm19
@@ -811,6 +814,11 @@ vpermq            %zmm16, %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax), %zmm17, %zmm19 {z}{k1}
 vpermq            (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vpscatterdd       %zmm1, (%rdx,%zmm0,4) {%k1}
+vpscatterdq       %zmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqd       %ymm1, (%rdx,%zmm0,4) {%k1}
+vpscatterqq       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vpshufd           $0, %zmm16, %zmm19
 vpshufd           $0, (%rax), %zmm19
 vpshufd           $0, (%rax){1to16}, %zmm19
@@ -881,6 +889,11 @@ vpunpcklqdq       %zmm16, %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax), %zmm17, %zmm19 {z}{k1}
 vpunpcklqdq       (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vscatterdps       %zmm1, (%rdx,%zmm0,4) {%k1}
+vscatterdpd       %zmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqps       %ymm1, (%rdx,%zmm0,4) {%k1}
+vscatterqpd       %zmm1, (%rdx,%zmm0,4) {%k1}
+
 vshuff32x4        $0, %zmm16, %zmm17, %zmm19
 vshuff32x4        $0, (%rax), %zmm17, %zmm19
 vshuff32x4        $0, (%rax){1to16}, %zmm17, %zmm19
@@ -1334,6 +1347,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      11    6.00                        vdivps	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      18    6.00    *                   vdivps	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      18    6.00    *                   vdivps	(%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  2      1     1.00                        {evex}	vextractps	$1, %xmm0, %ecx
+# CHECK-NEXT:  2      2     1.00           *            {evex}	vextractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  1      4     1.00                        vfmadd132pd	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  1      11    1.00    *                   vfmadd132pd	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  1      11    1.00    *                   vfmadd132pd	(%rax){1to8}, %zmm17, %zmm19
@@ -1787,6 +1802,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  2      1     0.50                        vpermq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vpermq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vpermq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdd	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdq	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqd	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqq	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      1     1.00                        vpshufd	$0, %zmm16, %zmm19
 # CHECK-NEXT:  1      8     1.00    *                   vpshufd	$0, (%rax), %zmm19
 # CHECK-NEXT:  1      8     1.00    *                   vpshufd	$0, (%rax){1to16}, %zmm19
@@ -1850,6 +1869,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        vpunpcklqdq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      8     1.00    *                   vpunpcklqdq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      8     1.00    *                   vpunpcklqdq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdps	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdpd	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqps	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqpd	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  1      2     1.00                        vshuff32x4	$0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  3      9     1.00    *                   vshuff32x4	$0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  3      9     1.00    *                   vshuff32x4	$0, (%rax){1to16}, %zmm17, %zmm19
@@ -2042,7 +2065,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: 2.67   2.67   2.67    -      -      -      -      -     221.00 1060.50 618.00 352.50 295.50 295.50 16.00 199.67 199.67 199.67 194.33 194.33 194.33 8.00   8.00
+# CHECK-NEXT: 5.33   5.33   5.33    -      -      -      -      -     221.00 1060.50 618.00 352.50 297.00 297.00 17.00 205.33 205.33 205.33 194.33 194.33 194.33 16.50  16.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -2305,6 +2328,8 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     6.00    -      -      -      -      -      -      -      -      -      -      -      -      -     vdivps	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     6.00    -      -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vdivps	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     6.00    -      -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vdivps	(%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     1.00   1.00    -      -      -      -      -      -      -      -      -     {evex}	vextractps	$1, %xmm0, %ecx
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   1.00   0.33   0.33   0.33    -      -      -     0.50   0.50   {evex}	vextractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     vfmadd132pd	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vfmadd132pd	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vfmadd132pd	(%rax){1to8}, %zmm17, %zmm19
@@ -2758,6 +2783,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     vpermq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpermq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpermq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterdd	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterdq	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterqd	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterqq	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -      -      -      -      -      -      -      -      -      -      -      -     vpshufd	$0, %zmm16, %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpshufd	$0, (%rax), %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpshufd	$0, (%rax){1to16}, %zmm19
@@ -2821,6 +2850,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -      -      -      -      -      -      -      -      -      -      -      -     vpunpcklqdq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpunpcklqdq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpunpcklqdq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterdps	%zmm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterdpd	%zmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterqps	%ymm1, (%rdx,%zmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterqpd	%zmm1, (%rdx,%zmm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     vshuff32x4	$0, %zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vshuff32x4	$0, (%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vshuff32x4	$0, (%rax){1to16}, %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s
index 2d26eb50351a0..4636e23d9df3e 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s
@@ -1344,6 +1344,16 @@ vpmulld           %ymm16, %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax), %ymm17, %ymm19 {z}{k1}
 vpmulld           (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vpscatterdd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterdq       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqq       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vpscatterdd       %ymm1, (%rdx,%ymm0,4) {%k1}
+vpscatterdq       %ymm1, (%rdx,%xmm0,4) {%k1}
+vpscatterqd       %xmm1, (%rdx,%ymm0,4) {%k1}
+vpscatterqq       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vpshufd           $0, %xmm16, %xmm19
 vpshufd           $0, (%rax), %xmm19
 vpshufd           $0, (%rax){1to4}, %xmm19
@@ -1500,6 +1510,16 @@ vpunpckldq        %ymm16, %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax), %ymm17, %ymm19 {z}{k1}
 vpunpckldq        (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vscatterdps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterdpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%xmm0,4) {%k1}
+vscatterqpd       %xmm1, (%rdx,%xmm0,4) {%k1}
+
+vscatterdps       %ymm1, (%rdx,%ymm0,4) {%k1}
+vscatterdpd       %ymm1, (%rdx,%xmm0,4) {%k1}
+vscatterqps       %xmm1, (%rdx,%ymm0,4) {%k1}
+vscatterqpd       %ymm1, (%rdx,%ymm0,4) {%k1}
+
 vshuff32x4        $0, %ymm16, %ymm17, %ymm19
 vshuff32x4        $0, (%rax), %ymm17, %ymm19
 vshuff32x4        $0, (%rax){1to8}, %ymm17, %ymm19
@@ -2897,6 +2917,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      3     0.50                        vpmulld	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      10    0.50    *                   vpmulld	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      10    0.50    *                   vpmulld	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdd	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterdq	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqd	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vpscatterqq	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      1     0.50                        vpshufd	$0, %xmm16, %xmm19
 # CHECK-NEXT:  1      8     0.50    *                   vpshufd	$0, (%rax), %xmm19
 # CHECK-NEXT:  1      8     0.50    *                   vpshufd	$0, (%rax){1to4}, %xmm19
@@ -3035,6 +3063,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      1     0.50                        vpunpckldq	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      8     0.50    *                   vpunpckldq	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  1      8     0.50    *                   vpunpckldq	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdps	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterdpd	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqps	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT:  1      1     1.00           *            vscatterqpd	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  1      2     1.00                        vshuff32x4	$0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  3      9     1.00    *                   vshuff32x4	$0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  3      9     1.00    *                   vshuff32x4	$0, (%rax){1to8}, %ymm17, %ymm19
@@ -3243,7 +3279,7 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: 5.33   5.33   5.33    -      -      -      -      -     208.00 948.00 501.50 261.50 478.50 478.50 32.00  324.33 324.33 324.33 313.67 313.67 313.67 16.00  16.00
+# CHECK-NEXT: 10.67  10.67  10.67   -      -      -      -      -     208.00 948.00 501.50 261.50 478.50 478.50 32.00  335.00 335.00 335.00 313.67 313.67 313.67 32.00  32.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -4435,6 +4471,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50    -      -     0.50    -      -      -      -      -      -      -      -      -      -      -     vpmulld	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50    -      -     0.50   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpmulld	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50    -      -     0.50   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpmulld	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterdd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterdq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterqd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterqq	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterdd	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterdq	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterqd	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vpscatterqq	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     vpshufd	$0, %xmm16, %xmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpshufd	$0, (%rax), %xmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpshufd	$0, (%rax){1to4}, %xmm19
@@ -4573,6 +4617,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -      -      -      -      -      -      -      -      -      -      -      -     vpunpckldq	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpunpckldq	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     0.50   0.50    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpunpckldq	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterdps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterdpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterqps	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterqpd	%xmm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterdps	%ymm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterdpd	%ymm1, (%rdx,%xmm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterqps	%xmm1, (%rdx,%ymm0,4) {%k1}
+# CHECK-NEXT: 0.33   0.33   0.33    -      -      -      -      -      -      -      -      -      -      -      -     0.67   0.67   0.67    -      -      -     1.00   1.00   vscatterqpd	%ymm1, (%rdx,%ymm0,4) {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -     vshuff32x4	$0, %ymm16, %ymm17, %ymm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vshuff32x4	$0, (%rax), %ymm17, %ymm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vshuff32x4	$0, (%rax){1to8}, %ymm17, %ymm19

From 5f744ee5c770d7332740bb6247f961e7d99ee359 Mon Sep 17 00:00:00 2001
From: Dan Gohman <dev@sunfishcode.online>
Date: Thu, 22 Aug 2024 08:13:20 -0700
Subject: [PATCH 218/427] [DwarfEhPrepare] Assign dummy debug location for more
 inserted _Unwind_Resume calls (#105513)

Similar to the fix for #57469, ensure that the other `_Unwind_Resume`
call emitted by DwarfEHPrepare has a debug location if needed.

This fixes https://github.com/nbdd0121/unwinding/issues/34.

(cherry picked from commit e76db25832d6ac2d3a36769b26f982d9dee4b346)
---
 llvm/lib/CodeGen/DwarfEHPrepare.cpp           |    7 +
 .../CodeGen/AArch64/dwarf-eh-prepare-dbg.ll   | 1175 +++++++++++++++++
 2 files changed, 1182 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/dwarf-eh-prepare-dbg.ll

diff --git a/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index 324329ce989e7..f4324fffc4ed4 100644
--- a/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -293,6 +293,13 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
   // Call the function.
   CallInst *CI =
       CallInst::Create(RewindFunction, RewindFunctionArgs, "", UnwindBB);
+  // The verifier requires that all calls of debug-info-bearing functions
+  // from debug-info-bearing functions have a debug location (for inlining
+  // purposes). Assign a dummy location to satisfy the constraint.
+  Function *RewindFn = dyn_cast<Function>(RewindFunction.getCallee());
+  if (RewindFn && RewindFn->getSubprogram())
+    if (DISubprogram *SP = F.getSubprogram())
+      CI->setDebugLoc(DILocation::get(SP->getContext(), 0, 0, SP));
   CI->setCallingConv(RewindFunctionCallingConv);
 
   // We never expect _Unwind_Resume to return.
diff --git a/llvm/test/CodeGen/AArch64/dwarf-eh-prepare-dbg.ll b/llvm/test/CodeGen/AArch64/dwarf-eh-prepare-dbg.ll
new file mode 100644
index 0000000000000..020a10f278ed6
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/dwarf-eh-prepare-dbg.ll
@@ -0,0 +1,1175 @@
+; RUN: opt -S -mtriple=aarch64-unknown-linux-gnu -dwarf-eh-prepare < %s | FileCheck %s
+; RUN: opt -S -mtriple=aarch64-unknown-linux-gnu -passes=dwarf-eh-prepare < %s | FileCheck %s
+
+; If _Unwind_Resume is defined in the same module and we have debug
+; info, then the inserted _Unwind_Resume calls also need to have a dummy debug
+; location to satisfy inlining requirements.
+
+; CHECK-LABEL: @_ZN9unwinding8unwinder5frame5Frame19evaluate_expression17h2bd8716b79f71675E(
+; CHECK: %exn.obj = phi ptr [ [[A:%.*]], %cleanup.i ], [ [[B:%.*]], %bb44 ]
+; CHECK: call void @_Unwind_Resume(ptr %exn.obj) #2, !dbg !1039
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+; Function Attrs: uwtable
+declare void @"17h74cc711a87d52d83E"() unnamed_addr #0
+
+; Function Attrs: uwtable
+define void @_ZN9unwinding8unwinder5frame5Frame19evaluate_expression17h2bd8716b79f71675E() unnamed_addr #0 personality ptr @rust_eh_personality !dbg !2 {
+start:
+  invoke void @_ZN4core6result13unwrap_failed17h043998f7f81c2189E() #2
+          to label %unreachable.i unwind label %cleanup.i, !dbg !999
+
+cleanup.i:                                        ; preds = %start
+  %i9 = landingpad { ptr, i32 }
+          cleanup
+  resume { ptr, i32 } undef, !dbg !999
+
+unreachable.i:                                    ; preds = %start
+  unreachable
+
+bb43:                                             ; preds = %cleanup.loopexit.split-lp
+  invoke void @"17h74cc711a87d52d83E"()
+          to label %bb44 unwind label %cleanup.loopexit.split-lp, !dbg !999
+
+cleanup.loopexit.split-lp:                        ; preds = %bb43
+  %lpad.loopexit.split-lp = landingpad { ptr, i32 }
+          cleanup
+  br label %bb43
+
+bb44:                                             ; preds = %bb43
+  resume { ptr, i32 } undef, !dbg !999
+}
+
+; Function Attrs: noreturn uwtable
+define void @_Unwind_Resume(ptr %arg) unnamed_addr #1 !dbg !1039 {
+start:
+  unreachable
+}
+
+declare i32 @rust_eh_personality(...) unnamed_addr
+
+; Function Attrs: noreturn uwtable
+declare void @_ZN4core6result13unwrap_failed17h043998f7f81c2189E() unnamed_addr #1
+
+attributes #0 = { uwtable }
+attributes #1 = { noreturn uwtable }
+attributes #2 = { noreturn }
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = !{i32 2, !"Dwarf Version", i32 4}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
+!2 = distinct !DISubprogram(name: "evaluate_expression", linkageName: "_ZN9unwinding8unwinder5frame5Frame19evaluate_expression17h2bd8716b79f71675E", scope: !4, file: !3, line: 79, type: !302, scopeLine: 79, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !564, templateParams: !46, declaration: !607, retainedNodes: !608)
+!3 = !DIFile(filename: "src/unwinder/frame.rs", directory: "/home/dev/ecosystem/unwinding", checksumkind: CSK_MD5, checksum: "8e7ed70cea65000339db1f4ec1025545")
+!4 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Frame", scope: !6, file: !5, size: 35520, align: 64, flags: DIFlagPublic, elements: !9, templateParams: !46, identifier: "668e0516028efb27d51536b6c511f9")
+!5 = !DIFile(filename: "<unknown>", directory: "")
+!6 = !DINamespace(name: "frame", scope: !7)
+!7 = !DINamespace(name: "unwinder", scope: !8)
+!8 = !DINamespace(name: "unwinding", scope: null)
+!9 = !{!10, !205}
+!10 = !DIDerivedType(tag: DW_TAG_member, name: "fde_result", scope: !4, file: !5, baseType: !11, size: 2304, align: 64, flags: DIFlagPrivate)
+!11 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "FDESearchResult", scope: !12, file: !5, size: 2304, align: 64, flags: DIFlagPublic, elements: !13, templateParams: !46, identifier: "83083fb6983108ea9bd5c8494868595d")
+!12 = !DINamespace(name: "find_fde", scope: !7)
+!13 = !{!14, !171, !194}
+!14 = !DIDerivedType(tag: DW_TAG_member, name: "fde", scope: !11, file: !5, baseType: !15, size: 1344, align: 64, offset: 768, flags: DIFlagPublic)
+!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "FrameDescriptionEntry<gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>, usize>", scope: !16, file: !5, size: 1344, align: 64, flags: DIFlagPublic, elements: !19, templateParams: !134, identifier: "5d3a70f21598ef08f33176ed3c9f48e9")
+!16 = !DINamespace(name: "cfi", scope: !17)
+!17 = !DINamespace(name: "read", scope: !18)
+!18 = !DINamespace(name: "gimli", scope: null)
+!19 = !{!20, !22, !23, !30, !137, !138, !139, !140, !170}
+!20 = !DIDerivedType(tag: DW_TAG_member, name: "offset", scope: !15, file: !5, baseType: !21, size: 64, align: 64, offset: 960, flags: DIFlagPrivate)
+!21 = !DIBasicType(name: "usize", size: 64, encoding: DW_ATE_unsigned)
+!22 = !DIDerivedType(tag: DW_TAG_member, name: "length", scope: !15, file: !5, baseType: !21, size: 64, align: 64, offset: 1024, flags: DIFlagPrivate)
+!23 = !DIDerivedType(tag: DW_TAG_member, name: "format", scope: !15, file: !5, baseType: !24, size: 8, align: 8, offset: 1280, flags: DIFlagPrivate)
+!24 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Format", scope: !25, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagEnumClass, elements: !27)
+!25 = !DINamespace(name: "common", scope: !18)
+!26 = !DIBasicType(name: "u8", size: 8, encoding: DW_ATE_unsigned)
+!27 = !{!28, !29}
+!28 = !DIEnumerator(name: "Dwarf64", value: 8, isUnsigned: true)
+!29 = !DIEnumerator(name: "Dwarf32", value: 4, isUnsigned: true)
+!30 = !DIDerivedType(tag: DW_TAG_member, name: "cie", scope: !15, file: !5, baseType: !31, size: 704, align: 64, offset: 128, flags: DIFlagPrivate)
+!31 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CommonInformationEntry<gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>, usize>", scope: !16, file: !5, size: 704, align: 64, flags: DIFlagPublic, elements: !32, templateParams: !134, identifier: "d702b04fb343c0c9e9d3001992e9a1")
+!32 = !{!33, !34, !35, !36, !37, !109, !110, !111, !112, !114, !119}
+!33 = !DIDerivedType(tag: DW_TAG_member, name: "offset", scope: !31, file: !5, baseType: !21, size: 64, align: 64, offset: 384, flags: DIFlagPrivate)
+!34 = !DIDerivedType(tag: DW_TAG_member, name: "length", scope: !31, file: !5, baseType: !21, size: 64, align: 64, offset: 448, flags: DIFlagPrivate)
+!35 = !DIDerivedType(tag: DW_TAG_member, name: "format", scope: !31, file: !5, baseType: !24, size: 8, align: 8, offset: 656, flags: DIFlagPrivate)
+!36 = !DIDerivedType(tag: DW_TAG_member, name: "version", scope: !31, file: !5, baseType: !26, size: 8, align: 8, offset: 664, flags: DIFlagPrivate)
+!37 = !DIDerivedType(tag: DW_TAG_member, name: "augmentation", scope: !31, file: !5, baseType: !38, size: 256, align: 64, flags: DIFlagPrivate)
+!38 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Option<gimli::read::cfi::Augmentation>", scope: !39, file: !5, size: 256, align: 64, flags: DIFlagPublic, elements: !41, templateParams: !46, identifier: "3a0af3bf6a8f5409cf76f6c3324a8471")
+!39 = !DINamespace(name: "option", scope: !40)
+!40 = !DINamespace(name: "core", scope: null)
+!41 = !{!42}
+!42 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !38, file: !5, size: 256, align: 64, elements: !43, templateParams: !46, identifier: "38f36f4fe7ce04182001fea3f0ce78b9", discriminator: !108)
+!43 = !{!44, !104}
+!44 = !DIDerivedType(tag: DW_TAG_member, name: "None", scope: !42, file: !5, baseType: !45, size: 256, align: 64, extraData: i128 3)
+!45 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "None", scope: !38, file: !5, size: 256, align: 64, flags: DIFlagPublic, elements: !46, templateParams: !47, identifier: "fc965678566be8f379abea64e9b3abac")
+!46 = !{}
+!47 = !{!48}
+!48 = !DITemplateTypeParameter(name: "T", type: !49)
+!49 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Augmentation", scope: !16, file: !5, size: 256, align: 64, flags: DIFlagPublic, elements: !50, templateParams: !46, identifier: "437b83c5d52a974b59a7fdcd9a6e5529")
+!50 = !{!51, !69, !101, !102}
+!51 = !DIDerivedType(tag: DW_TAG_member, name: "lsda", scope: !49, file: !5, baseType: !52, size: 16, align: 8, offset: 192, flags: DIFlagPrivate)
+!52 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Option<gimli::constants::DwEhPe>", scope: !39, file: !5, size: 16, align: 8, flags: DIFlagPublic, elements: !53, templateParams: !46, identifier: "269c974aec4b862f607e3d0f37bf4289")
+!53 = !{!54}
+!54 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !52, file: !5, size: 16, align: 8, elements: !55, templateParams: !46, identifier: "a41237f2e73a6dfd1e15b8d422b0caf", discriminator: !68)
+!55 = !{!56, !64}
+!56 = !DIDerivedType(tag: DW_TAG_member, name: "None", scope: !54, file: !5, baseType: !57, size: 16, align: 8, extraData: i128 0)
+!57 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "None", scope: !52, file: !5, size: 16, align: 8, flags: DIFlagPublic, elements: !46, templateParams: !58, identifier: "f43d443124b25d2cf6c3daa0ca6dbe8e")
+!58 = !{!59}
+!59 = !DITemplateTypeParameter(name: "T", type: !60)
+!60 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "DwEhPe", scope: !61, file: !5, size: 8, align: 8, flags: DIFlagPublic, elements: !62, templateParams: !46, identifier: "be9eed4e424cae07de87786ea65cc31a")
+!61 = !DINamespace(name: "constants", scope: !18)
+!62 = !{!63}
+!63 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !60, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagPublic)
+!64 = !DIDerivedType(tag: DW_TAG_member, name: "Some", scope: !54, file: !5, baseType: !65, size: 16, align: 8, extraData: i128 1)
+!65 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Some", scope: !52, file: !5, size: 16, align: 8, flags: DIFlagPublic, elements: !66, templateParams: !58, identifier: "2c48764c698ea6f9c8867fbde87bbb17")
+!66 = !{!67}
+!67 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !65, file: !5, baseType: !60, size: 8, align: 8, offset: 8, flags: DIFlagPublic)
+!68 = !DIDerivedType(tag: DW_TAG_member, scope: !52, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagArtificial)
+!69 = !DIDerivedType(tag: DW_TAG_member, name: "personality", scope: !49, file: !5, baseType: !70, size: 192, align: 64, flags: DIFlagPrivate)
+!70 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Option<(gimli::constants::DwEhPe, gimli::read::cfi::Pointer)>", scope: !39, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !71, templateParams: !46, identifier: "1fdf28ed684e117d758984993d4c393")
+!71 = !{!72}
+!72 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !70, file: !5, size: 192, align: 64, elements: !73, templateParams: !46, identifier: "186ecead5120d6ea5f8043eb398d7171", discriminator: !100)
+!73 = !{!74, !96}
+!74 = !DIDerivedType(tag: DW_TAG_member, name: "None", scope: !72, file: !5, baseType: !75, size: 192, align: 64, extraData: i128 2)
+!75 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "None", scope: !70, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !46, templateParams: !76, identifier: "8ffc865f6e828740f09ae9b5f0639b8b")
+!76 = !{!77}
+!77 = !DITemplateTypeParameter(name: "T", type: !78)
+!78 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "(gimli::constants::DwEhPe, gimli::read::cfi::Pointer)", file: !5, size: 192, align: 64, elements: !79, templateParams: !46, identifier: "1135188aa1561172ec3fe388e59e5ad3")
+!79 = !{!80, !81}
+!80 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !78, file: !5, baseType: !60, size: 8, align: 8)
+!81 = !DIDerivedType(tag: DW_TAG_member, name: "__1", scope: !78, file: !5, baseType: !82, size: 128, align: 64, offset: 64)
+!82 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Pointer", scope: !16, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !83, templateParams: !46, identifier: "54ccac1071baa52ac351c194c6a75888")
+!83 = !{!84}
+!84 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !82, file: !5, size: 128, align: 64, elements: !85, templateParams: !46, identifier: "28884e6a838202696aebd009c8be1e5d", discriminator: !95)
+!85 = !{!86, !91}
+!86 = !DIDerivedType(tag: DW_TAG_member, name: "Direct", scope: !84, file: !5, baseType: !87, size: 128, align: 64, extraData: i128 0)
+!87 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Direct", scope: !82, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !88, templateParams: !46, identifier: "d33e0cdef11588372b91f9bf78c76dea")
+!88 = !{!89}
+!89 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !87, file: !5, baseType: !90, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!90 = !DIBasicType(name: "u64", size: 64, encoding: DW_ATE_unsigned)
+!91 = !DIDerivedType(tag: DW_TAG_member, name: "Indirect", scope: !84, file: !5, baseType: !92, size: 128, align: 64, extraData: i128 1)
+!92 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Indirect", scope: !82, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !93, templateParams: !46, identifier: "878c7c7b43c77510f5719ec0e083c0df")
+!93 = !{!94}
+!94 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !92, file: !5, baseType: !90, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!95 = !DIDerivedType(tag: DW_TAG_member, scope: !82, file: !5, baseType: !90, size: 64, align: 64, flags: DIFlagArtificial)
+!96 = !DIDerivedType(tag: DW_TAG_member, name: "Some", scope: !72, file: !5, baseType: !97, size: 192, align: 64)
+!97 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Some", scope: !70, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !98, templateParams: !76, identifier: "dbb5400f4ed59423d6d6c8d7daf9d6e2")
+!98 = !{!99}
+!99 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !97, file: !5, baseType: !78, size: 192, align: 64, flags: DIFlagPublic)
+!100 = !DIDerivedType(tag: DW_TAG_member, scope: !70, file: !5, baseType: !90, size: 64, align: 64, offset: 64, flags: DIFlagArtificial)
+!101 = !DIDerivedType(tag: DW_TAG_member, name: "fde_address_encoding", scope: !49, file: !5, baseType: !52, size: 16, align: 8, offset: 208, flags: DIFlagPrivate)
+!102 = !DIDerivedType(tag: DW_TAG_member, name: "is_signal_trampoline", scope: !49, file: !5, baseType: !103, size: 8, align: 8, offset: 224, flags: DIFlagPrivate)
+!103 = !DIBasicType(name: "bool", size: 8, encoding: DW_ATE_boolean)
+!104 = !DIDerivedType(tag: DW_TAG_member, name: "Some", scope: !42, file: !5, baseType: !105, size: 256, align: 64)
+!105 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Some", scope: !38, file: !5, size: 256, align: 64, flags: DIFlagPublic, elements: !106, templateParams: !47, identifier: "7e0027ca0949f7cbda755de183ba6ac7")
+!106 = !{!107}
+!107 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !105, file: !5, baseType: !49, size: 256, align: 64, flags: DIFlagPublic)
+!108 = !DIDerivedType(tag: DW_TAG_member, scope: !38, file: !5, baseType: !90, size: 64, align: 64, offset: 64, flags: DIFlagArtificial)
+!109 = !DIDerivedType(tag: DW_TAG_member, name: "address_size", scope: !31, file: !5, baseType: !26, size: 8, align: 8, offset: 672, flags: DIFlagPrivate)
+!110 = !DIDerivedType(tag: DW_TAG_member, name: "segment_size", scope: !31, file: !5, baseType: !26, size: 8, align: 8, offset: 680, flags: DIFlagPrivate)
+!111 = !DIDerivedType(tag: DW_TAG_member, name: "code_alignment_factor", scope: !31, file: !5, baseType: !90, size: 64, align: 64, offset: 512, flags: DIFlagPrivate)
+!112 = !DIDerivedType(tag: DW_TAG_member, name: "data_alignment_factor", scope: !31, file: !5, baseType: !113, size: 64, align: 64, offset: 576, flags: DIFlagPrivate)
+!113 = !DIBasicType(name: "i64", size: 64, encoding: DW_ATE_signed)
+!114 = !DIDerivedType(tag: DW_TAG_member, name: "return_address_register", scope: !31, file: !5, baseType: !115, size: 16, align: 16, offset: 640, flags: DIFlagPrivate)
+!115 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Register", scope: !25, file: !5, size: 16, align: 16, flags: DIFlagPublic, elements: !116, templateParams: !46, identifier: "ab7721750f04c98f7840c9b9e52b656e")
+!116 = !{!117}
+!117 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !115, file: !5, baseType: !118, size: 16, align: 16, flags: DIFlagPublic)
+!118 = !DIBasicType(name: "u16", size: 16, encoding: DW_ATE_unsigned)
+!119 = !DIDerivedType(tag: DW_TAG_member, name: "initial_instructions", scope: !31, file: !5, baseType: !120, size: 128, align: 64, offset: 256, flags: DIFlagPrivate)
+!120 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "EndianSlice<gimli::endianity::LittleEndian>", scope: !121, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !122, templateParams: !132, identifier: "da07131e2106746f95d2ea314dd1a7d6")
+!121 = !DINamespace(name: "endian_slice", scope: !17)
+!122 = !{!123, !129}
+!123 = !DIDerivedType(tag: DW_TAG_member, name: "slice", scope: !120, file: !5, baseType: !124, size: 128, align: 64, flags: DIFlagPrivate)
+!124 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "&[u8]", file: !5, size: 128, align: 64, elements: !125, templateParams: !46, identifier: "31681e0c10b314f1f33e38b2779acbb4")
+!125 = !{!126, !128}
+!126 = !DIDerivedType(tag: DW_TAG_member, name: "data_ptr", scope: !124, file: !5, baseType: !127, size: 64, align: 64)
+!127 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !26, size: 64, align: 64, dwarfAddressSpace: 0)
+!128 = !DIDerivedType(tag: DW_TAG_member, name: "length", scope: !124, file: !5, baseType: !21, size: 64, align: 64, offset: 64)
+!129 = !DIDerivedType(tag: DW_TAG_member, name: "endian", scope: !120, file: !5, baseType: !130, align: 8, offset: 128, flags: DIFlagPrivate)
+!130 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "LittleEndian", scope: !131, file: !5, align: 8, flags: DIFlagPublic, elements: !46, identifier: "3d0f5d089fd1d1e4e850cd8b54585231")
+!131 = !DINamespace(name: "endianity", scope: !18)
+!132 = !{!133}
+!133 = !DITemplateTypeParameter(name: "Endian", type: !130)
+!134 = !{!135, !136}
+!135 = !DITemplateTypeParameter(name: "R", type: !120)
+!136 = !DITemplateTypeParameter(name: "Offset", type: !21)
+!137 = !DIDerivedType(tag: DW_TAG_member, name: "initial_segment", scope: !15, file: !5, baseType: !90, size: 64, align: 64, offset: 1088, flags: DIFlagPrivate)
+!138 = !DIDerivedType(tag: DW_TAG_member, name: "initial_address", scope: !15, file: !5, baseType: !90, size: 64, align: 64, offset: 1152, flags: DIFlagPrivate)
+!139 = !DIDerivedType(tag: DW_TAG_member, name: "address_range", scope: !15, file: !5, baseType: !90, size: 64, align: 64, offset: 1216, flags: DIFlagPrivate)
+!140 = !DIDerivedType(tag: DW_TAG_member, name: "augmentation", scope: !15, file: !5, baseType: !141, size: 128, align: 64, flags: DIFlagPrivate)
+!141 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Option<gimli::read::cfi::AugmentationData>", scope: !39, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !142, templateParams: !46, identifier: "6c86ed6ec859a01352ba34a0d8b67b42")
+!142 = !{!143}
+!143 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !141, file: !5, size: 128, align: 64, elements: !144, templateParams: !46, identifier: "fba0139e17508a99930ee3f15479fed", discriminator: !169)
+!144 = !{!145, !165}
+!145 = !DIDerivedType(tag: DW_TAG_member, name: "None", scope: !143, file: !5, baseType: !146, size: 128, align: 64, extraData: i128 3)
+!146 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "None", scope: !141, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, templateParams: !147, identifier: "80c4c2d1f17a14408b80f1b79766a748")
+!147 = !{!148}
+!148 = !DITemplateTypeParameter(name: "T", type: !149)
+!149 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "AugmentationData", scope: !16, file: !5, size: 128, align: 64, flags: DIFlagPrivate, elements: !150, templateParams: !46, identifier: "3de3a0bd67f5300b194fe75d6a32ba34")
+!150 = !{!151}
+!151 = !DIDerivedType(tag: DW_TAG_member, name: "lsda", scope: !149, file: !5, baseType: !152, size: 128, align: 64, flags: DIFlagPrivate)
+!152 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Option<gimli::read::cfi::Pointer>", scope: !39, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !153, templateParams: !46, identifier: "63ae53efcc3b644d5af5be76d59f1b22")
+!153 = !{!154}
+!154 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !152, file: !5, size: 128, align: 64, elements: !155, templateParams: !46, identifier: "7cc3843db76c59d9856a0146da2deda", discriminator: !164)
+!155 = !{!156, !160}
+!156 = !DIDerivedType(tag: DW_TAG_member, name: "None", scope: !154, file: !5, baseType: !157, size: 128, align: 64, extraData: i128 2)
+!157 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "None", scope: !152, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, templateParams: !158, identifier: "3f7569d9bddac98414af4ae68a670d47")
+!158 = !{!159}
+!159 = !DITemplateTypeParameter(name: "T", type: !82)
+!160 = !DIDerivedType(tag: DW_TAG_member, name: "Some", scope: !154, file: !5, baseType: !161, size: 128, align: 64)
+!161 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Some", scope: !152, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !162, templateParams: !158, identifier: "a59a0ac8e43839cb28a35341b044ba6")
+!162 = !{!163}
+!163 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !161, file: !5, baseType: !82, size: 128, align: 64, flags: DIFlagPublic)
+!164 = !DIDerivedType(tag: DW_TAG_member, scope: !152, file: !5, baseType: !90, size: 64, align: 64, flags: DIFlagArtificial)
+!165 = !DIDerivedType(tag: DW_TAG_member, name: "Some", scope: !143, file: !5, baseType: !166, size: 128, align: 64)
+!166 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Some", scope: !141, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !167, templateParams: !147, identifier: "310743464abe902a5334b2cd55207cfe")
+!167 = !{!168}
+!168 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !166, file: !5, baseType: !149, size: 128, align: 64, flags: DIFlagPublic)
+!169 = !DIDerivedType(tag: DW_TAG_member, scope: !141, file: !5, baseType: !90, size: 64, align: 64, flags: DIFlagArtificial)
+!170 = !DIDerivedType(tag: DW_TAG_member, name: "instructions", scope: !15, file: !5, baseType: !120, size: 128, align: 64, offset: 832, flags: DIFlagPrivate)
+!171 = !DIDerivedType(tag: DW_TAG_member, name: "bases", scope: !11, file: !5, baseType: !172, size: 768, align: 64, flags: DIFlagPublic)
+!172 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "BaseAddresses", scope: !16, file: !5, size: 768, align: 64, flags: DIFlagPublic, elements: !173, templateParams: !46, identifier: "4f73e7ef799b29fbae067e0be323dcfe")
+!173 = !{!174, !193}
+!174 = !DIDerivedType(tag: DW_TAG_member, name: "eh_frame_hdr", scope: !172, file: !5, baseType: !175, size: 384, align: 64, flags: DIFlagPublic)
+!175 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "SectionBaseAddresses", scope: !16, file: !5, size: 384, align: 64, flags: DIFlagPublic, elements: !176, templateParams: !46, identifier: "78885c23ca9bb0ec15b9d93531201334")
+!176 = !{!177, !191, !192}
+!177 = !DIDerivedType(tag: DW_TAG_member, name: "section", scope: !175, file: !5, baseType: !178, size: 128, align: 64, flags: DIFlagPublic)
+!178 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Option<u64>", scope: !39, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !179, templateParams: !46, identifier: "a764e4be4144b599a440b2d3c234bd8f")
+!179 = !{!180}
+!180 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !178, file: !5, size: 128, align: 64, elements: !181, templateParams: !46, identifier: "5f71a65a6e7dd57c14e50416050c3a90", discriminator: !190)
+!181 = !{!182, !186}
+!182 = !DIDerivedType(tag: DW_TAG_member, name: "None", scope: !180, file: !5, baseType: !183, size: 128, align: 64, extraData: i128 0)
+!183 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "None", scope: !178, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, templateParams: !184, identifier: "fc451b096a948ef431e3a4971f318c0")
+!184 = !{!185}
+!185 = !DITemplateTypeParameter(name: "T", type: !90)
+!186 = !DIDerivedType(tag: DW_TAG_member, name: "Some", scope: !180, file: !5, baseType: !187, size: 128, align: 64, extraData: i128 1)
+!187 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Some", scope: !178, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !188, templateParams: !184, identifier: "321068e05ede571e678dd7e02b883f79")
+!188 = !{!189}
+!189 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !187, file: !5, baseType: !90, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!190 = !DIDerivedType(tag: DW_TAG_member, scope: !178, file: !5, baseType: !90, size: 64, align: 64, flags: DIFlagArtificial)
+!191 = !DIDerivedType(tag: DW_TAG_member, name: "text", scope: !175, file: !5, baseType: !178, size: 128, align: 64, offset: 128, flags: DIFlagPublic)
+!192 = !DIDerivedType(tag: DW_TAG_member, name: "data", scope: !175, file: !5, baseType: !178, size: 128, align: 64, offset: 256, flags: DIFlagPublic)
+!193 = !DIDerivedType(tag: DW_TAG_member, name: "eh_frame", scope: !172, file: !5, baseType: !175, size: 384, align: 64, offset: 384, flags: DIFlagPublic)
+!194 = !DIDerivedType(tag: DW_TAG_member, name: "eh_frame", scope: !11, file: !5, baseType: !195, size: 192, align: 64, offset: 2112, flags: DIFlagPublic)
+!195 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "EhFrame<gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>>", scope: !16, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !196, templateParams: !204, identifier: "53e7dc3de0299504e773ad37859e191b")
+!196 = !{!197, !198, !199}
+!197 = !DIDerivedType(tag: DW_TAG_member, name: "section", scope: !195, file: !5, baseType: !120, size: 128, align: 64, flags: DIFlagPrivate)
+!198 = !DIDerivedType(tag: DW_TAG_member, name: "address_size", scope: !195, file: !5, baseType: !26, size: 8, align: 8, offset: 128, flags: DIFlagPrivate)
+!199 = !DIDerivedType(tag: DW_TAG_member, name: "vendor", scope: !195, file: !5, baseType: !200, size: 8, align: 8, offset: 136, flags: DIFlagPrivate)
+!200 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Vendor", scope: !25, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagEnumClass, elements: !201)
+!201 = !{!202, !203}
+!202 = !DIEnumerator(name: "Default", value: 0, isUnsigned: true)
+!203 = !DIEnumerator(name: "AArch64", value: 1, isUnsigned: true)
+!204 = !{!135}
+!205 = !DIDerivedType(tag: DW_TAG_member, name: "row", scope: !4, file: !5, baseType: !206, size: 33216, align: 64, offset: 2304, flags: DIFlagPrivate)
+!206 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnwindTableRow<usize, unwinding::unwinder::frame::StoreOnStack>", scope: !16, file: !5, size: 33216, align: 64, flags: DIFlagPublic, elements: !207, templateParams: !299, identifier: "82cd368dc604f667d8116583e5f9088f")
+!207 = !{!208, !209, !210, !211, !232}
+!208 = !DIDerivedType(tag: DW_TAG_member, name: "start_address", scope: !206, file: !5, baseType: !90, size: 64, align: 64, offset: 192, flags: DIFlagPrivate)
+!209 = !DIDerivedType(tag: DW_TAG_member, name: "end_address", scope: !206, file: !5, baseType: !90, size: 64, align: 64, offset: 256, flags: DIFlagPrivate)
+!210 = !DIDerivedType(tag: DW_TAG_member, name: "saved_args_size", scope: !206, file: !5, baseType: !90, size: 64, align: 64, offset: 320, flags: DIFlagPrivate)
+!211 = !DIDerivedType(tag: DW_TAG_member, name: "cfa", scope: !206, file: !5, baseType: !212, size: 192, align: 64, flags: DIFlagPrivate)
+!212 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CfaRule<usize>", scope: !16, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !213, templateParams: !46, identifier: "ade6ef3f7e303426a7d52d422a1beefc")
+!213 = !{!214}
+!214 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !212, file: !5, size: 192, align: 64, elements: !215, templateParams: !46, identifier: "122b00cae98c57f43a7358b8913c043", discriminator: !231)
+!215 = !{!216, !223}
+!216 = !DIDerivedType(tag: DW_TAG_member, name: "RegisterAndOffset", scope: !214, file: !5, baseType: !217, size: 192, align: 64, extraData: i128 0)
+!217 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "RegisterAndOffset", scope: !212, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !218, templateParams: !221, identifier: "119bb0cef257ae2dfac36839c7598241")
+!218 = !{!219, !220}
+!219 = !DIDerivedType(tag: DW_TAG_member, name: "register", scope: !217, file: !5, baseType: !115, size: 16, align: 16, offset: 16, flags: DIFlagPublic)
+!220 = !DIDerivedType(tag: DW_TAG_member, name: "offset", scope: !217, file: !5, baseType: !113, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!221 = !{!222}
+!222 = !DITemplateTypeParameter(name: "T", type: !21)
+!223 = !DIDerivedType(tag: DW_TAG_member, name: "Expression", scope: !214, file: !5, baseType: !224, size: 192, align: 64, extraData: i128 1)
+!224 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Expression", scope: !212, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !225, templateParams: !221, identifier: "aaead2b15d3fe770a8c30bba7fd8ad61")
+!225 = !{!226}
+!226 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !224, file: !5, baseType: !227, size: 128, align: 64, offset: 64, flags: DIFlagPublic)
+!227 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnwindExpression<usize>", scope: !16, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !228, templateParams: !221, identifier: "83534a13fed1c49912ea150ec14a104b")
+!228 = !{!229, !230}
+!229 = !DIDerivedType(tag: DW_TAG_member, name: "offset", scope: !227, file: !5, baseType: !21, size: 64, align: 64, flags: DIFlagPublic)
+!230 = !DIDerivedType(tag: DW_TAG_member, name: "length", scope: !227, file: !5, baseType: !21, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!231 = !DIDerivedType(tag: DW_TAG_member, scope: !212, file: !5, baseType: !118, size: 16, align: 16, flags: DIFlagArtificial)
+!232 = !DIDerivedType(tag: DW_TAG_member, name: "registers", scope: !206, file: !5, baseType: !233, size: 32832, align: 64, offset: 384, flags: DIFlagPrivate)
+!233 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "RegisterRuleMap<usize, unwinding::unwinder::frame::StoreOnStack>", scope: !16, file: !5, size: 32832, align: 64, flags: DIFlagPrivate, elements: !234, templateParams: !299, identifier: "cc473d27e5413589c350ce3df23f43da")
+!234 = !{!235}
+!235 = !DIDerivedType(tag: DW_TAG_member, name: "rules", scope: !233, file: !5, baseType: !236, size: 32832, align: 64, flags: DIFlagPrivate)
+!236 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ArrayVec<[(gimli::common::Register, gimli::read::cfi::RegisterRule<usize>); 128]>", scope: !237, file: !5, size: 32832, align: 64, flags: DIFlagProtected, elements: !238, templateParams: !296, identifier: "d22acd2dadf9e1b9a77b71369aacf142")
+!237 = !DINamespace(name: "util", scope: !17)
+!238 = !{!239, !295}
+!239 = !DIDerivedType(tag: DW_TAG_member, name: "storage", scope: !236, file: !5, baseType: !240, size: 32768, align: 64, flags: DIFlagPrivate)
+!240 = !DICompositeType(tag: DW_TAG_array_type, baseType: !241, size: 32768, align: 64, elements: !293)
+!241 = distinct !DICompositeType(tag: DW_TAG_union_type, name: "MaybeUninit<(gimli::common::Register, gimli::read::cfi::RegisterRule<usize>)>", scope: !242, file: !5, size: 256, align: 64, elements: !244, templateParams: !291, identifier: "5969f24240e56f6ba292746e32127a72")
+!242 = !DINamespace(name: "maybe_uninit", scope: !243)
+!243 = !DINamespace(name: "mem", scope: !40)
+!244 = !{!245, !247}
+!245 = !DIDerivedType(tag: DW_TAG_member, name: "uninit", scope: !241, file: !5, baseType: !246, align: 8)
+!246 = !DIBasicType(name: "()", encoding: DW_ATE_unsigned)
+!247 = !DIDerivedType(tag: DW_TAG_member, name: "value", scope: !241, file: !5, baseType: !248, size: 256, align: 64)
+!248 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ManuallyDrop<(gimli::common::Register, gimli::read::cfi::RegisterRule<usize>)>", scope: !249, file: !5, size: 256, align: 64, flags: DIFlagPublic, elements: !250, templateParams: !291, identifier: "5694f7bb71f656adeda6ad1c7765e67e")
+!249 = !DINamespace(name: "manually_drop", scope: !243)
+!250 = !{!251}
+!251 = !DIDerivedType(tag: DW_TAG_member, name: "value", scope: !248, file: !5, baseType: !252, size: 256, align: 64, flags: DIFlagPrivate)
+!252 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "(gimli::common::Register, gimli::read::cfi::RegisterRule<usize>)", file: !5, size: 256, align: 64, elements: !253, templateParams: !46, identifier: "d5b89cd0eff48be0d57ac79c0fc2d497")
+!253 = !{!254, !255}
+!254 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !252, file: !5, baseType: !115, size: 16, align: 16)
+!255 = !DIDerivedType(tag: DW_TAG_member, name: "__1", scope: !252, file: !5, baseType: !256, size: 192, align: 64, offset: 64)
+!256 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "RegisterRule<usize>", scope: !16, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !257, templateParams: !46, identifier: "279b181cb161e87c468503bd8374c58f")
+!257 = !{!258}
+!258 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !256, file: !5, size: 192, align: 64, elements: !259, templateParams: !46, identifier: "6368babf0c076b47c26cd741e43d73b1", discriminator: !290)
+!259 = !{!260, !262, !264, !268, !272, !276, !280, !284, !286}
+!260 = !DIDerivedType(tag: DW_TAG_member, name: "Undefined", scope: !258, file: !5, baseType: !261, size: 192, align: 64, extraData: i128 0)
+!261 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Undefined", scope: !256, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !46, templateParams: !221, identifier: "b6231c923d5de1d6ea3dc67ae4d2b744")
+!262 = !DIDerivedType(tag: DW_TAG_member, name: "SameValue", scope: !258, file: !5, baseType: !263, size: 192, align: 64, extraData: i128 1)
+!263 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "SameValue", scope: !256, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !46, templateParams: !221, identifier: "366c56686b859fc982e55b9b99b114b7")
+!264 = !DIDerivedType(tag: DW_TAG_member, name: "Offset", scope: !258, file: !5, baseType: !265, size: 192, align: 64, extraData: i128 2)
+!265 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Offset", scope: !256, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !266, templateParams: !221, identifier: "cbb6f8da06b5c0ef681c5662cd576261")
+!266 = !{!267}
+!267 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !265, file: !5, baseType: !113, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!268 = !DIDerivedType(tag: DW_TAG_member, name: "ValOffset", scope: !258, file: !5, baseType: !269, size: 192, align: 64, extraData: i128 3)
+!269 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ValOffset", scope: !256, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !270, templateParams: !221, identifier: "9cc23c9ab3b44583f82e31fd655f575d")
+!270 = !{!271}
+!271 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !269, file: !5, baseType: !113, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!272 = !DIDerivedType(tag: DW_TAG_member, name: "Register", scope: !258, file: !5, baseType: !273, size: 192, align: 64, extraData: i128 4)
+!273 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Register", scope: !256, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !274, templateParams: !221, identifier: "801c2836f75f94b250a3dfb037480760")
+!274 = !{!275}
+!275 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !273, file: !5, baseType: !115, size: 16, align: 16, offset: 16, flags: DIFlagPublic)
+!276 = !DIDerivedType(tag: DW_TAG_member, name: "Expression", scope: !258, file: !5, baseType: !277, size: 192, align: 64, extraData: i128 5)
+!277 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Expression", scope: !256, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !278, templateParams: !221, identifier: "986350edfa7856f0363c61bb25dba41c")
+!278 = !{!279}
+!279 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !277, file: !5, baseType: !227, size: 128, align: 64, offset: 64, flags: DIFlagPublic)
+!280 = !DIDerivedType(tag: DW_TAG_member, name: "ValExpression", scope: !258, file: !5, baseType: !281, size: 192, align: 64, extraData: i128 6)
+!281 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ValExpression", scope: !256, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !282, templateParams: !221, identifier: "3d4759b08ddb382cf045cef4d1c0c0d4")
+!282 = !{!283}
+!283 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !281, file: !5, baseType: !227, size: 128, align: 64, offset: 64, flags: DIFlagPublic)
+!284 = !DIDerivedType(tag: DW_TAG_member, name: "Architectural", scope: !258, file: !5, baseType: !285, size: 192, align: 64, extraData: i128 7)
+!285 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Architectural", scope: !256, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !46, templateParams: !221, identifier: "f12af9617d5705e1d07fb2fcddd9c85b")
+!286 = !DIDerivedType(tag: DW_TAG_member, name: "Constant", scope: !258, file: !5, baseType: !287, size: 192, align: 64, extraData: i128 8)
+!287 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Constant", scope: !256, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !288, templateParams: !221, identifier: "576df75a63490388e372e15bd69f179d")
+!288 = !{!289}
+!289 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !287, file: !5, baseType: !90, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!290 = !DIDerivedType(tag: DW_TAG_member, scope: !256, file: !5, baseType: !118, size: 16, align: 16, flags: DIFlagArtificial)
+!291 = !{!292}
+!292 = !DITemplateTypeParameter(name: "T", type: !252)
+!293 = !{!294}
+!294 = !DISubrange(count: 128, lowerBound: 0)
+!295 = !DIDerivedType(tag: DW_TAG_member, name: "len", scope: !236, file: !5, baseType: !21, size: 64, align: 64, offset: 32768, flags: DIFlagPrivate)
+!296 = !{!297}
+!297 = !DITemplateTypeParameter(name: "A", type: !298)
+!298 = !DICompositeType(tag: DW_TAG_array_type, baseType: !252, size: 32768, align: 64, elements: !293)
+!299 = !{!222, !300}
+!300 = !DITemplateTypeParameter(name: "S", type: !301)
+!301 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "StoreOnStack", scope: !6, file: !5, align: 8, flags: DIFlagPrivate, elements: !46, identifier: "21dc88df75a1263a13dd24276b92d3e")
+!302 = !DISubroutineType(types: !303)
+!303 = !{!304, !549, !550, !227}
+!304 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Result<usize, gimli::read::Error>", scope: !305, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !306, templateParams: !46, identifier: "2930057b85b47f2bef5979de26a87b97")
+!305 = !DINamespace(name: "result", scope: !40)
+!306 = !{!307}
+!307 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !304, file: !5, size: 128, align: 64, elements: !308, templateParams: !46, identifier: "2298ec794572047066c0d72c8d834034", discriminator: !548)
+!308 = !{!309, !544}
+!309 = !DIDerivedType(tag: DW_TAG_member, name: "Ok", scope: !307, file: !5, baseType: !310, size: 128, align: 64, extraData: i128 77)
+!310 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Ok", scope: !304, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !311, templateParams: !313, identifier: "836193d46f427d39e980f444791b92f5")
+!311 = !{!312}
+!312 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !310, file: !5, baseType: !21, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!313 = !{!222, !314}
+!314 = !DITemplateTypeParameter(name: "E", type: !315)
+!315 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Error", scope: !17, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !316, templateParams: !46, identifier: "d77646015d26471497f49470c6fe61cb")
+!316 = !{!317}
+!317 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !315, file: !5, size: 128, align: 64, elements: !318, templateParams: !46, identifier: "ca3f5d09babb12e628f9b9b97696a8e4", discriminator: !543)
+!318 = !{!319, !321, !323, !325, !327, !329, !331, !333, !335, !337, !339, !341, !343, !350, !352, !354, !356, !358, !362, !366, !374, !376, !383, !390, !397, !404, !408, !412, !416, !418, !420, !422, !424, !426, !428, !430, !432, !436, !438, !440, !442, !449, !451, !453, !457, !459, !461, !463, !465, !467, !474, !476, !478, !480, !482, !484, !488, !490, !492, !494, !496, !500, !502, !504, !506, !508, !510, !512, !514, !516, !518, !520, !522, !524, !526, !528, !536}
+!319 = !DIDerivedType(tag: DW_TAG_member, name: "Io", scope: !317, file: !5, baseType: !320, size: 128, align: 64, extraData: i128 0)
+!320 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Io", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "2320550a9c63e2aae32b0db85e194ad6")
+!321 = !DIDerivedType(tag: DW_TAG_member, name: "PcRelativePointerButSectionBaseIsUndefined", scope: !317, file: !5, baseType: !322, size: 128, align: 64, extraData: i128 1)
+!322 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "PcRelativePointerButSectionBaseIsUndefined", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "8be741069f75ec37c15f7c3a00f5725f")
+!323 = !DIDerivedType(tag: DW_TAG_member, name: "TextRelativePointerButTextBaseIsUndefined", scope: !317, file: !5, baseType: !324, size: 128, align: 64, extraData: i128 2)
+!324 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "TextRelativePointerButTextBaseIsUndefined", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "378958e13527898498ae80867eb2d180")
+!325 = !DIDerivedType(tag: DW_TAG_member, name: "DataRelativePointerButDataBaseIsUndefined", scope: !317, file: !5, baseType: !326, size: 128, align: 64, extraData: i128 3)
+!326 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "DataRelativePointerButDataBaseIsUndefined", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "f6d90525538ea3822cc7864c7d6e132a")
+!327 = !DIDerivedType(tag: DW_TAG_member, name: "FuncRelativePointerInBadContext", scope: !317, file: !5, baseType: !328, size: 128, align: 64, extraData: i128 4)
+!328 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "FuncRelativePointerInBadContext", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "b3ef419b95336ce3759c0b0990f112fc")
+!329 = !DIDerivedType(tag: DW_TAG_member, name: "CannotParseOmitPointerEncoding", scope: !317, file: !5, baseType: !330, size: 128, align: 64, extraData: i128 5)
+!330 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CannotParseOmitPointerEncoding", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "eb130e7d2deb1502c84e84a2fb5387d")
+!331 = !DIDerivedType(tag: DW_TAG_member, name: "BadUnsignedLeb128", scope: !317, file: !5, baseType: !332, size: 128, align: 64, extraData: i128 6)
+!332 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "BadUnsignedLeb128", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "90e4e9ce1b5f9ac0ab73ec51465666a9")
+!333 = !DIDerivedType(tag: DW_TAG_member, name: "BadSignedLeb128", scope: !317, file: !5, baseType: !334, size: 128, align: 64, extraData: i128 7)
+!334 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "BadSignedLeb128", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "3c015a8355d523b8e6fb55a5ad8fdb42")
+!335 = !DIDerivedType(tag: DW_TAG_member, name: "AbbreviationTagZero", scope: !317, file: !5, baseType: !336, size: 128, align: 64, extraData: i128 8)
+!336 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "AbbreviationTagZero", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "21f316a516256bf01c07a0b30b05f15c")
+!337 = !DIDerivedType(tag: DW_TAG_member, name: "AttributeFormZero", scope: !317, file: !5, baseType: !338, size: 128, align: 64, extraData: i128 9)
+!338 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "AttributeFormZero", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "f33bf8117b76a81a643250c2d9a8b44f")
+!339 = !DIDerivedType(tag: DW_TAG_member, name: "BadHasChildren", scope: !317, file: !5, baseType: !340, size: 128, align: 64, extraData: i128 10)
+!340 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "BadHasChildren", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "edc800ff621c7b61d39a6e66a7f2e00")
+!341 = !DIDerivedType(tag: DW_TAG_member, name: "BadLength", scope: !317, file: !5, baseType: !342, size: 128, align: 64, extraData: i128 11)
+!342 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "BadLength", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "abbcc790b2b7b20eb0bda85a8691aacb")
+!343 = !DIDerivedType(tag: DW_TAG_member, name: "UnknownForm", scope: !317, file: !5, baseType: !344, size: 128, align: 64, extraData: i128 12)
+!344 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnknownForm", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !345, templateParams: !46, identifier: "bf6f1dc4dab6ba136decfeee45bc099b")
+!345 = !{!346}
+!346 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !344, file: !5, baseType: !347, size: 16, align: 16, offset: 16, flags: DIFlagPublic)
+!347 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "DwForm", scope: !61, file: !5, size: 16, align: 16, flags: DIFlagPublic, elements: !348, templateParams: !46, identifier: "48a0ca1754029669f804908ef0889f77")
+!348 = !{!349}
+!349 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !347, file: !5, baseType: !118, size: 16, align: 16, flags: DIFlagPublic)
+!350 = !DIDerivedType(tag: DW_TAG_member, name: "ExpectedZero", scope: !317, file: !5, baseType: !351, size: 128, align: 64, extraData: i128 13)
+!351 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ExpectedZero", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "fb26ef4a66c7e07a914b864dd5cbdaf3")
+!352 = !DIDerivedType(tag: DW_TAG_member, name: "DuplicateAbbreviationCode", scope: !317, file: !5, baseType: !353, size: 128, align: 64, extraData: i128 14)
+!353 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "DuplicateAbbreviationCode", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "8fba5777d5e3ba3c664af74dfa9883fa")
+!354 = !DIDerivedType(tag: DW_TAG_member, name: "DuplicateArange", scope: !317, file: !5, baseType: !355, size: 128, align: 64, extraData: i128 15)
+!355 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "DuplicateArange", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "92ad6df4b8eab06a4098fd1ab6fd674a")
+!356 = !DIDerivedType(tag: DW_TAG_member, name: "UnknownReservedLength", scope: !317, file: !5, baseType: !357, size: 128, align: 64, extraData: i128 16)
+!357 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnknownReservedLength", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "2b13d11aaf1e29e4496a5c786d58e5ad")
+!358 = !DIDerivedType(tag: DW_TAG_member, name: "UnknownVersion", scope: !317, file: !5, baseType: !359, size: 128, align: 64, extraData: i128 17)
+!359 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnknownVersion", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !360, templateParams: !46, identifier: "af46e087b62d3b17df488fbbae5b6e37")
+!360 = !{!361}
+!361 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !359, file: !5, baseType: !90, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!362 = !DIDerivedType(tag: DW_TAG_member, name: "UnknownAbbreviation", scope: !317, file: !5, baseType: !363, size: 128, align: 64, extraData: i128 18)
+!363 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnknownAbbreviation", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !364, templateParams: !46, identifier: "b3570174fc0c73045fd6de2058159273")
+!364 = !{!365}
+!365 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !363, file: !5, baseType: !90, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!366 = !DIDerivedType(tag: DW_TAG_member, name: "UnexpectedEof", scope: !317, file: !5, baseType: !367, size: 128, align: 64, extraData: i128 19)
+!367 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnexpectedEof", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !368, templateParams: !46, identifier: "c321526e0a5c153ffff82fcf12f05ee5")
+!368 = !{!369}
+!369 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !367, file: !5, baseType: !370, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!370 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ReaderOffsetId", scope: !371, file: !5, size: 64, align: 64, flags: DIFlagPublic, elements: !372, templateParams: !46, identifier: "10753e39de75edad2b99c864968b4116")
+!371 = !DINamespace(name: "reader", scope: !17)
+!372 = !{!373}
+!373 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !370, file: !5, baseType: !90, size: 64, align: 64, flags: DIFlagPublic)
+!374 = !DIDerivedType(tag: DW_TAG_member, name: "UnexpectedNull", scope: !317, file: !5, baseType: !375, size: 128, align: 64, extraData: i128 20)
+!375 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnexpectedNull", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "a086692abce983283debcb3978720724")
+!376 = !DIDerivedType(tag: DW_TAG_member, name: "UnknownStandardOpcode", scope: !317, file: !5, baseType: !377, size: 128, align: 64, extraData: i128 21)
+!377 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnknownStandardOpcode", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !378, templateParams: !46, identifier: "d17e9cb4063b64896929e2327bb308ce")
+!378 = !{!379}
+!379 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !377, file: !5, baseType: !380, size: 8, align: 8, offset: 8, flags: DIFlagPublic)
+!380 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "DwLns", scope: !61, file: !5, size: 8, align: 8, flags: DIFlagPublic, elements: !381, templateParams: !46, identifier: "62ddfcc3d37948b75bdcdd7f8282c4fd")
+!381 = !{!382}
+!382 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !380, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagPublic)
+!383 = !DIDerivedType(tag: DW_TAG_member, name: "UnknownExtendedOpcode", scope: !317, file: !5, baseType: !384, size: 128, align: 64, extraData: i128 22)
+!384 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnknownExtendedOpcode", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !385, templateParams: !46, identifier: "fe4bafba0822d26228794fa2925d8c1")
+!385 = !{!386}
+!386 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !384, file: !5, baseType: !387, size: 8, align: 8, offset: 8, flags: DIFlagPublic)
+!387 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "DwLne", scope: !61, file: !5, size: 8, align: 8, flags: DIFlagPublic, elements: !388, templateParams: !46, identifier: "d9915819558840f5b2e0f12224d88dd4")
+!388 = !{!389}
+!389 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !387, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagPublic)
+!390 = !DIDerivedType(tag: DW_TAG_member, name: "UnknownLocListsEntry", scope: !317, file: !5, baseType: !391, size: 128, align: 64, extraData: i128 23)
+!391 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnknownLocListsEntry", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !392, templateParams: !46, identifier: "980da01d1d36b39f8f45cd86f8be0b88")
+!392 = !{!393}
+!393 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !391, file: !5, baseType: !394, size: 8, align: 8, offset: 8, flags: DIFlagPublic)
+!394 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "DwLle", scope: !61, file: !5, size: 8, align: 8, flags: DIFlagPublic, elements: !395, templateParams: !46, identifier: "a0030f5b581a30602dd91af017afec49")
+!395 = !{!396}
+!396 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !394, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagPublic)
+!397 = !DIDerivedType(tag: DW_TAG_member, name: "UnknownRangeListsEntry", scope: !317, file: !5, baseType: !398, size: 128, align: 64, extraData: i128 24)
+!398 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnknownRangeListsEntry", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !399, templateParams: !46, identifier: "b1b22f15da3690a528b596a003d5cedb")
+!399 = !{!400}
+!400 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !398, file: !5, baseType: !401, size: 8, align: 8, offset: 8, flags: DIFlagPublic)
+!401 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "DwRle", scope: !61, file: !5, size: 8, align: 8, flags: DIFlagPublic, elements: !402, templateParams: !46, identifier: "fb2890de8a376d35eb233acc57542dcd")
+!402 = !{!403}
+!403 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !401, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagPublic)
+!404 = !DIDerivedType(tag: DW_TAG_member, name: "UnsupportedAddressSize", scope: !317, file: !5, baseType: !405, size: 128, align: 64, extraData: i128 25)
+!405 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnsupportedAddressSize", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !406, templateParams: !46, identifier: "600d8151955ba8481767c9c43c8868b4")
+!406 = !{!407}
+!407 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !405, file: !5, baseType: !26, size: 8, align: 8, offset: 8, flags: DIFlagPublic)
+!408 = !DIDerivedType(tag: DW_TAG_member, name: "UnsupportedOffsetSize", scope: !317, file: !5, baseType: !409, size: 128, align: 64, extraData: i128 26)
+!409 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnsupportedOffsetSize", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !410, templateParams: !46, identifier: "34552b1f282c3a7eaf508f4d4757384")
+!410 = !{!411}
+!411 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !409, file: !5, baseType: !26, size: 8, align: 8, offset: 8, flags: DIFlagPublic)
+!412 = !DIDerivedType(tag: DW_TAG_member, name: "UnsupportedFieldSize", scope: !317, file: !5, baseType: !413, size: 128, align: 64, extraData: i128 27)
+!413 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnsupportedFieldSize", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !414, templateParams: !46, identifier: "bed14b8322ad5b70d0f0b5e56821cc97")
+!414 = !{!415}
+!415 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !413, file: !5, baseType: !26, size: 8, align: 8, offset: 8, flags: DIFlagPublic)
+!416 = !DIDerivedType(tag: DW_TAG_member, name: "MinimumInstructionLengthZero", scope: !317, file: !5, baseType: !417, size: 128, align: 64, extraData: i128 28)
+!417 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "MinimumInstructionLengthZero", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "fa1fd0b565474fe82ccb893a09da66e5")
+!418 = !DIDerivedType(tag: DW_TAG_member, name: "MaximumOperationsPerInstructionZero", scope: !317, file: !5, baseType: !419, size: 128, align: 64, extraData: i128 29)
+!419 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "MaximumOperationsPerInstructionZero", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "a35b154691400fb6f961d541241b8c31")
+!420 = !DIDerivedType(tag: DW_TAG_member, name: "LineRangeZero", scope: !317, file: !5, baseType: !421, size: 128, align: 64, extraData: i128 30)
+!421 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "LineRangeZero", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "3c6bfb38e53333f5f80900903619c1b5")
+!422 = !DIDerivedType(tag: DW_TAG_member, name: "OpcodeBaseZero", scope: !317, file: !5, baseType: !423, size: 128, align: 64, extraData: i128 31)
+!423 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "OpcodeBaseZero", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "8f3fdd0c06ec5fb1fc8679e4671305a5")
+!424 = !DIDerivedType(tag: DW_TAG_member, name: "BadUtf8", scope: !317, file: !5, baseType: !425, size: 128, align: 64, extraData: i128 32)
+!425 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "BadUtf8", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "9b9aef385a70fc9dcd57b2e8b5ab6b24")
+!426 = !DIDerivedType(tag: DW_TAG_member, name: "NotCieId", scope: !317, file: !5, baseType: !427, size: 128, align: 64, extraData: i128 33)
+!427 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "NotCieId", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "ebd29b268a6ba10a2b22260f8252bd99")
+!428 = !DIDerivedType(tag: DW_TAG_member, name: "NotCiePointer", scope: !317, file: !5, baseType: !429, size: 128, align: 64, extraData: i128 34)
+!429 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "NotCiePointer", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "22e0182bcdf61321493fc4cd39e9e6db")
+!430 = !DIDerivedType(tag: DW_TAG_member, name: "NotFdePointer", scope: !317, file: !5, baseType: !431, size: 128, align: 64, extraData: i128 35)
+!431 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "NotFdePointer", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "d0b8d1771a76ab5e69aa55743b84d5b8")
+!432 = !DIDerivedType(tag: DW_TAG_member, name: "BadBranchTarget", scope: !317, file: !5, baseType: !433, size: 128, align: 64, extraData: i128 36)
+!433 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "BadBranchTarget", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !434, templateParams: !46, identifier: "f580dfe27723b7ae4b65f9357871a56e")
+!434 = !{!435}
+!435 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !433, file: !5, baseType: !90, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!436 = !DIDerivedType(tag: DW_TAG_member, name: "InvalidPushObjectAddress", scope: !317, file: !5, baseType: !437, size: 128, align: 64, extraData: i128 37)
+!437 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "InvalidPushObjectAddress", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "269d995e87b5d13c7cb611055fbb1cc")
+!438 = !DIDerivedType(tag: DW_TAG_member, name: "NotEnoughStackItems", scope: !317, file: !5, baseType: !439, size: 128, align: 64, extraData: i128 38)
+!439 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "NotEnoughStackItems", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "20bcdb0491a3e809303636911dbf8cd")
+!440 = !DIDerivedType(tag: DW_TAG_member, name: "TooManyIterations", scope: !317, file: !5, baseType: !441, size: 128, align: 64, extraData: i128 39)
+!441 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "TooManyIterations", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "ff30adf3a0a0372083d438531902f07c")
+!442 = !DIDerivedType(tag: DW_TAG_member, name: "InvalidExpression", scope: !317, file: !5, baseType: !443, size: 128, align: 64, extraData: i128 40)
+!443 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "InvalidExpression", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !444, templateParams: !46, identifier: "5651d5c999cb39e0115de3cc6366e2a1")
+!444 = !{!445}
+!445 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !443, file: !5, baseType: !446, size: 8, align: 8, offset: 8, flags: DIFlagPublic)
+!446 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "DwOp", scope: !61, file: !5, size: 8, align: 8, flags: DIFlagPublic, elements: !447, templateParams: !46, identifier: "7f53682bc0f25d737c4504ffbb705411")
+!447 = !{!448}
+!448 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !446, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagPublic)
+!449 = !DIDerivedType(tag: DW_TAG_member, name: "UnsupportedEvaluation", scope: !317, file: !5, baseType: !450, size: 128, align: 64, extraData: i128 41)
+!450 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnsupportedEvaluation", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "f7f5df83d048fac1b6a625fe43495894")
+!451 = !DIDerivedType(tag: DW_TAG_member, name: "InvalidPiece", scope: !317, file: !5, baseType: !452, size: 128, align: 64, extraData: i128 42)
+!452 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "InvalidPiece", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "b63df519ec7f41efddc9e93b531a6ad9")
+!453 = !DIDerivedType(tag: DW_TAG_member, name: "InvalidExpressionTerminator", scope: !317, file: !5, baseType: !454, size: 128, align: 64, extraData: i128 43)
+!454 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "InvalidExpressionTerminator", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !455, templateParams: !46, identifier: "3a81ef5c26543c7edcc61e9fc7944fef")
+!455 = !{!456}
+!456 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !454, file: !5, baseType: !90, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!457 = !DIDerivedType(tag: DW_TAG_member, name: "DivisionByZero", scope: !317, file: !5, baseType: !458, size: 128, align: 64, extraData: i128 44)
+!458 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "DivisionByZero", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "b76367d376d20e9fbb0ca9f290f2c235")
+!459 = !DIDerivedType(tag: DW_TAG_member, name: "TypeMismatch", scope: !317, file: !5, baseType: !460, size: 128, align: 64, extraData: i128 45)
+!460 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "TypeMismatch", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "89d807c67453ebabd4929695d8f66096")
+!461 = !DIDerivedType(tag: DW_TAG_member, name: "IntegralTypeRequired", scope: !317, file: !5, baseType: !462, size: 128, align: 64, extraData: i128 46)
+!462 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "IntegralTypeRequired", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "7b0620dbd2111d443ce6e80003d23fb5")
+!463 = !DIDerivedType(tag: DW_TAG_member, name: "UnsupportedTypeOperation", scope: !317, file: !5, baseType: !464, size: 128, align: 64, extraData: i128 47)
+!464 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnsupportedTypeOperation", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "d08821c006f73bfca08dc2d771305521")
+!465 = !DIDerivedType(tag: DW_TAG_member, name: "InvalidShiftExpression", scope: !317, file: !5, baseType: !466, size: 128, align: 64, extraData: i128 48)
+!466 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "InvalidShiftExpression", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "f9d7ea923183442e88981e5113d95cd8")
+!467 = !DIDerivedType(tag: DW_TAG_member, name: "UnknownCallFrameInstruction", scope: !317, file: !5, baseType: !468, size: 128, align: 64, extraData: i128 49)
+!468 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnknownCallFrameInstruction", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !469, templateParams: !46, identifier: "93dffe49e5f883b92277a0558e3cb1b2")
+!469 = !{!470}
+!470 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !468, file: !5, baseType: !471, size: 8, align: 8, offset: 8, flags: DIFlagPublic)
+!471 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "DwCfa", scope: !61, file: !5, size: 8, align: 8, flags: DIFlagPublic, elements: !472, templateParams: !46, identifier: "25e8987d4efbed4c58d052b7ad506631")
+!472 = !{!473}
+!473 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !471, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagPublic)
+!474 = !DIDerivedType(tag: DW_TAG_member, name: "InvalidAddressRange", scope: !317, file: !5, baseType: !475, size: 128, align: 64, extraData: i128 50)
+!475 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "InvalidAddressRange", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "472535216c273e01cbb038f4e1a46c30")
+!476 = !DIDerivedType(tag: DW_TAG_member, name: "CfiInstructionInInvalidContext", scope: !317, file: !5, baseType: !477, size: 128, align: 64, extraData: i128 51)
+!477 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CfiInstructionInInvalidContext", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "eddaee247b191a56aa7bc2dc0985c5da")
+!478 = !DIDerivedType(tag: DW_TAG_member, name: "PopWithEmptyStack", scope: !317, file: !5, baseType: !479, size: 128, align: 64, extraData: i128 52)
+!479 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "PopWithEmptyStack", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "b820ab2462801a0fc0899bf3654c4b8")
+!480 = !DIDerivedType(tag: DW_TAG_member, name: "NoUnwindInfoForAddress", scope: !317, file: !5, baseType: !481, size: 128, align: 64, extraData: i128 53)
+!481 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "NoUnwindInfoForAddress", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "ae01ce5aaf3d99bcee8d4f5cd8182484")
+!482 = !DIDerivedType(tag: DW_TAG_member, name: "UnsupportedOffset", scope: !317, file: !5, baseType: !483, size: 128, align: 64, extraData: i128 54)
+!483 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnsupportedOffset", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "2ac7a792140cb6d7bb468e88d6cc2308")
+!484 = !DIDerivedType(tag: DW_TAG_member, name: "UnknownPointerEncoding", scope: !317, file: !5, baseType: !485, size: 128, align: 64, extraData: i128 55)
+!485 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnknownPointerEncoding", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !486, templateParams: !46, identifier: "81e4dbaa1837b41877cb2650ba0cb059")
+!486 = !{!487}
+!487 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !485, file: !5, baseType: !60, size: 8, align: 8, offset: 8, flags: DIFlagPublic)
+!488 = !DIDerivedType(tag: DW_TAG_member, name: "NoEntryAtGivenOffset", scope: !317, file: !5, baseType: !489, size: 128, align: 64, extraData: i128 56)
+!489 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "NoEntryAtGivenOffset", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "fe3a27c995940427e718d5e3a088265")
+!490 = !DIDerivedType(tag: DW_TAG_member, name: "OffsetOutOfBounds", scope: !317, file: !5, baseType: !491, size: 128, align: 64, extraData: i128 57)
+!491 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "OffsetOutOfBounds", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "5d4def3cd252d7be8c56a756b8c7d7b")
+!492 = !DIDerivedType(tag: DW_TAG_member, name: "UnknownAugmentation", scope: !317, file: !5, baseType: !493, size: 128, align: 64, extraData: i128 58)
+!493 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnknownAugmentation", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "dd1c3a83fa13d7501e7c00dbc7ffd4e6")
+!494 = !DIDerivedType(tag: DW_TAG_member, name: "UnsupportedPointerEncoding", scope: !317, file: !5, baseType: !495, size: 128, align: 64, extraData: i128 59)
+!495 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnsupportedPointerEncoding", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "f23537dcf6caef7aacff71f94f88ad9a")
+!496 = !DIDerivedType(tag: DW_TAG_member, name: "UnsupportedRegister", scope: !317, file: !5, baseType: !497, size: 128, align: 64, extraData: i128 60)
+!497 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnsupportedRegister", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !498, templateParams: !46, identifier: "69e04d530c2a072b985811e3c0c90332")
+!498 = !{!499}
+!499 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !497, file: !5, baseType: !90, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!500 = !DIDerivedType(tag: DW_TAG_member, name: "TooManyRegisterRules", scope: !317, file: !5, baseType: !501, size: 128, align: 64, extraData: i128 61)
+!501 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "TooManyRegisterRules", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "f74f825fd17a89366d5fbcd594ad371e")
+!502 = !DIDerivedType(tag: DW_TAG_member, name: "StackFull", scope: !317, file: !5, baseType: !503, size: 128, align: 64, extraData: i128 62)
+!503 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "StackFull", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "649f9201d23349d0547f2bf0aa454d2c")
+!504 = !DIDerivedType(tag: DW_TAG_member, name: "VariableLengthSearchTable", scope: !317, file: !5, baseType: !505, size: 128, align: 64, extraData: i128 63)
+!505 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "VariableLengthSearchTable", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "c37c43df5d63ae12b1ee17c8b3bac70")
+!506 = !DIDerivedType(tag: DW_TAG_member, name: "UnsupportedUnitType", scope: !317, file: !5, baseType: !507, size: 128, align: 64, extraData: i128 64)
+!507 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnsupportedUnitType", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "c522143e95508ac586d52dd2a2094233")
+!508 = !DIDerivedType(tag: DW_TAG_member, name: "UnsupportedAddressIndex", scope: !317, file: !5, baseType: !509, size: 128, align: 64, extraData: i128 65)
+!509 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnsupportedAddressIndex", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "7f9ad238cf1907731ceaa4464f643d59")
+!510 = !DIDerivedType(tag: DW_TAG_member, name: "UnsupportedSegmentSize", scope: !317, file: !5, baseType: !511, size: 128, align: 64, extraData: i128 66)
+!511 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnsupportedSegmentSize", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "d82bf4bb4842cb1a2d0d0685137bb22")
+!512 = !DIDerivedType(tag: DW_TAG_member, name: "MissingUnitDie", scope: !317, file: !5, baseType: !513, size: 128, align: 64, extraData: i128 67)
+!513 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "MissingUnitDie", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "bfa91df9d2fb7ed5794259b263898a8")
+!514 = !DIDerivedType(tag: DW_TAG_member, name: "UnsupportedAttributeForm", scope: !317, file: !5, baseType: !515, size: 128, align: 64, extraData: i128 68)
+!515 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnsupportedAttributeForm", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "b4d57042fdb22adeec228129f57c3bdb")
+!516 = !DIDerivedType(tag: DW_TAG_member, name: "MissingFileEntryFormatPath", scope: !317, file: !5, baseType: !517, size: 128, align: 64, extraData: i128 69)
+!517 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "MissingFileEntryFormatPath", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "2feb917517f62b9a49da4bec8085de52")
+!518 = !DIDerivedType(tag: DW_TAG_member, name: "ExpectedStringAttributeValue", scope: !317, file: !5, baseType: !519, size: 128, align: 64, extraData: i128 70)
+!519 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ExpectedStringAttributeValue", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "2e1c89ffcc99bcff26aae384fe90a0b5")
+!520 = !DIDerivedType(tag: DW_TAG_member, name: "InvalidImplicitConst", scope: !317, file: !5, baseType: !521, size: 128, align: 64, extraData: i128 71)
+!521 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "InvalidImplicitConst", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "aea5e93522c8b1b32186ed286b609796")
+!522 = !DIDerivedType(tag: DW_TAG_member, name: "InvalidIndexSectionCount", scope: !317, file: !5, baseType: !523, size: 128, align: 64, extraData: i128 72)
+!523 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "InvalidIndexSectionCount", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "588d1fe04fa40bd64d4b9811824daa5a")
+!524 = !DIDerivedType(tag: DW_TAG_member, name: "InvalidIndexSlotCount", scope: !317, file: !5, baseType: !525, size: 128, align: 64, extraData: i128 73)
+!525 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "InvalidIndexSlotCount", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "6274016f543239a9e591b947785d3205")
+!526 = !DIDerivedType(tag: DW_TAG_member, name: "InvalidIndexRow", scope: !317, file: !5, baseType: !527, size: 128, align: 64, extraData: i128 74)
+!527 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "InvalidIndexRow", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, identifier: "59ad95d188560365cbe8c5f5359279c9")
+!528 = !DIDerivedType(tag: DW_TAG_member, name: "UnknownIndexSection", scope: !317, file: !5, baseType: !529, size: 128, align: 64, extraData: i128 75)
+!529 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnknownIndexSection", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !530, templateParams: !46, identifier: "b73e5f323eb887c2778393497e67883f")
+!530 = !{!531}
+!531 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !529, file: !5, baseType: !532, size: 32, align: 32, offset: 32, flags: DIFlagPublic)
+!532 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "DwSect", scope: !61, file: !5, size: 32, align: 32, flags: DIFlagPublic, elements: !533, templateParams: !46, identifier: "2076c0f96c14e3f2bddcc281c845b22f")
+!533 = !{!534}
+!534 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !532, file: !5, baseType: !535, size: 32, align: 32, flags: DIFlagPublic)
+!535 = !DIBasicType(name: "u32", size: 32, encoding: DW_ATE_unsigned)
+!536 = !DIDerivedType(tag: DW_TAG_member, name: "UnknownIndexSectionV2", scope: !317, file: !5, baseType: !537, size: 128, align: 64, extraData: i128 76)
+!537 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnknownIndexSectionV2", scope: !315, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !538, templateParams: !46, identifier: "5ef812c992486c75cad093763c73c88f")
+!538 = !{!539}
+!539 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !537, file: !5, baseType: !540, size: 32, align: 32, offset: 32, flags: DIFlagPublic)
+!540 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "DwSectV2", scope: !61, file: !5, size: 32, align: 32, flags: DIFlagPublic, elements: !541, templateParams: !46, identifier: "f456df61651542439b4c0d56ddfa3e2b")
+!541 = !{!542}
+!542 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !540, file: !5, baseType: !535, size: 32, align: 32, flags: DIFlagPublic)
+!543 = !DIDerivedType(tag: DW_TAG_member, scope: !315, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagArtificial)
+!544 = !DIDerivedType(tag: DW_TAG_member, name: "Err", scope: !307, file: !5, baseType: !545, size: 128, align: 64)
+!545 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Err", scope: !304, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !546, templateParams: !313, identifier: "73d51b53019cec2eb7866e0ccdee2b05")
+!546 = !{!547}
+!547 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !545, file: !5, baseType: !315, size: 128, align: 64, flags: DIFlagPublic)
+!548 = !DIDerivedType(tag: DW_TAG_member, scope: !304, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagArtificial)
+!549 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&unwinding::unwinder::frame::Frame", baseType: !4, size: 64, align: 64, dwarfAddressSpace: 0)
+!550 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&unwinding::unwinder::arch::aarch64::Context", baseType: !551, size: 64, align: 64, dwarfAddressSpace: 0)
+!551 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Context", scope: !552, file: !5, size: 4096, align: 64, flags: DIFlagPublic, elements: !554, templateParams: !46, identifier: "8e981de74a115bb4264fb06b8de66f0")
+!552 = !DINamespace(name: "aarch64", scope: !553)
+!553 = !DINamespace(name: "arch", scope: !7)
+!554 = !{!555, !559, !560}
+!555 = !DIDerivedType(tag: DW_TAG_member, name: "gp", scope: !551, file: !5, baseType: !556, size: 1984, align: 64, flags: DIFlagPublic)
+!556 = !DICompositeType(tag: DW_TAG_array_type, baseType: !21, size: 1984, align: 64, elements: !557)
+!557 = !{!558}
+!558 = !DISubrange(count: 31, lowerBound: 0)
+!559 = !DIDerivedType(tag: DW_TAG_member, name: "sp", scope: !551, file: !5, baseType: !21, size: 64, align: 64, offset: 1984, flags: DIFlagPublic)
+!560 = !DIDerivedType(tag: DW_TAG_member, name: "fp", scope: !551, file: !5, baseType: !561, size: 2048, align: 64, offset: 2048, flags: DIFlagPublic)
+!561 = !DICompositeType(tag: DW_TAG_array_type, baseType: !21, size: 2048, align: 64, elements: !562)
+!562 = !{!563}
+!563 = !DISubrange(count: 32, lowerBound: 0)
+!564 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !565, producer: "clang LLVM (rustc version 1.82.0-nightly (636d7ff91 2024-08-19))", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !566, globals: !597, splitDebugInlining: false, nameTableKind: None)
+!565 = !DIFile(filename: "src/lib.rs/@/unwinding.453513c1ca9c7b65-cgu.0", directory: "/home/dev/ecosystem/unwinding")
+!566 = !{!200, !567, !24, !571, !579, !586, !591}
+!567 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "CieOffsetEncoding", scope: !16, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagEnumClass, elements: !568)
+!568 = !{!569, !570}
+!569 = !DIEnumerator(name: "U32", value: 0, isUnsigned: true)
+!570 = !DIEnumerator(name: "U64", value: 1, isUnsigned: true)
+!571 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Alignment", scope: !572, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagEnumClass, elements: !574)
+!572 = !DINamespace(name: "rt", scope: !573)
+!573 = !DINamespace(name: "fmt", scope: !40)
+!574 = !{!575, !576, !577, !578}
+!575 = !DIEnumerator(name: "Left", value: 0, isUnsigned: true)
+!576 = !DIEnumerator(name: "Right", value: 1, isUnsigned: true)
+!577 = !DIEnumerator(name: "Center", value: 2, isUnsigned: true)
+!578 = !DIEnumerator(name: "Unknown", value: 3, isUnsigned: true)
+!579 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Ordering", scope: !580, file: !5, baseType: !581, size: 8, align: 8, flags: DIFlagEnumClass, elements: !582)
+!580 = !DINamespace(name: "cmp", scope: !40)
+!581 = !DIBasicType(name: "i8", size: 8, encoding: DW_ATE_signed)
+!582 = !{!583, !584, !585}
+!583 = !DIEnumerator(name: "Less", value: -1)
+!584 = !DIEnumerator(name: "Equal", value: 0)
+!585 = !DIEnumerator(name: "Greater", value: 1)
+!586 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "c_void", scope: !587, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagEnumClass, elements: !588)
+!587 = !DINamespace(name: "ffi", scope: !40)
+!588 = !{!589, !590}
+!589 = !DIEnumerator(name: "__variant1", value: 0, isUnsigned: true)
+!590 = !DIEnumerator(name: "__variant2", value: 1, isUnsigned: true)
+!591 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "AssertKind", scope: !592, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagEnumClass, elements: !593)
+!592 = !DINamespace(name: "panicking", scope: !40)
+!593 = !{!594, !595, !596}
+!594 = !DIEnumerator(name: "Eq", value: 0, isUnsigned: true)
+!595 = !DIEnumerator(name: "Ne", value: 1, isUnsigned: true)
+!596 = !DIEnumerator(name: "Match", value: 2, isUnsigned: true)
+!597 = !{!598}
+!598 = !DIGlobalVariableExpression(var: !599, expr: !DIExpression())
+!599 = distinct !DIGlobalVariable(name: "<gimli::read::Error as core::fmt::Debug>::{vtable}", scope: null, file: !5, type: !600, isLocal: true, isDefinition: true)
+!600 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "<gimli::read::Error as core::fmt::Debug>::{vtable_type}", file: !5, size: 256, align: 64, flags: DIFlagArtificial, elements: !601, vtableHolder: !315, templateParams: !46, identifier: "1f97312b991e7e51c27c8ed2941b7252")
+!601 = !{!602, !604, !605, !606}
+!602 = !DIDerivedType(tag: DW_TAG_member, name: "drop_in_place", scope: !600, file: !5, baseType: !603, size: 64, align: 64)
+!603 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "*const ()", baseType: !246, size: 64, align: 64, dwarfAddressSpace: 0)
+!604 = !DIDerivedType(tag: DW_TAG_member, name: "size", scope: !600, file: !5, baseType: !21, size: 64, align: 64, offset: 64)
+!605 = !DIDerivedType(tag: DW_TAG_member, name: "align", scope: !600, file: !5, baseType: !21, size: 64, align: 64, offset: 128)
+!606 = !DIDerivedType(tag: DW_TAG_member, name: "__method3", scope: !600, file: !5, baseType: !603, size: 64, align: 64, offset: 192)
+!607 = !DISubprogram(name: "evaluate_expression", linkageName: "_ZN9unwinding8unwinder5frame5Frame19evaluate_expression17h2bd8716b79f71675E", scope: !4, file: !3, line: 79, type: !302, scopeLine: 79, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit, templateParams: !46)
+!608 = !{!609, !610, !611, !612, !618, !873, !946, !966, !968, !970, !972, !974, !976, !978, !980, !982, !984, !986, !988, !990, !992, !994, !997}
+!609 = !DILocalVariable(name: "self", arg: 1, scope: !2, file: !3, line: 80, type: !549)
+!610 = !DILocalVariable(name: "ctx", arg: 2, scope: !2, file: !3, line: 81, type: !550)
+!611 = !DILocalVariable(name: "expr", arg: 3, scope: !2, file: !3, line: 82, type: !227)
+!612 = !DILocalVariable(name: "expr", scope: !613, file: !3, line: 84, type: !614, align: 8)
+!613 = distinct !DILexicalBlock(scope: !2, file: !3, line: 84, column: 9)
+!614 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Expression<gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>>", scope: !615, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !616, templateParams: !204, identifier: "c37ff0bc75fe37cf7f7a1245102bd107")
+!615 = !DINamespace(name: "op", scope: !17)
+!616 = !{!617}
+!617 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !614, file: !5, baseType: !120, size: 128, align: 64, flags: DIFlagPublic)
+!618 = !DILocalVariable(name: "eval", scope: !619, file: !3, line: 85, type: !620, align: 8)
+!619 = distinct !DILexicalBlock(scope: !613, file: !3, line: 85, column: 9)
+!620 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Evaluation<gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>, unwinding::unwinder::frame::StoreOnStack>", scope: !615, file: !5, size: 9728, align: 64, flags: DIFlagPublic, elements: !621, templateParams: !872, identifier: "96ccd7b3bd9690c1bf418466cde67a")
+!621 = !{!622, !623, !629, !630, !644, !645, !704, !705, !779, !780, !804, !816}
+!622 = !DIDerivedType(tag: DW_TAG_member, name: "bytecode", scope: !620, file: !5, baseType: !120, size: 128, align: 64, offset: 512, flags: DIFlagPrivate)
+!623 = !DIDerivedType(tag: DW_TAG_member, name: "encoding", scope: !620, file: !5, baseType: !624, size: 32, align: 16, offset: 9664, flags: DIFlagPrivate)
+!624 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Encoding", scope: !25, file: !5, size: 32, align: 16, flags: DIFlagPublic, elements: !625, templateParams: !46, identifier: "1e5c559cf794bf056cbc617988ad2fe8")
+!625 = !{!626, !627, !628}
+!626 = !DIDerivedType(tag: DW_TAG_member, name: "address_size", scope: !624, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagPublic)
+!627 = !DIDerivedType(tag: DW_TAG_member, name: "format", scope: !624, file: !5, baseType: !24, size: 8, align: 8, offset: 8, flags: DIFlagPublic)
+!628 = !DIDerivedType(tag: DW_TAG_member, name: "version", scope: !624, file: !5, baseType: !118, size: 16, align: 16, offset: 16, flags: DIFlagPublic)
+!629 = !DIDerivedType(tag: DW_TAG_member, name: "object_address", scope: !620, file: !5, baseType: !178, size: 128, align: 64, flags: DIFlagPrivate)
+!630 = !DIDerivedType(tag: DW_TAG_member, name: "max_iterations", scope: !620, file: !5, baseType: !631, size: 64, align: 32, offset: 320, flags: DIFlagPrivate)
+!631 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Option<u32>", scope: !39, file: !5, size: 64, align: 32, flags: DIFlagPublic, elements: !632, templateParams: !46, identifier: "ebe42463e4e7e92377731e8e461eca4b")
+!632 = !{!633}
+!633 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !631, file: !5, size: 64, align: 32, elements: !634, templateParams: !46, identifier: "4e5479196563409542c164f35683db2c", discriminator: !643)
+!634 = !{!635, !639}
+!635 = !DIDerivedType(tag: DW_TAG_member, name: "None", scope: !633, file: !5, baseType: !636, size: 64, align: 32, extraData: i128 0)
+!636 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "None", scope: !631, file: !5, size: 64, align: 32, flags: DIFlagPublic, elements: !46, templateParams: !637, identifier: "1b68f282e961af132516648785d5c5b")
+!637 = !{!638}
+!638 = !DITemplateTypeParameter(name: "T", type: !535)
+!639 = !DIDerivedType(tag: DW_TAG_member, name: "Some", scope: !633, file: !5, baseType: !640, size: 64, align: 32, extraData: i128 1)
+!640 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Some", scope: !631, file: !5, size: 64, align: 32, flags: DIFlagPublic, elements: !641, templateParams: !637, identifier: "1174a86fa23a90bb9338798d86b9144b")
+!641 = !{!642}
+!642 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !640, file: !5, baseType: !535, size: 32, align: 32, offset: 32, flags: DIFlagPublic)
+!643 = !DIDerivedType(tag: DW_TAG_member, scope: !631, file: !5, baseType: !535, size: 32, align: 32, flags: DIFlagArtificial)
+!644 = !DIDerivedType(tag: DW_TAG_member, name: "iteration", scope: !620, file: !5, baseType: !535, size: 32, align: 32, offset: 9696, flags: DIFlagPrivate)
+!645 = !DIDerivedType(tag: DW_TAG_member, name: "state", scope: !620, file: !5, baseType: !646, size: 192, align: 64, offset: 128, flags: DIFlagPrivate)
+!646 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "EvaluationState<gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>>", scope: !615, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !647, templateParams: !46, identifier: "a87e1097ab6add42183b1a7b5ce2c22a")
+!647 = !{!648}
+!648 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !646, file: !5, size: 192, align: 64, elements: !649, templateParams: !46, identifier: "964a46dc5f7d0161aef9349b825f4d3c", discriminator: !703)
+!649 = !{!650, !654, !656, !660, !662}
+!650 = !DIDerivedType(tag: DW_TAG_member, name: "Start", scope: !648, file: !5, baseType: !651, size: 192, align: 64, extraData: i128 13)
+!651 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Start", scope: !646, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !652, templateParams: !204, identifier: "676f580c535a7a6db58a5a99c5a2c2d8")
+!652 = !{!653}
+!653 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !651, file: !5, baseType: !178, size: 128, align: 64, offset: 64, flags: DIFlagPrivate)
+!654 = !DIDerivedType(tag: DW_TAG_member, name: "Ready", scope: !648, file: !5, baseType: !655, size: 192, align: 64, extraData: i128 14)
+!655 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Ready", scope: !646, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !46, templateParams: !204, identifier: "65ad2c990c0611a1db46a27501104448")
+!656 = !DIDerivedType(tag: DW_TAG_member, name: "Error", scope: !648, file: !5, baseType: !657, size: 192, align: 64, extraData: i128 15)
+!657 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Error", scope: !646, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !658, templateParams: !204, identifier: "dc65c3743b7b42cd171e919a2e16d7b")
+!658 = !{!659}
+!659 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !657, file: !5, baseType: !315, size: 128, align: 64, offset: 64, flags: DIFlagPrivate)
+!660 = !DIDerivedType(tag: DW_TAG_member, name: "Complete", scope: !648, file: !5, baseType: !661, size: 192, align: 64, extraData: i128 16)
+!661 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Complete", scope: !646, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !46, templateParams: !204, identifier: "9bbe3d1f49a3210198c8587f4e96fb03")
+!662 = !DIDerivedType(tag: DW_TAG_member, name: "Waiting", scope: !648, file: !5, baseType: !663, size: 192, align: 64)
+!663 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Waiting", scope: !646, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !664, templateParams: !204, identifier: "584797001e661d96746ac8ed6d5724a6")
+!664 = !{!665}
+!665 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !663, file: !5, baseType: !666, size: 192, align: 64, flags: DIFlagPrivate)
+!666 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "EvaluationWaiting<gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>>", scope: !615, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !667, templateParams: !46, identifier: "54f4ef87ee3afe1d13a2fe921d99fd24")
+!667 = !{!668}
+!668 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !666, file: !5, size: 192, align: 64, elements: !669, templateParams: !46, identifier: "f00c533ddc4d77e115b3ad4cb5c15280", discriminator: !702)
+!669 = !{!670, !672, !676, !680, !682, !684, !686, !688, !690, !692, !694, !698, !700}
+!670 = !DIDerivedType(tag: DW_TAG_member, name: "Memory", scope: !668, file: !5, baseType: !671, size: 192, align: 64, extraData: i128 0)
+!671 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Memory", scope: !666, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !46, templateParams: !204, identifier: "60f0e96ff64536f4e22d06b39e5a1767")
+!672 = !DIDerivedType(tag: DW_TAG_member, name: "Register", scope: !668, file: !5, baseType: !673, size: 192, align: 64, extraData: i128 1)
+!673 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Register", scope: !666, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !674, templateParams: !204, identifier: "f6217dc77671182d67f76e991ad0d34")
+!674 = !{!675}
+!675 = !DIDerivedType(tag: DW_TAG_member, name: "offset", scope: !673, file: !5, baseType: !113, size: 64, align: 64, offset: 64, flags: DIFlagPrivate)
+!676 = !DIDerivedType(tag: DW_TAG_member, name: "FrameBase", scope: !668, file: !5, baseType: !677, size: 192, align: 64, extraData: i128 2)
+!677 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "FrameBase", scope: !666, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !678, templateParams: !204, identifier: "204f299ba7b55659430baa961a704123")
+!678 = !{!679}
+!679 = !DIDerivedType(tag: DW_TAG_member, name: "offset", scope: !677, file: !5, baseType: !113, size: 64, align: 64, offset: 64, flags: DIFlagPrivate)
+!680 = !DIDerivedType(tag: DW_TAG_member, name: "Tls", scope: !668, file: !5, baseType: !681, size: 192, align: 64, extraData: i128 3)
+!681 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Tls", scope: !666, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !46, templateParams: !204, identifier: "f74f40e6e4e01256a41b6afde7f8c13")
+!682 = !DIDerivedType(tag: DW_TAG_member, name: "Cfa", scope: !668, file: !5, baseType: !683, size: 192, align: 64, extraData: i128 4)
+!683 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Cfa", scope: !666, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !46, templateParams: !204, identifier: "1cc7f11b0e59e14dc1f82774fcaf9f50")
+!684 = !DIDerivedType(tag: DW_TAG_member, name: "AtLocation", scope: !668, file: !5, baseType: !685, size: 192, align: 64, extraData: i128 5)
+!685 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "AtLocation", scope: !666, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !46, templateParams: !204, identifier: "38d703a8ecd90730b9598d5d108a58c7")
+!686 = !DIDerivedType(tag: DW_TAG_member, name: "EntryValue", scope: !668, file: !5, baseType: !687, size: 192, align: 64, extraData: i128 6)
+!687 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "EntryValue", scope: !666, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !46, templateParams: !204, identifier: "18ccb8cef0642e8bb7e43041978cd5a2")
+!688 = !DIDerivedType(tag: DW_TAG_member, name: "ParameterRef", scope: !668, file: !5, baseType: !689, size: 192, align: 64, extraData: i128 7)
+!689 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ParameterRef", scope: !666, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !46, templateParams: !204, identifier: "98799c88bea54fd8e71d4d352afe0a60")
+!690 = !DIDerivedType(tag: DW_TAG_member, name: "RelocatedAddress", scope: !668, file: !5, baseType: !691, size: 192, align: 64, extraData: i128 8)
+!691 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "RelocatedAddress", scope: !666, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !46, templateParams: !204, identifier: "681574cd255ae76c95f580792b7e3324")
+!692 = !DIDerivedType(tag: DW_TAG_member, name: "IndexedAddress", scope: !668, file: !5, baseType: !693, size: 192, align: 64, extraData: i128 9)
+!693 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "IndexedAddress", scope: !666, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !46, templateParams: !204, identifier: "8ef93144f1a4ce7466f498c65e53f78d")
+!694 = !DIDerivedType(tag: DW_TAG_member, name: "TypedLiteral", scope: !668, file: !5, baseType: !695, size: 192, align: 64, extraData: i128 10)
+!695 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "TypedLiteral", scope: !666, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !696, templateParams: !204, identifier: "950b11aa2ef5d8eac4d1b8c044c1c8ff")
+!696 = !{!697}
+!697 = !DIDerivedType(tag: DW_TAG_member, name: "value", scope: !695, file: !5, baseType: !120, size: 128, align: 64, offset: 64, flags: DIFlagPrivate)
+!698 = !DIDerivedType(tag: DW_TAG_member, name: "Convert", scope: !668, file: !5, baseType: !699, size: 192, align: 64, extraData: i128 11)
+!699 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Convert", scope: !666, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !46, templateParams: !204, identifier: "ce96d5537bd6c4694efd5485edbf7a81")
+!700 = !DIDerivedType(tag: DW_TAG_member, name: "Reinterpret", scope: !668, file: !5, baseType: !701, size: 192, align: 64, extraData: i128 12)
+!701 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Reinterpret", scope: !666, file: !5, size: 192, align: 64, flags: DIFlagPrivate, elements: !46, templateParams: !204, identifier: "4ec3c0dfd90d332fd315c26b2bb8be1")
+!702 = !DIDerivedType(tag: DW_TAG_member, scope: !666, file: !5, baseType: !90, size: 64, align: 64, flags: DIFlagArtificial)
+!703 = !DIDerivedType(tag: DW_TAG_member, scope: !646, file: !5, baseType: !90, size: 64, align: 64, flags: DIFlagArtificial)
+!704 = !DIDerivedType(tag: DW_TAG_member, name: "addr_mask", scope: !620, file: !5, baseType: !90, size: 64, align: 64, offset: 768, flags: DIFlagPrivate)
+!705 = !DIDerivedType(tag: DW_TAG_member, name: "stack", scope: !620, file: !5, baseType: !706, size: 8256, align: 64, offset: 832, flags: DIFlagPrivate)
+!706 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ArrayVec<[gimli::read::value::Value; 64]>", scope: !237, file: !5, size: 8256, align: 64, flags: DIFlagProtected, elements: !707, templateParams: !776, identifier: "ae126a3705f435911648edbf2c5ecbbc")
+!707 = !{!708, !775}
+!708 = !DIDerivedType(tag: DW_TAG_member, name: "storage", scope: !706, file: !5, baseType: !709, size: 8192, align: 64, flags: DIFlagPrivate)
+!709 = !DICompositeType(tag: DW_TAG_array_type, baseType: !710, size: 8192, align: 64, elements: !773)
+!710 = distinct !DICompositeType(tag: DW_TAG_union_type, name: "MaybeUninit<gimli::read::value::Value>", scope: !242, file: !5, size: 128, align: 64, elements: !711, templateParams: !771, identifier: "2f3d34f66e72ebe9405ea41c59f10b3e")
+!711 = !{!712, !713}
+!712 = !DIDerivedType(tag: DW_TAG_member, name: "uninit", scope: !710, file: !5, baseType: !246, align: 8)
+!713 = !DIDerivedType(tag: DW_TAG_member, name: "value", scope: !710, file: !5, baseType: !714, size: 128, align: 64)
+!714 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ManuallyDrop<gimli::read::value::Value>", scope: !249, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !715, templateParams: !771, identifier: "fb28eeb8aae47bc0c4a1c0f3b1cb482b")
+!715 = !{!716}
+!716 = !DIDerivedType(tag: DW_TAG_member, name: "value", scope: !714, file: !5, baseType: !717, size: 128, align: 64, flags: DIFlagPrivate)
+!717 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Value", scope: !718, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !719, templateParams: !46, identifier: "449f2092324ba7422f77464ef34843c0")
+!718 = !DINamespace(name: "value", scope: !17)
+!719 = !{!720}
+!720 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !717, file: !5, size: 128, align: 64, elements: !721, templateParams: !46, identifier: "68c86d4974102053f5e2c86d31812aa6", discriminator: !770)
+!721 = !{!722, !726, !730, !734, !739, !743, !748, !752, !756, !760, !765}
+!722 = !DIDerivedType(tag: DW_TAG_member, name: "Generic", scope: !720, file: !5, baseType: !723, size: 128, align: 64, extraData: i128 0)
+!723 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Generic", scope: !717, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !724, templateParams: !46, identifier: "aded9ad7d102fa3d3f7c711747aa177f")
+!724 = !{!725}
+!725 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !723, file: !5, baseType: !90, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!726 = !DIDerivedType(tag: DW_TAG_member, name: "I8", scope: !720, file: !5, baseType: !727, size: 128, align: 64, extraData: i128 1)
+!727 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "I8", scope: !717, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !728, templateParams: !46, identifier: "b7e241735c2437539ef217967df198d5")
+!728 = !{!729}
+!729 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !727, file: !5, baseType: !581, size: 8, align: 8, offset: 8, flags: DIFlagPublic)
+!730 = !DIDerivedType(tag: DW_TAG_member, name: "U8", scope: !720, file: !5, baseType: !731, size: 128, align: 64, extraData: i128 2)
+!731 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "U8", scope: !717, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !732, templateParams: !46, identifier: "235cabb3f8c8ea232e140126fb0f84")
+!732 = !{!733}
+!733 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !731, file: !5, baseType: !26, size: 8, align: 8, offset: 8, flags: DIFlagPublic)
+!734 = !DIDerivedType(tag: DW_TAG_member, name: "I16", scope: !720, file: !5, baseType: !735, size: 128, align: 64, extraData: i128 3)
+!735 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "I16", scope: !717, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !736, templateParams: !46, identifier: "dcfa782be57e9cc55e549bb113b04a06")
+!736 = !{!737}
+!737 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !735, file: !5, baseType: !738, size: 16, align: 16, offset: 16, flags: DIFlagPublic)
+!738 = !DIBasicType(name: "i16", size: 16, encoding: DW_ATE_signed)
+!739 = !DIDerivedType(tag: DW_TAG_member, name: "U16", scope: !720, file: !5, baseType: !740, size: 128, align: 64, extraData: i128 4)
+!740 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "U16", scope: !717, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !741, templateParams: !46, identifier: "f05631058507a1c5a5bf2d6cc5754e47")
+!741 = !{!742}
+!742 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !740, file: !5, baseType: !118, size: 16, align: 16, offset: 16, flags: DIFlagPublic)
+!743 = !DIDerivedType(tag: DW_TAG_member, name: "I32", scope: !720, file: !5, baseType: !744, size: 128, align: 64, extraData: i128 5)
+!744 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "I32", scope: !717, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !745, templateParams: !46, identifier: "d2b5ebdf257a9ced33cecf5822df487f")
+!745 = !{!746}
+!746 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !744, file: !5, baseType: !747, size: 32, align: 32, offset: 32, flags: DIFlagPublic)
+!747 = !DIBasicType(name: "i32", size: 32, encoding: DW_ATE_signed)
+!748 = !DIDerivedType(tag: DW_TAG_member, name: "U32", scope: !720, file: !5, baseType: !749, size: 128, align: 64, extraData: i128 6)
+!749 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "U32", scope: !717, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !750, templateParams: !46, identifier: "34e90881764c38957273f3906eba341e")
+!750 = !{!751}
+!751 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !749, file: !5, baseType: !535, size: 32, align: 32, offset: 32, flags: DIFlagPublic)
+!752 = !DIDerivedType(tag: DW_TAG_member, name: "I64", scope: !720, file: !5, baseType: !753, size: 128, align: 64, extraData: i128 7)
+!753 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "I64", scope: !717, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !754, templateParams: !46, identifier: "30d890c34543c5fdd47c6eb9c24adb3d")
+!754 = !{!755}
+!755 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !753, file: !5, baseType: !113, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!756 = !DIDerivedType(tag: DW_TAG_member, name: "U64", scope: !720, file: !5, baseType: !757, size: 128, align: 64, extraData: i128 8)
+!757 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "U64", scope: !717, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !758, templateParams: !46, identifier: "8c8d09ef3a30b56e8997e64f4a4a569d")
+!758 = !{!759}
+!759 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !757, file: !5, baseType: !90, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!760 = !DIDerivedType(tag: DW_TAG_member, name: "F32", scope: !720, file: !5, baseType: !761, size: 128, align: 64, extraData: i128 9)
+!761 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "F32", scope: !717, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !762, templateParams: !46, identifier: "b561c863896d760d8a61fadefdeec708")
+!762 = !{!763}
+!763 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !761, file: !5, baseType: !764, size: 32, align: 32, offset: 32, flags: DIFlagPublic)
+!764 = !DIBasicType(name: "f32", size: 32, encoding: DW_ATE_float)
+!765 = !DIDerivedType(tag: DW_TAG_member, name: "F64", scope: !720, file: !5, baseType: !766, size: 128, align: 64, extraData: i128 10)
+!766 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "F64", scope: !717, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !767, templateParams: !46, identifier: "833cddc71cbeff4994a97bcd2953d5e9")
+!767 = !{!768}
+!768 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !766, file: !5, baseType: !769, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!769 = !DIBasicType(name: "f64", size: 64, encoding: DW_ATE_float)
+!770 = !DIDerivedType(tag: DW_TAG_member, scope: !717, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagArtificial)
+!771 = !{!772}
+!772 = !DITemplateTypeParameter(name: "T", type: !717)
+!773 = !{!774}
+!774 = !DISubrange(count: 64, lowerBound: 0)
+!775 = !DIDerivedType(tag: DW_TAG_member, name: "len", scope: !706, file: !5, baseType: !21, size: 64, align: 64, offset: 8192, flags: DIFlagPrivate)
+!776 = !{!777}
+!777 = !DITemplateTypeParameter(name: "A", type: !778)
+!778 = !DICompositeType(tag: DW_TAG_array_type, baseType: !717, size: 8192, align: 64, elements: !773)
+!779 = !DIDerivedType(tag: DW_TAG_member, name: "pc", scope: !620, file: !5, baseType: !120, size: 128, align: 64, offset: 640, flags: DIFlagPrivate)
+!780 = !DIDerivedType(tag: DW_TAG_member, name: "expression_stack", scope: !620, file: !5, baseType: !781, size: 64, align: 64, offset: 9088, flags: DIFlagPrivate)
+!781 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ArrayVec<[(gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>, gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>); 0]>", scope: !237, file: !5, size: 64, align: 64, flags: DIFlagProtected, elements: !782, templateParams: !801, identifier: "8d9c2b45a9b65c919587001001717f17")
+!782 = !{!783, !800}
+!783 = !DIDerivedType(tag: DW_TAG_member, name: "storage", scope: !781, file: !5, baseType: !784, align: 64, flags: DIFlagPrivate)
+!784 = !DICompositeType(tag: DW_TAG_array_type, baseType: !785, align: 64, elements: !798)
+!785 = distinct !DICompositeType(tag: DW_TAG_union_type, name: "MaybeUninit<(gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>, gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>)>", scope: !242, file: !5, size: 256, align: 64, elements: !786, templateParams: !796, identifier: "b96b05b25060709f1f9ba7dbb105f622")
+!786 = !{!787, !788}
+!787 = !DIDerivedType(tag: DW_TAG_member, name: "uninit", scope: !785, file: !5, baseType: !246, align: 8)
+!788 = !DIDerivedType(tag: DW_TAG_member, name: "value", scope: !785, file: !5, baseType: !789, size: 256, align: 64)
+!789 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ManuallyDrop<(gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>, gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>)>", scope: !249, file: !5, size: 256, align: 64, flags: DIFlagPublic, elements: !790, templateParams: !796, identifier: "5ab52ed8045fcd3b3de870a7f5e8be9c")
+!790 = !{!791}
+!791 = !DIDerivedType(tag: DW_TAG_member, name: "value", scope: !789, file: !5, baseType: !792, size: 256, align: 64, flags: DIFlagPrivate)
+!792 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "(gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>, gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>)", file: !5, size: 256, align: 64, elements: !793, templateParams: !46, identifier: "120acc42d3a3b94d11c3bb50c5e39835")
+!793 = !{!794, !795}
+!794 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !792, file: !5, baseType: !120, size: 128, align: 64)
+!795 = !DIDerivedType(tag: DW_TAG_member, name: "__1", scope: !792, file: !5, baseType: !120, size: 128, align: 64, offset: 128)
+!796 = !{!797}
+!797 = !DITemplateTypeParameter(name: "T", type: !792)
+!798 = !{!799}
+!799 = !DISubrange(count: 0, lowerBound: 0)
+!800 = !DIDerivedType(tag: DW_TAG_member, name: "len", scope: !781, file: !5, baseType: !21, size: 64, align: 64, flags: DIFlagPrivate)
+!801 = !{!802}
+!802 = !DITemplateTypeParameter(name: "A", type: !803)
+!803 = !DICompositeType(tag: DW_TAG_array_type, baseType: !792, align: 64, elements: !798)
+!804 = !DIDerivedType(tag: DW_TAG_member, name: "value_result", scope: !620, file: !5, baseType: !805, size: 128, align: 64, offset: 384, flags: DIFlagPrivate)
+!805 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Option<gimli::read::value::Value>", scope: !39, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !806, templateParams: !46, identifier: "e1364e1d42e393154265328f988592b1")
+!806 = !{!807}
+!807 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !805, file: !5, size: 128, align: 64, elements: !808, templateParams: !46, identifier: "2963a62c96890b42de64d3f49ad31868", discriminator: !815)
+!808 = !{!809, !811}
+!809 = !DIDerivedType(tag: DW_TAG_member, name: "None", scope: !807, file: !5, baseType: !810, size: 128, align: 64, extraData: i128 11)
+!810 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "None", scope: !805, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !46, templateParams: !771, identifier: "2683f935a05e998d96e137521e8b07c3")
+!811 = !DIDerivedType(tag: DW_TAG_member, name: "Some", scope: !807, file: !5, baseType: !812, size: 128, align: 64)
+!812 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Some", scope: !805, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !813, templateParams: !771, identifier: "5d048e646ac07fd732c4ffd8213bf634")
+!813 = !{!814}
+!814 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !812, file: !5, baseType: !717, size: 128, align: 64, flags: DIFlagPublic)
+!815 = !DIDerivedType(tag: DW_TAG_member, scope: !805, file: !5, baseType: !26, size: 8, align: 8, flags: DIFlagArtificial)
+!816 = !DIDerivedType(tag: DW_TAG_member, name: "result", scope: !620, file: !5, baseType: !817, size: 512, align: 64, offset: 9152, flags: DIFlagPrivate)
+!817 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ArrayVec<[gimli::read::op::Piece<gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>, usize>; 1]>", scope: !237, file: !5, size: 512, align: 64, flags: DIFlagProtected, elements: !818, templateParams: !869, identifier: "a656b8eefdf176ad8e0ebffbfe315302")
+!818 = !{!819, !868}
+!819 = !DIDerivedType(tag: DW_TAG_member, name: "storage", scope: !817, file: !5, baseType: !820, size: 448, align: 64, flags: DIFlagPrivate)
+!820 = !DICompositeType(tag: DW_TAG_array_type, baseType: !821, size: 448, align: 64, elements: !866)
+!821 = distinct !DICompositeType(tag: DW_TAG_union_type, name: "MaybeUninit<gimli::read::op::Piece<gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>, usize>>", scope: !242, file: !5, size: 448, align: 64, elements: !822, templateParams: !864, identifier: "2530b61c20edfb23a48f57d629ac63e3")
+!822 = !{!823, !824}
+!823 = !DIDerivedType(tag: DW_TAG_member, name: "uninit", scope: !821, file: !5, baseType: !246, align: 8)
+!824 = !DIDerivedType(tag: DW_TAG_member, name: "value", scope: !821, file: !5, baseType: !825, size: 448, align: 64)
+!825 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ManuallyDrop<gimli::read::op::Piece<gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>, usize>>", scope: !249, file: !5, size: 448, align: 64, flags: DIFlagPublic, elements: !826, templateParams: !864, identifier: "82751d4c9232c7ab6fbf72d12a97f018")
+!826 = !{!827}
+!827 = !DIDerivedType(tag: DW_TAG_member, name: "value", scope: !825, file: !5, baseType: !828, size: 448, align: 64, flags: DIFlagPrivate)
+!828 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Piece<gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>, usize>", scope: !615, file: !5, size: 448, align: 64, flags: DIFlagPublic, elements: !829, templateParams: !134, identifier: "d1f755cca082fc24287af19e88191dcf")
+!829 = !{!830, !831, !832}
+!830 = !DIDerivedType(tag: DW_TAG_member, name: "size_in_bits", scope: !828, file: !5, baseType: !178, size: 128, align: 64, flags: DIFlagPublic)
+!831 = !DIDerivedType(tag: DW_TAG_member, name: "bit_offset", scope: !828, file: !5, baseType: !178, size: 128, align: 64, offset: 128, flags: DIFlagPublic)
+!832 = !DIDerivedType(tag: DW_TAG_member, name: "location", scope: !828, file: !5, baseType: !833, size: 192, align: 64, offset: 256, flags: DIFlagPublic)
+!833 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Location<gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>, usize>", scope: !615, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !834, templateParams: !46, identifier: "d395133be8e29139e6d036a34ad208b9")
+!834 = !{!835}
+!835 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !833, file: !5, size: 192, align: 64, elements: !836, templateParams: !46, identifier: "55c2436db36ecc04a2c666eb5b859a4c", discriminator: !863)
+!836 = !{!837, !839, !843, !847, !851, !855}
+!837 = !DIDerivedType(tag: DW_TAG_member, name: "Empty", scope: !835, file: !5, baseType: !838, size: 192, align: 64, extraData: i128 0)
+!838 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Empty", scope: !833, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !46, templateParams: !134, identifier: "b49e8bb1f2f8602041a87566dbfb91d2")
+!839 = !DIDerivedType(tag: DW_TAG_member, name: "Register", scope: !835, file: !5, baseType: !840, size: 192, align: 64, extraData: i128 1)
+!840 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Register", scope: !833, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !841, templateParams: !134, identifier: "7eec3de9d20d4cc0862ee3fa2252804e")
+!841 = !{!842}
+!842 = !DIDerivedType(tag: DW_TAG_member, name: "register", scope: !840, file: !5, baseType: !115, size: 16, align: 16, offset: 16, flags: DIFlagPublic)
+!843 = !DIDerivedType(tag: DW_TAG_member, name: "Address", scope: !835, file: !5, baseType: !844, size: 192, align: 64, extraData: i128 2)
+!844 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Address", scope: !833, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !845, templateParams: !134, identifier: "ea4da798d514389ff4fefeb6e23e4cb7")
+!845 = !{!846}
+!846 = !DIDerivedType(tag: DW_TAG_member, name: "address", scope: !844, file: !5, baseType: !90, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!847 = !DIDerivedType(tag: DW_TAG_member, name: "Value", scope: !835, file: !5, baseType: !848, size: 192, align: 64, extraData: i128 3)
+!848 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Value", scope: !833, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !849, templateParams: !134, identifier: "d10a76e8b53088f5d90ae0638223103")
+!849 = !{!850}
+!850 = !DIDerivedType(tag: DW_TAG_member, name: "value", scope: !848, file: !5, baseType: !717, size: 128, align: 64, offset: 64, flags: DIFlagPublic)
+!851 = !DIDerivedType(tag: DW_TAG_member, name: "Bytes", scope: !835, file: !5, baseType: !852, size: 192, align: 64, extraData: i128 4)
+!852 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Bytes", scope: !833, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !853, templateParams: !134, identifier: "b8af572c2d30f325beba0759bb7b15bc")
+!853 = !{!854}
+!854 = !DIDerivedType(tag: DW_TAG_member, name: "value", scope: !852, file: !5, baseType: !120, size: 128, align: 64, offset: 64, flags: DIFlagPublic)
+!855 = !DIDerivedType(tag: DW_TAG_member, name: "ImplicitPointer", scope: !835, file: !5, baseType: !856, size: 192, align: 64, extraData: i128 5)
+!856 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ImplicitPointer", scope: !833, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !857, templateParams: !134, identifier: "bdbf25dabffeb5d0679ee8e169accba1")
+!857 = !{!858, !862}
+!858 = !DIDerivedType(tag: DW_TAG_member, name: "value", scope: !856, file: !5, baseType: !859, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!859 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "DebugInfoOffset<usize>", scope: !25, file: !5, size: 64, align: 64, flags: DIFlagPublic, elements: !860, templateParams: !221, identifier: "240ef1e2d1384c4db51fe4b33cb864ae")
+!860 = !{!861}
+!861 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !859, file: !5, baseType: !21, size: 64, align: 64, flags: DIFlagPublic)
+!862 = !DIDerivedType(tag: DW_TAG_member, name: "byte_offset", scope: !856, file: !5, baseType: !113, size: 64, align: 64, offset: 128, flags: DIFlagPublic)
+!863 = !DIDerivedType(tag: DW_TAG_member, scope: !833, file: !5, baseType: !118, size: 16, align: 16, flags: DIFlagArtificial)
+!864 = !{!865}
+!865 = !DITemplateTypeParameter(name: "T", type: !828)
+!866 = !{!867}
+!867 = !DISubrange(count: 1, lowerBound: 0)
+!868 = !DIDerivedType(tag: DW_TAG_member, name: "len", scope: !817, file: !5, baseType: !21, size: 64, align: 64, offset: 448, flags: DIFlagPrivate)
+!869 = !{!870}
+!870 = !DITemplateTypeParameter(name: "A", type: !871)
+!871 = !DICompositeType(tag: DW_TAG_array_type, baseType: !828, size: 448, align: 64, elements: !866)
+!872 = !{!135, !300}
+!873 = !DILocalVariable(name: "result", scope: !874, file: !3, line: 87, type: !875, align: 8)
+!874 = distinct !DILexicalBlock(scope: !619, file: !3, line: 87, column: 9)
+!875 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "EvaluationResult<gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>>", scope: !615, file: !5, size: 320, align: 64, flags: DIFlagPublic, elements: !876, templateParams: !46, identifier: "51d4bb2ed321c46272ccbeec740b49c")
+!876 = !{!877}
+!877 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !875, file: !5, size: 320, align: 64, elements: !878, templateParams: !46, identifier: "b5b0d5dbd4679161fe022116a9af800", discriminator: !945)
+!878 = !{!879, !881, !891, !896, !898, !902, !904, !921, !925, !929, !933, !941}
+!879 = !DIDerivedType(tag: DW_TAG_member, name: "Complete", scope: !877, file: !5, baseType: !880, size: 320, align: 64, extraData: i128 2)
+!880 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Complete", scope: !875, file: !5, size: 320, align: 64, flags: DIFlagPublic, elements: !46, templateParams: !204, identifier: "98b5d1403036cb41e2f44cc6ea0efccd")
+!881 = !DIDerivedType(tag: DW_TAG_member, name: "RequiresMemory", scope: !877, file: !5, baseType: !882, size: 320, align: 64)
+!882 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "RequiresMemory", scope: !875, file: !5, size: 320, align: 64, flags: DIFlagPublic, elements: !883, templateParams: !204, identifier: "8003e489afddfa79b3b73f5fc4a14802")
+!883 = !{!884, !885, !886, !887}
+!884 = !DIDerivedType(tag: DW_TAG_member, name: "address", scope: !882, file: !5, baseType: !90, size: 64, align: 64, offset: 128, flags: DIFlagPublic)
+!885 = !DIDerivedType(tag: DW_TAG_member, name: "size", scope: !882, file: !5, baseType: !26, size: 8, align: 8, offset: 256, flags: DIFlagPublic)
+!886 = !DIDerivedType(tag: DW_TAG_member, name: "space", scope: !882, file: !5, baseType: !178, size: 128, align: 64, flags: DIFlagPublic)
+!887 = !DIDerivedType(tag: DW_TAG_member, name: "base_type", scope: !882, file: !5, baseType: !888, size: 64, align: 64, offset: 192, flags: DIFlagPublic)
+!888 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnitOffset<usize>", scope: !17, file: !5, size: 64, align: 64, flags: DIFlagPublic, elements: !889, templateParams: !221, identifier: "da1d24a786a32ca5ac7efb4fa178ae2b")
+!889 = !{!890}
+!890 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !888, file: !5, baseType: !21, size: 64, align: 64, flags: DIFlagPublic)
+!891 = !DIDerivedType(tag: DW_TAG_member, name: "RequiresRegister", scope: !877, file: !5, baseType: !892, size: 320, align: 64, extraData: i128 4)
+!892 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "RequiresRegister", scope: !875, file: !5, size: 320, align: 64, flags: DIFlagPublic, elements: !893, templateParams: !204, identifier: "1536d1e70f21cd80ea98159552d5b03")
+!893 = !{!894, !895}
+!894 = !DIDerivedType(tag: DW_TAG_member, name: "register", scope: !892, file: !5, baseType: !115, size: 16, align: 16, offset: 128, flags: DIFlagPublic)
+!895 = !DIDerivedType(tag: DW_TAG_member, name: "base_type", scope: !892, file: !5, baseType: !888, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!896 = !DIDerivedType(tag: DW_TAG_member, name: "RequiresFrameBase", scope: !877, file: !5, baseType: !897, size: 320, align: 64, extraData: i128 5)
+!897 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "RequiresFrameBase", scope: !875, file: !5, size: 320, align: 64, flags: DIFlagPublic, elements: !46, templateParams: !204, identifier: "aa3ffa474f6042aaf55b84c6e95fc29")
+!898 = !DIDerivedType(tag: DW_TAG_member, name: "RequiresTls", scope: !877, file: !5, baseType: !899, size: 320, align: 64, extraData: i128 6)
+!899 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "RequiresTls", scope: !875, file: !5, size: 320, align: 64, flags: DIFlagPublic, elements: !900, templateParams: !204, identifier: "b1e8d9ef29a993fcbf2f73f7de6e296b")
+!900 = !{!901}
+!901 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !899, file: !5, baseType: !90, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!902 = !DIDerivedType(tag: DW_TAG_member, name: "RequiresCallFrameCfa", scope: !877, file: !5, baseType: !903, size: 320, align: 64, extraData: i128 7)
+!903 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "RequiresCallFrameCfa", scope: !875, file: !5, size: 320, align: 64, flags: DIFlagPublic, elements: !46, templateParams: !204, identifier: "72816062d26fd7b762fb5e5ed129ff2")
+!904 = !DIDerivedType(tag: DW_TAG_member, name: "RequiresAtLocation", scope: !877, file: !5, baseType: !905, size: 320, align: 64, extraData: i128 8)
+!905 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "RequiresAtLocation", scope: !875, file: !5, size: 320, align: 64, flags: DIFlagPublic, elements: !906, templateParams: !204, identifier: "2af96c961b57c627b47a5a74aa0f23eb")
+!906 = !{!907}
+!907 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !905, file: !5, baseType: !908, size: 128, align: 64, offset: 64, flags: DIFlagPublic)
+!908 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "DieReference<usize>", scope: !615, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !909, templateParams: !46, identifier: "634c048e6d97d42d69a1dd4c2a2b8d27")
+!909 = !{!910}
+!910 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !908, file: !5, size: 128, align: 64, elements: !911, templateParams: !46, identifier: "8970939422a637ddea811e213749faa", discriminator: !920)
+!911 = !{!912, !916}
+!912 = !DIDerivedType(tag: DW_TAG_member, name: "UnitRef", scope: !910, file: !5, baseType: !913, size: 128, align: 64, extraData: i128 0)
+!913 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnitRef", scope: !908, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !914, templateParams: !221, identifier: "a26a62911cf741a4d4a44e27f0a5ed51")
+!914 = !{!915}
+!915 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !913, file: !5, baseType: !888, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!916 = !DIDerivedType(tag: DW_TAG_member, name: "DebugInfoRef", scope: !910, file: !5, baseType: !917, size: 128, align: 64, extraData: i128 1)
+!917 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "DebugInfoRef", scope: !908, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !918, templateParams: !221, identifier: "301c17503a4b73a819dffa9d3ff4f17")
+!918 = !{!919}
+!919 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !917, file: !5, baseType: !859, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!920 = !DIDerivedType(tag: DW_TAG_member, scope: !908, file: !5, baseType: !90, size: 64, align: 64, flags: DIFlagArtificial)
+!921 = !DIDerivedType(tag: DW_TAG_member, name: "RequiresEntryValue", scope: !877, file: !5, baseType: !922, size: 320, align: 64, extraData: i128 9)
+!922 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "RequiresEntryValue", scope: !875, file: !5, size: 320, align: 64, flags: DIFlagPublic, elements: !923, templateParams: !204, identifier: "faa1a56431d594ba83847c61cba8413e")
+!923 = !{!924}
+!924 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !922, file: !5, baseType: !614, size: 128, align: 64, offset: 64, flags: DIFlagPublic)
+!925 = !DIDerivedType(tag: DW_TAG_member, name: "RequiresParameterRef", scope: !877, file: !5, baseType: !926, size: 320, align: 64, extraData: i128 10)
+!926 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "RequiresParameterRef", scope: !875, file: !5, size: 320, align: 64, flags: DIFlagPublic, elements: !927, templateParams: !204, identifier: "49d532a26f9036eb3e31efa5492676d5")
+!927 = !{!928}
+!928 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !926, file: !5, baseType: !888, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!929 = !DIDerivedType(tag: DW_TAG_member, name: "RequiresRelocatedAddress", scope: !877, file: !5, baseType: !930, size: 320, align: 64, extraData: i128 11)
+!930 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "RequiresRelocatedAddress", scope: !875, file: !5, size: 320, align: 64, flags: DIFlagPublic, elements: !931, templateParams: !204, identifier: "6113feb90ddaa61b8e4c18f58521fe69")
+!931 = !{!932}
+!932 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !930, file: !5, baseType: !90, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!933 = !DIDerivedType(tag: DW_TAG_member, name: "RequiresIndexedAddress", scope: !877, file: !5, baseType: !934, size: 320, align: 64, extraData: i128 12)
+!934 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "RequiresIndexedAddress", scope: !875, file: !5, size: 320, align: 64, flags: DIFlagPublic, elements: !935, templateParams: !204, identifier: "c88ebe4f9abcc34adc5bed282975fd47")
+!935 = !{!936, !940}
+!936 = !DIDerivedType(tag: DW_TAG_member, name: "index", scope: !934, file: !5, baseType: !937, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!937 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "DebugAddrIndex<usize>", scope: !25, file: !5, size: 64, align: 64, flags: DIFlagPublic, elements: !938, templateParams: !221, identifier: "ab8b077a231ba172fd0f54a5426fad2b")
+!938 = !{!939}
+!939 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !937, file: !5, baseType: !21, size: 64, align: 64, flags: DIFlagPublic)
+!940 = !DIDerivedType(tag: DW_TAG_member, name: "relocate", scope: !934, file: !5, baseType: !103, size: 8, align: 8, offset: 128, flags: DIFlagPublic)
+!941 = !DIDerivedType(tag: DW_TAG_member, name: "RequiresBaseType", scope: !877, file: !5, baseType: !942, size: 320, align: 64, extraData: i128 13)
+!942 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "RequiresBaseType", scope: !875, file: !5, size: 320, align: 64, flags: DIFlagPublic, elements: !943, templateParams: !204, identifier: "737c9cc6266a1749293672abd5b69040")
+!943 = !{!944}
+!944 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !942, file: !5, baseType: !888, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!945 = !DIDerivedType(tag: DW_TAG_member, scope: !875, file: !5, baseType: !90, size: 64, align: 64, flags: DIFlagArtificial)
+!946 = !DILocalVariable(name: "residual", scope: !947, file: !3, line: 87, type: !948, align: 8)
+!947 = distinct !DILexicalBlock(scope: !619, file: !3, line: 87, column: 41)
+!948 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Result<core::convert::Infallible, gimli::read::Error>", scope: !305, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !949, templateParams: !46, identifier: "919ce7a601fb4a13ba9fff7fa5c31214")
+!949 = !{!950}
+!950 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !948, file: !5, size: 128, align: 64, elements: !951, templateParams: !46, identifier: "4d9c0f1a76ad6640c91b178b0a25cb6d")
+!951 = !{!952, !962}
+!952 = !DIDerivedType(tag: DW_TAG_member, name: "Ok", scope: !950, file: !5, baseType: !953, size: 128, align: 64)
+!953 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Ok", scope: !948, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !954, templateParams: !960, identifier: "80500a1964edcdf9df1c06fd016a020c")
+!954 = !{!955}
+!955 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !953, file: !5, baseType: !956, align: 8, flags: DIFlagPublic)
+!956 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Infallible", scope: !957, file: !5, align: 8, flags: DIFlagPublic, elements: !958, templateParams: !46, identifier: "64765147ab70b22e683668bfcd1e19c6")
+!957 = !DINamespace(name: "convert", scope: !40)
+!958 = !{!959}
+!959 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !956, file: !5, align: 8, elements: !46, identifier: "7ae49c0d49b0241a817f4820926e50a2")
+!960 = !{!961, !314}
+!961 = !DITemplateTypeParameter(name: "T", type: !956)
+!962 = !DIDerivedType(tag: DW_TAG_member, name: "Err", scope: !950, file: !5, baseType: !963, size: 128, align: 64)
+!963 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Err", scope: !948, file: !5, size: 128, align: 64, flags: DIFlagPublic, elements: !964, templateParams: !960, identifier: "b0437e745c7777a31897e0e59bf1640b")
+!964 = !{!965}
+!965 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !963, file: !5, baseType: !315, size: 128, align: 64, flags: DIFlagPublic)
+!966 = !DILocalVariable(name: "val", scope: !967, file: !3, line: 87, type: !875, align: 8)
+!967 = distinct !DILexicalBlock(scope: !619, file: !3, line: 87, column: 26)
+!968 = !DILocalVariable(name: "address", scope: !969, file: !3, line: 91, type: !90, align: 8)
+!969 = distinct !DILexicalBlock(scope: !874, file: !3, line: 91, column: 17)
+!970 = !DILocalVariable(name: "value", scope: !971, file: !3, line: 92, type: !21, align: 8)
+!971 = distinct !DILexicalBlock(scope: !969, file: !3, line: 92, column: 21)
+!972 = !DILocalVariable(name: "residual", scope: !973, file: !3, line: 93, type: !948, align: 8)
+!973 = distinct !DILexicalBlock(scope: !971, file: !3, line: 93, column: 81)
+!974 = !DILocalVariable(name: "val", scope: !975, file: !3, line: 93, type: !875, align: 8)
+!975 = distinct !DILexicalBlock(scope: !971, file: !3, line: 93, column: 30)
+!976 = !DILocalVariable(name: "register", scope: !977, file: !3, line: 95, type: !115, align: 2)
+!977 = distinct !DILexicalBlock(scope: !874, file: !3, line: 95, column: 17)
+!978 = !DILocalVariable(name: "value", scope: !979, file: !3, line: 96, type: !21, align: 8)
+!979 = distinct !DILexicalBlock(scope: !977, file: !3, line: 96, column: 21)
+!980 = !DILocalVariable(name: "residual", scope: !981, file: !3, line: 97, type: !948, align: 8)
+!981 = distinct !DILexicalBlock(scope: !979, file: !3, line: 97, column: 83)
+!982 = !DILocalVariable(name: "val", scope: !983, file: !3, line: 97, type: !875, align: 8)
+!983 = distinct !DILexicalBlock(scope: !979, file: !3, line: 97, column: 30)
+!984 = !DILocalVariable(name: "address", scope: !985, file: !3, line: 99, type: !90, align: 8)
+!985 = distinct !DILexicalBlock(scope: !874, file: !3, line: 99, column: 17)
+!986 = !DILocalVariable(name: "value", scope: !987, file: !3, line: 100, type: !21, align: 8)
+!987 = distinct !DILexicalBlock(scope: !985, file: !3, line: 100, column: 21)
+!988 = !DILocalVariable(name: "residual", scope: !989, file: !3, line: 101, type: !948, align: 8)
+!989 = distinct !DILexicalBlock(scope: !987, file: !3, line: 101, column: 81)
+!990 = !DILocalVariable(name: "val", scope: !991, file: !3, line: 101, type: !875, align: 8)
+!991 = distinct !DILexicalBlock(scope: !987, file: !3, line: 101, column: 30)
+!992 = !DILocalVariable(name: "residual", scope: !993, file: !3, line: 111, type: !948, align: 8)
+!993 = distinct !DILexicalBlock(scope: !874, file: !3, line: 111, column: 56)
+!994 = !DILocalVariable(name: "val", scope: !995, file: !3, line: 108, type: !996, align: 8)
+!995 = distinct !DILexicalBlock(scope: !874, file: !3, line: 108, column: 19)
+!996 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&gimli::read::op::Piece<gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>, usize>", baseType: !828, size: 64, align: 64, dwarfAddressSpace: 0)
+!997 = !DILocalVariable(name: "address", scope: !998, file: !3, line: 114, type: !90, align: 8)
+!998 = distinct !DILexicalBlock(scope: !874, file: !3, line: 114, column: 17)
+!999 = !DILocation(line: 1102, column: 23, scope: !1000, inlinedAt: !1038)
+!1000 = distinct !DILexicalBlock(scope: !1002, file: !1001, line: 1102, column: 13)
+!1001 = !DIFile(filename: "/rustc/636d7ff91b9847d6d43c7bbe023568828f6e3246/library/core/src/result.rs", directory: "", checksumkind: CSK_MD5, checksum: "13dbc19e8bd386b8c9d62247cee85b56")
+!1002 = distinct !DISubprogram(name: "unwrap<gimli::read::op::Expression<gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>>, gimli::read::Error>", linkageName: "_ZN4core6result19Result$LT$T$C$E$GT$6unwrap17h14fd7c0569eb842aE", scope: !1003, file: !1001, line: 1096, type: !1018, scopeLine: 1096, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !564, templateParams: !1011, declaration: !1032, retainedNodes: !1033)
+!1003 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Result<gimli::read::op::Expression<gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>>, gimli::read::Error>", scope: !305, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !1004, templateParams: !46, identifier: "769c6b05a4491edd8f1f7ebaabbcd9ce")
+!1004 = !{!1005}
+!1005 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !1003, file: !5, size: 192, align: 64, elements: !1006, templateParams: !46, identifier: "f07482fa33f1f68430e7cc41a9027a6b", discriminator: !1017)
+!1006 = !{!1007, !1013}
+!1007 = !DIDerivedType(tag: DW_TAG_member, name: "Ok", scope: !1005, file: !5, baseType: !1008, size: 192, align: 64, extraData: i128 0)
+!1008 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Ok", scope: !1003, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !1009, templateParams: !1011, identifier: "b3503abe43c6c1a92f481dfc52138ec2")
+!1009 = !{!1010}
+!1010 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !1008, file: !5, baseType: !614, size: 128, align: 64, offset: 64, flags: DIFlagPublic)
+!1011 = !{!1012, !314}
+!1012 = !DITemplateTypeParameter(name: "T", type: !614)
+!1013 = !DIDerivedType(tag: DW_TAG_member, name: "Err", scope: !1005, file: !5, baseType: !1014, size: 192, align: 64, extraData: i128 1)
+!1014 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Err", scope: !1003, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !1015, templateParams: !1011, identifier: "4538ee170ef87c5e49ce2f27f79f37a2")
+!1015 = !{!1016}
+!1016 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !1014, file: !5, baseType: !315, size: 128, align: 64, offset: 64, flags: DIFlagPublic)
+!1017 = !DIDerivedType(tag: DW_TAG_member, scope: !1003, file: !5, baseType: !90, size: 64, align: 64, flags: DIFlagArtificial)
+!1018 = !DISubroutineType(types: !1019)
+!1019 = !{!614, !1003, !1020}
+!1020 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&core::panic::location::Location", baseType: !1021, size: 64, align: 64, dwarfAddressSpace: 0)
+!1021 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Location", scope: !1022, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !1024, templateParams: !46, identifier: "e063870a552be7101e2bcd793a8716b0")
+!1022 = !DINamespace(name: "location", scope: !1023)
+!1023 = !DINamespace(name: "panic", scope: !40)
+!1024 = !{!1025, !1030, !1031}
+!1025 = !DIDerivedType(tag: DW_TAG_member, name: "file", scope: !1021, file: !5, baseType: !1026, size: 128, align: 64, flags: DIFlagPrivate)
+!1026 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "&str", file: !5, size: 128, align: 64, elements: !1027, templateParams: !46, identifier: "9277eecd40495f85161460476aacc992")
+!1027 = !{!1028, !1029}
+!1028 = !DIDerivedType(tag: DW_TAG_member, name: "data_ptr", scope: !1026, file: !5, baseType: !127, size: 64, align: 64)
+!1029 = !DIDerivedType(tag: DW_TAG_member, name: "length", scope: !1026, file: !5, baseType: !21, size: 64, align: 64, offset: 64)
+!1030 = !DIDerivedType(tag: DW_TAG_member, name: "line", scope: !1021, file: !5, baseType: !535, size: 32, align: 32, offset: 128, flags: DIFlagPrivate)
+!1031 = !DIDerivedType(tag: DW_TAG_member, name: "col", scope: !1021, file: !5, baseType: !535, size: 32, align: 32, offset: 160, flags: DIFlagPrivate)
+!1032 = !DISubprogram(name: "unwrap<gimli::read::op::Expression<gimli::read::endian_slice::EndianSlice<gimli::endianity::LittleEndian>>, gimli::read::Error>", linkageName: "_ZN4core6result19Result$LT$T$C$E$GT$6unwrap17h14fd7c0569eb842aE", scope: !1003, file: !1001, line: 1096, type: !1018, scopeLine: 1096, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit, templateParams: !1011)
+!1033 = !{!1034, !1035, !1037}
+!1034 = !DILocalVariable(name: "self", arg: 1, scope: !1002, file: !1001, line: 1096, type: !1003)
+!1035 = !DILocalVariable(name: "t", scope: !1036, file: !1001, line: 1101, type: !614, align: 8)
+!1036 = distinct !DILexicalBlock(scope: !1002, file: !1001, line: 1101, column: 13)
+!1037 = !DILocalVariable(name: "e", scope: !1000, file: !1001, line: 1102, type: !315, align: 8)
+!1038 = distinct !DILocation(line: 84, column: 20, scope: !2)
+!1039 = distinct !DISubprogram(name: "_Unwind_Resume", scope: !7, file: !1040, line: 346, type: !1041, scopeLine: 346, flags: DIFlagPrototyped | DIFlagNoReturn, spFlags: DISPFlagDefinition, unit: !564, templateParams: !46, retainedNodes: !1112)
+!1040 = !DIFile(filename: "src/unwinder/mod.rs", directory: "/home/dev/ecosystem/unwinding", checksumkind: CSK_MD5, checksum: "0b7cd150e86dd087aeaa8e0e18bae6d9")
+!1041 = !DISubroutineType(types: !1042)
+!1042 = !{null, !1043}
+!1043 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "*mut unwinding::unwinder::UnwindException", baseType: !1044, size: 64, align: 64, dwarfAddressSpace: 0)
+!1044 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnwindException", scope: !7, file: !5, size: 256, align: 64, flags: DIFlagPublic, elements: !1045, templateParams: !46, identifier: "f6e359707e96b28f68e0123bb3490311")
+!1045 = !{!1046, !1047, !1068, !1109, !1110}
+!1046 = !DIDerivedType(tag: DW_TAG_member, name: "exception_class", scope: !1044, file: !5, baseType: !90, size: 64, align: 64, flags: DIFlagPublic)
+!1047 = !DIDerivedType(tag: DW_TAG_member, name: "exception_cleanup", scope: !1044, file: !5, baseType: !1048, size: 64, align: 64, offset: 64, flags: DIFlagPublic)
+!1048 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Option<unsafe extern \22C\22 fn(unwinding::abi::UnwindReasonCode, *mut unwinding::unwinder::UnwindException)>", scope: !39, file: !5, size: 64, align: 64, flags: DIFlagPublic, elements: !1049, templateParams: !46, identifier: "55edbca04b7b79406fe597df5da69fb6")
+!1049 = !{!1050}
+!1050 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !1048, file: !5, size: 64, align: 64, elements: !1051, templateParams: !46, identifier: "d4ba33946a9e213e48833b2948ffc69a", discriminator: !1067)
+!1051 = !{!1052, !1063}
+!1052 = !DIDerivedType(tag: DW_TAG_member, name: "None", scope: !1050, file: !5, baseType: !1053, size: 64, align: 64, extraData: i128 0)
+!1053 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "None", scope: !1048, file: !5, size: 64, align: 64, flags: DIFlagPublic, elements: !46, templateParams: !1054, identifier: "5f49070303e2d908386f0a327220e7")
+!1054 = !{!1055}
+!1055 = !DITemplateTypeParameter(name: "T", type: !1056)
+!1056 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "unsafe extern \22C\22 fn(unwinding::abi::UnwindReasonCode, *mut unwinding::unwinder::UnwindException)", baseType: !1057, size: 64, align: 64, dwarfAddressSpace: 0)
+!1057 = !DISubroutineType(types: !1058)
+!1058 = !{null, !1059, !1043}
+!1059 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnwindReasonCode", scope: !1060, file: !5, size: 32, align: 32, flags: DIFlagPublic, elements: !1061, templateParams: !46, identifier: "78d1c20b6f4c6f13f91e6941a59e3070")
+!1060 = !DINamespace(name: "abi", scope: !8)
+!1061 = !{!1062}
+!1062 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !1059, file: !5, baseType: !747, size: 32, align: 32, flags: DIFlagPublic)
+!1063 = !DIDerivedType(tag: DW_TAG_member, name: "Some", scope: !1050, file: !5, baseType: !1064, size: 64, align: 64)
+!1064 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Some", scope: !1048, file: !5, size: 64, align: 64, flags: DIFlagPublic, elements: !1065, templateParams: !1054, identifier: "88c5936a7984265e3c9f2ddf1a30acca")
+!1065 = !{!1066}
+!1066 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !1064, file: !5, baseType: !1056, size: 64, align: 64, flags: DIFlagPublic)
+!1067 = !DIDerivedType(tag: DW_TAG_member, scope: !1048, file: !5, baseType: !90, size: 64, align: 64, flags: DIFlagArtificial)
+!1068 = !DIDerivedType(tag: DW_TAG_member, name: "private_1", scope: !1044, file: !5, baseType: !1069, size: 64, align: 64, offset: 128, flags: DIFlagPrivate)
+!1069 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Option<unsafe extern \22C\22 fn(i32, unwinding::abi::UnwindAction, u64, *mut unwinding::unwinder::UnwindException, &mut unwinding::unwinder::UnwindContext, *mut core::ffi::c_void) -> unwinding::abi::UnwindReasonCode>", scope: !39, file: !5, size: 64, align: 64, flags: DIFlagPublic, elements: !1070, templateParams: !46, identifier: "3fd0f4ff1cf8b26bfa970433d6b9be1f")
+!1070 = !{!1071}
+!1071 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !1069, file: !5, size: 64, align: 64, elements: !1072, templateParams: !46, identifier: "c06dd7a3f8e0e4b1f3c073ade268504e", discriminator: !1108)
+!1072 = !{!1073, !1104}
+!1073 = !DIDerivedType(tag: DW_TAG_member, name: "None", scope: !1071, file: !5, baseType: !1074, size: 64, align: 64, extraData: i128 0)
+!1074 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "None", scope: !1069, file: !5, size: 64, align: 64, flags: DIFlagPublic, elements: !46, templateParams: !1075, identifier: "a7907e0a0f03f43538101bc2ae5b0cc9")
+!1075 = !{!1076}
+!1076 = !DITemplateTypeParameter(name: "T", type: !1077)
+!1077 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "unsafe extern \22C\22 fn(i32, unwinding::abi::UnwindAction, u64, *mut unwinding::unwinder::UnwindException, &mut unwinding::unwinder::UnwindContext, *mut core::ffi::c_void) -> unwinding::abi::UnwindReasonCode", baseType: !1078, size: 64, align: 64, dwarfAddressSpace: 0)
+!1078 = !DISubroutineType(types: !1079)
+!1079 = !{!1059, !747, !1080, !90, !1043, !1083, !1103}
+!1080 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnwindAction", scope: !1060, file: !5, size: 32, align: 32, flags: DIFlagPublic, elements: !1081, templateParams: !46, identifier: "364c99c0f0ff127f318feffefcb3c87")
+!1081 = !{!1082}
+!1082 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !1080, file: !5, baseType: !747, size: 32, align: 32, flags: DIFlagPublic)
+!1083 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&mut unwinding::unwinder::UnwindContext", baseType: !1084, size: 64, align: 64, dwarfAddressSpace: 0)
+!1084 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "UnwindContext", scope: !7, file: !5, size: 192, align: 64, flags: DIFlagPublic, elements: !1085, templateParams: !46, identifier: "911f8c19bc1f5e24ad054a625f8be0d6")
+!1085 = !{!1086, !1100, !1102}
+!1086 = !DIDerivedType(tag: DW_TAG_member, name: "frame", scope: !1084, file: !5, baseType: !1087, size: 64, align: 64, offset: 64, flags: DIFlagPrivate)
+!1087 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Option<&unwinding::unwinder::frame::Frame>", scope: !39, file: !5, size: 64, align: 64, flags: DIFlagPublic, elements: !1088, templateParams: !46, identifier: "74fadfe0892d41cd8e0d03eb53ad3e54")
+!1088 = !{!1089}
+!1089 = distinct !DICompositeType(tag: DW_TAG_variant_part, scope: !1087, file: !5, size: 64, align: 64, elements: !1090, templateParams: !46, identifier: "3f92d1546b9840fa83783ed5018281cd", discriminator: !1099)
+!1090 = !{!1091, !1095}
+!1091 = !DIDerivedType(tag: DW_TAG_member, name: "None", scope: !1089, file: !5, baseType: !1092, size: 64, align: 64, extraData: i128 0)
+!1092 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "None", scope: !1087, file: !5, size: 64, align: 64, flags: DIFlagPublic, elements: !46, templateParams: !1093, identifier: "512ffa16cad01e9d1b32a5885a0360bc")
+!1093 = !{!1094}
+!1094 = !DITemplateTypeParameter(name: "T", type: !549)
+!1095 = !DIDerivedType(tag: DW_TAG_member, name: "Some", scope: !1089, file: !5, baseType: !1096, size: 64, align: 64)
+!1096 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Some", scope: !1087, file: !5, size: 64, align: 64, flags: DIFlagPublic, elements: !1097, templateParams: !1093, identifier: "e463cc92afc82dc88438dd3a5d8e906d")
+!1097 = !{!1098}
+!1098 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !1096, file: !5, baseType: !549, size: 64, align: 64, flags: DIFlagPublic)
+!1099 = !DIDerivedType(tag: DW_TAG_member, scope: !1087, file: !5, baseType: !90, size: 64, align: 64, flags: DIFlagArtificial)
+!1100 = !DIDerivedType(tag: DW_TAG_member, name: "ctx", scope: !1084, file: !5, baseType: !1101, size: 64, align: 64, flags: DIFlagPrivate)
+!1101 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&mut unwinding::unwinder::arch::aarch64::Context", baseType: !551, size: 64, align: 64, dwarfAddressSpace: 0)
+!1102 = !DIDerivedType(tag: DW_TAG_member, name: "signal", scope: !1084, file: !5, baseType: !103, size: 8, align: 8, offset: 128, flags: DIFlagPrivate)
+!1103 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "*mut core::ffi::c_void", baseType: !586, size: 64, align: 64, dwarfAddressSpace: 0)
+!1104 = !DIDerivedType(tag: DW_TAG_member, name: "Some", scope: !1071, file: !5, baseType: !1105, size: 64, align: 64)
+!1105 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Some", scope: !1069, file: !5, size: 64, align: 64, flags: DIFlagPublic, elements: !1106, templateParams: !1075, identifier: "757604dfadcc7bc333dd8afe5c3f1b07")
+!1106 = !{!1107}
+!1107 = !DIDerivedType(tag: DW_TAG_member, name: "__0", scope: !1105, file: !5, baseType: !1077, size: 64, align: 64, flags: DIFlagPublic)
+!1108 = !DIDerivedType(tag: DW_TAG_member, scope: !1069, file: !5, baseType: !90, size: 64, align: 64, flags: DIFlagArtificial)
+!1109 = !DIDerivedType(tag: DW_TAG_member, name: "private_2", scope: !1044, file: !5, baseType: !21, size: 64, align: 64, offset: 192, flags: DIFlagPrivate)
+!1110 = !DIDerivedType(tag: DW_TAG_member, name: "private_unused", scope: !1044, file: !5, baseType: !1111, align: 64, offset: 256, flags: DIFlagPrivate)
+!1111 = !DICompositeType(tag: DW_TAG_array_type, baseType: !21, align: 64, elements: !798)
+!1112 = !{!1113}
+!1113 = !DILocalVariable(name: "exception", arg: 1, scope: !1039, file: !1040, line: 346, type: !1043)

From 53c43bab2077644ecf152bebffd921572e418692 Mon Sep 17 00:00:00 2001
From: Nathan Ridge <zeratul976@hotmail.com>
Date: Sun, 25 Aug 2024 02:10:45 -0400
Subject: [PATCH 219/427] [clangd] Add clangd 19 release notes

---
 clang-tools-extra/docs/ReleaseNotes.rst | 35 +++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 7146196862986..ebcdeca8c2ee5 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -61,6 +61,8 @@ Diagnostics
 Semantic Highlighting
 ^^^^^^^^^^^^^^^^^^^^^
 
+- Improved semantic token coverage in some edge cases, e.g. IndirectFieldDecl
+
 Compile flags
 ^^^^^^^^^^^^^
 
@@ -70,24 +72,57 @@ Hover
 Code completion
 ^^^^^^^^^^^^^^^
 
+- ``--function-arg-placeholders=0`` is now respected for variable template argument lists
+   as well
+- Macro proposals now use the completion item kind ``Constant`` (for object-like macros)
+  or ``Function`` (for function-style macros) even for proposals coming from the index
+
 Code actions
 ^^^^^^^^^^^^
 
+- The "extract variable" tweak is no longer offered for the initializer expression of a
+  declaration
 - The tweak for turning unscoped into scoped enums now removes redundant prefixes
   from the enum values.
+- Support "move function body out-of-line" in non-header files as well
 
 Signature help
 ^^^^^^^^^^^^^^
 
+- Signature help now shows function argument names for calls through pointers to
+  functions in struct fields
+
 Cross-references
 ^^^^^^^^^^^^^^^^
 
+- Improve go-to-definition for some concept references
+
+Document outline
+^^^^^^^^^^^^^^^^
+
+- Improved precision of document outline information for symbols whose definitions
+  involve macro expansions
+
+Clang-tidy integration
+^^^^^^^^^^^^^^^^^^^^^^
+
+- The quick fix for clang-tidy's ``readability-identifier-naming`` diagnostic is now
+  hooked to invoke ``textDocument/rename``, renaming the identifier across the whole
+  project rather than just the translation unit of the diagnostic
+- ``misc-const-correctness`` can now be enabled with ``FastCheckFilter: None``
+  (previously clangd would force it off unconditionally due to its run time)
+
 Objective-C
 ^^^^^^^^^^^
 
+- Added support for renaming Objective-C methods
+
 Miscellaneous
 ^^^^^^^^^^^^^
 
+- Worked around a clang-format bug that caused memory exhaustion when opening some large
+  ``.h`` files due to the formatter's language guessing heuristic (#GH85703)
+- Various other stability improvements, e.g. crash fixes
 - Added a boolean option `AnalyzeAngledIncludes` to `Includes` config section,
   which allows to enable unused includes detection for all angled ("system") headers.
   At this moment umbrella headers are not supported, so enabling this option

From 1b643dbad74986718460f28347cbd17085402383 Mon Sep 17 00:00:00 2001
From: Hans <hans@hanshq.net>
Date: Thu, 29 Aug 2024 13:54:30 +0200
Subject: [PATCH 220/427] Restrict LLVM_TARGETS_TO_BUILD in Windows release
 packaging (#106059)

When including all targets, some files become too large for the NSIS
installer to handle.

Fixes #101994

(cherry picked from commit 2a28df66dc3f7ff5b6233241837854acefb68d77)
---
 llvm/utils/release/build_llvm_release.bat | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/release/build_llvm_release.bat b/llvm/utils/release/build_llvm_release.bat
index 55b20c7c28ac1..64ae2d41ab2b0 100755
--- a/llvm/utils/release/build_llvm_release.bat
+++ b/llvm/utils/release/build_llvm_release.bat
@@ -151,6 +151,7 @@ set common_cmake_flags=^
   -DCMAKE_BUILD_TYPE=Release ^
   -DLLVM_ENABLE_ASSERTIONS=OFF ^
   -DLLVM_INSTALL_TOOLCHAIN_ONLY=ON ^
+  -DLLVM_TARGETS_TO_BUILD="AArch64;ARM;X86" ^
   -DLLVM_BUILD_LLVM_C_DYLIB=ON ^
   -DCMAKE_INSTALL_UCRT_LIBRARIES=ON ^
   -DPython3_FIND_REGISTRY=NEVER ^

From eba1ef5a1b7a84ed1954797dcd6d6f073b1f1a56 Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha <ahmed@bougacha.org>
Date: Thu, 29 Aug 2024 09:50:44 -0700
Subject: [PATCH 221/427] [AArch64] Make apple-m4 armv8.7-a again (from
 armv9.2-a).  (#106312)

This is a partial revert of c66e1d6f3429.  Even though that
allowed us to declare v9.2-a support without picking up SVE2
in both the backend and the driver, the frontend itself still
enabled SVE via the arch version's default extensions.

Avoid that by reverting back to v8.7-a while we look into
longer-term solutions.

(cherry picked from commit e5e38ddf1b8043324175868831da21e941c00aff)
---
 clang/test/CodeGen/aarch64-targetattr.c          | 9 +++++++++
 llvm/lib/Target/AArch64/AArch64Processors.td     | 7 ++++++-
 llvm/unittests/TargetParser/TargetParserTest.cpp | 2 +-
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/clang/test/CodeGen/aarch64-targetattr.c b/clang/test/CodeGen/aarch64-targetattr.c
index d6227be2ebef8..a5f94b46cbb17 100644
--- a/clang/test/CodeGen/aarch64-targetattr.c
+++ b/clang/test/CodeGen/aarch64-targetattr.c
@@ -191,6 +191,14 @@ __attribute__((target("no-v9.3a")))
 //
 void minusarch() {}
 
+__attribute__((target("cpu=apple-m4")))
+// CHECK-LABEL: define {{[^@]+}}@applem4
+// CHECK-SAME: () #[[ATTR18:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret void
+//
+void applem4() {}
+
 //.
 // CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" }
 // CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" }
@@ -210,6 +218,7 @@ void minusarch() {}
 // CHECK: attributes #[[ATTR15]] = { noinline nounwind optnone "branch-target-enforcement" "guarded-control-stack" "no-trapping-math"="true" "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
 // CHECK: attributes #[[ATTR16]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
 // CHECK: attributes #[[ATTR17]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-v9.3a" }
+// CHECK: attributes #[[ATTR18]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m4" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+sme,+sme-f64f64,+sme-i16i64,+sme2,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8.7a,+v8a,+wfxt" }
 //.
 // CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 // CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index 6df87fc6a815f..410b53e14de22 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -895,7 +895,12 @@ def ProcessorFeatures {
                                      FeatureLSE, FeaturePAuth,
                                      FeatureRAS, FeatureRCPC, FeatureRDM,
                                      FeatureBF16, FeatureDotProd, FeatureMatMulInt8, FeatureSSBS];
-  list<SubtargetFeature> AppleM4 = [HasV9_2aOps, FeatureSHA2, FeatureFPARMv8,
+  // Technically apple-m4 is v9.2a, but we can't use that here.
+  // Historically, llvm defined v9.0a as requiring SVE, but it's optional
+  // according to the Arm ARM, and not supported by the core.  We decoupled the
+  // two in the clang driver and in the backend subtarget features, but it's
+  // still an issue in the clang frontend.  v8.7a is the next closest choice.
+  list<SubtargetFeature> AppleM4 = [HasV8_7aOps, FeatureSHA2, FeatureFPARMv8,
                                     FeatureNEON, FeaturePerfMon, FeatureSHA3,
                                     FeatureFullFP16, FeatureFP16FML,
                                     FeatureAES, FeatureBF16,
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index dcbbc68332c79..11a2860367413 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1638,7 +1638,7 @@ INSTANTIATE_TEST_SUITE_P(
              AArch64::AEK_I8MM,    AArch64::AEK_JSCVT,   AArch64::AEK_FCMA,
              AArch64::AEK_PAUTH,   AArch64::AEK_PERFMON, AArch64::AEK_HCX,
              AArch64::AEK_SSBS}),
-        AArch64CPUTestParams("apple-m4", "armv9.2-a",
+        AArch64CPUTestParams("apple-m4", "armv8.7-a",
                              {AArch64::AEK_CRC,       AArch64::AEK_AES,
                               AArch64::AEK_SHA2,      AArch64::AEK_SHA3,
                               AArch64::AEK_FP,        AArch64::AEK_SIMD,

From c21b039178b2efd17bc4eef906ab7b3a07cab288 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Fri, 30 Aug 2024 19:46:33 -0700
Subject: [PATCH 222/427] workflows/release-binaries: Remove .git/config file
 from artifacts (#106310)

The .git/config file contains an auth token that can be leaked if the
.git directory is included in a workflow artifact.

(cherry picked from commit ef50970204384643acca42ba4c7ca8f14865a0c2)
---
 .github/workflows/release-binaries-save-stage/action.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.github/workflows/release-binaries-save-stage/action.yml b/.github/workflows/release-binaries-save-stage/action.yml
index e2f3eeadd15be..f08088c7bc56f 100644
--- a/.github/workflows/release-binaries-save-stage/action.yml
+++ b/.github/workflows/release-binaries-save-stage/action.yml
@@ -10,6 +10,9 @@ inputs:
     required: true
     type: 'string'
 
+permissions:
+  contents: read
+
 runs:
   using: "composite"
   steps:
@@ -18,6 +21,9 @@ runs:
     - name: Package Build and Source Directories
       shell: bash
       run: |
+        # Remove .git/config to avoid leaking GITHUB_TOKEN stored there.
+        # See https://unit42.paloaltonetworks.com/github-repo-artifacts-leak-tokens/
+        rm -Rf .git/config
         # Windows does not support symlinks, so we need to dereference them.
         tar --exclude build/ ${{ (runner.os == 'Windows' && '-h') || '' }} -c . | zstd -T0 -c > ../llvm-project.tar.zst
         mv ../llvm-project.tar.zst .

From 816fde1cbb700ebcc8b3df81fb93d675c04c12cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
Date: Thu, 29 Aug 2024 20:57:25 +0200
Subject: [PATCH 223/427] [clang] Install scan-build-py into plain "lib"
 directory (#106612)

Install scan-build-py modules into the plain `lib` directory,
without LLVM_LIBDIR_SUFFIX appended, to match the path expected
by `intercept-build` executable.  This fixes the program being unable
to find its modules.  Using unsuffixed path makes sense here, since
Python modules are not subject to multilib.

This change effectively reverts 1334e129a39cb427e7b855e9a711a3e7604e50e5.
The commit in question changed the path without a clear justification
("does not respect the given prefix") and the Python code was never
modified to actually work with the change.

Fixes #106608

(cherry picked from commit 0c4cf79defe30d43279bf4526cdf32b6c7f8a197)
---
 clang/tools/scan-build-py/CMakeLists.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/tools/scan-build-py/CMakeLists.txt b/clang/tools/scan-build-py/CMakeLists.txt
index 3aca22c0b0a8d..9273eb5ed977e 100644
--- a/clang/tools/scan-build-py/CMakeLists.txt
+++ b/clang/tools/scan-build-py/CMakeLists.txt
@@ -88,7 +88,7 @@ foreach(lib ${LibScanbuild})
                      DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libscanbuild/${lib})
   list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libscanbuild/${lib})
   install(FILES lib/libscanbuild/${lib}
-          DESTINATION lib${CLANG_LIBDIR_SUFFIX}/libscanbuild
+          DESTINATION lib/libscanbuild
           COMPONENT scan-build-py)
 endforeach()
 
@@ -106,7 +106,7 @@ foreach(resource ${LibScanbuildResources})
                      DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libscanbuild/resources/${resource})
   list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libscanbuild/resources/${resource})
   install(FILES lib/libscanbuild/resources/${resource}
-          DESTINATION lib${CLANG_LIBDIR_SUFFIX}/libscanbuild/resources
+          DESTINATION lib/libscanbuild/resources
           COMPONENT scan-build-py)
 endforeach()
 
@@ -122,7 +122,7 @@ foreach(lib ${LibEar})
                      DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libear/${lib})
   list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libear/${lib})
   install(FILES lib/libear/${lib}
-          DESTINATION lib${CLANG_LIBDIR_SUFFIX}/libear
+          DESTINATION lib/libear
           COMPONENT scan-build-py)
 endforeach()
 

From e81188d58202ee7b887e48bc3e4b102fc5f45619 Mon Sep 17 00:00:00 2001
From: Kai Yan <aklkaiyan@tencent.com>
Date: Wed, 24 Jul 2024 12:06:35 +0800
Subject: [PATCH 224/427] [llvm][CodeGen] Added missing initialization failure
 information for window scheduler (#99449)

Added missing initialization failure information for window scheduler.
---
 llvm/lib/CodeGen/WindowScheduler.cpp        | 5 ++++-
 llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp
index 0777480499e55..3fe8a1aaafd12 100644
--- a/llvm/lib/CodeGen/WindowScheduler.cpp
+++ b/llvm/lib/CodeGen/WindowScheduler.cpp
@@ -232,8 +232,11 @@ bool WindowScheduler::initialize() {
       return false;
     }
     for (auto &Def : MI.all_defs())
-      if (Def.isReg() && Def.getReg().isPhysical())
+      if (Def.isReg() && Def.getReg().isPhysical()) {
+        LLVM_DEBUG(dbgs() << "Physical registers are not supported in "
+                             "window scheduling!\n");
         return false;
+      }
   }
   if (SchedInstrNum <= WindowRegionLimit) {
     LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n");
diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir b/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir
index 601b98dca8e20..be75301b016ed 100644
--- a/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir
@@ -3,6 +3,7 @@
 # RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \
 # RUN: | FileCheck %s
 
+# CHECK: Physical registers are not supported in window scheduling!
 # CHECK: The WindowScheduler failed to initialize!
 
 ---

From 7b86034dcb8c7fd7ea125cec43f0117cd4a428b6 Mon Sep 17 00:00:00 2001
From: Kai Yan <aklkaiyan@tencent.com>
Date: Wed, 24 Jul 2024 12:11:58 +0800
Subject: [PATCH 225/427] [llvm][CodeGen] Added a new restriction for II by
 pragma in window scheduler (#99448)

Added a new restriction for window scheduling.
Window scheduling is disabled when llvm.loop.pipeline.initiationinterval
is set.
---
 llvm/lib/CodeGen/MachinePipeliner.cpp         | 12 ++-
 ...swp-ws-pragma-initiation-interval-fail.mir | 83 +++++++++++++++++++
 2 files changed, 93 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir

diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 497e282bb9768..5c68711ff6193 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -528,8 +528,16 @@ bool MachinePipeliner::useSwingModuloScheduler() {
 }
 
 bool MachinePipeliner::useWindowScheduler(bool Changed) {
-  // WindowScheduler does not work when it is off or when SwingModuloScheduler
-  // is successfully scheduled.
+  // WindowScheduler does not work for following cases:
+  // 1. when it is off.
+  // 2. when SwingModuloScheduler is successfully scheduled.
+  // 3. when pragma II is enabled.
+  if (II_setByPragma) {
+    LLVM_DEBUG(dbgs() << "Window scheduling is disabled when "
+                         "llvm.loop.pipeline.initiationinterval is set.\n");
+    return false;
+  }
+
   return WindowSchedulingOption == WindowSchedulingFlag::WS_Force ||
          (WindowSchedulingOption == WindowSchedulingFlag::WS_On && !Changed);
 }
diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir b/llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir
new file mode 100644
index 0000000000000..6e69a76290fb1
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir
@@ -0,0 +1,83 @@
+# RUN: llc  --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \
+# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \
+# RUN: | FileCheck %s
+# REQUIRES: asserts
+
+# Test that checks no window scheduler is performed if the II set by pragma was
+# enabled
+
+# CHECK: Window scheduling is disabled when llvm.loop.pipeline.initiationinterval is set.
+
+--- |
+  define void @test_pragma_ii_fail(ptr %a0, i32 %a1) {
+  b0:
+    %v0 = icmp sgt i32 %a1, 1
+    br i1 %v0, label %b1, label %b4
+
+  b1:                                               ; preds = %b0
+    %v1 = load i32, ptr %a0, align 4
+    %v2 = add i32 %v1, 10
+    %v4 = add i32 %a1, -1
+    %cgep = getelementptr i32, ptr %a0, i32 1
+    br label %b2
+
+  b2:                                               ; preds = %b2, %b1
+    %v5 = phi i32 [ %v12, %b2 ], [ %v4, %b1 ]
+    %v6 = phi ptr [ %cgep2, %b2 ], [ %cgep, %b1 ]
+    %v7 = phi i32 [ %v10, %b2 ], [ %v2, %b1 ]
+    store i32 %v7, ptr %v6, align 4
+    %v8 = add i32 %v7, 10
+    %cgep1 = getelementptr i32, ptr %v6, i32 -1
+    store i32 %v8, ptr %cgep1, align 4
+    %v10 = add i32 %v7, 10
+    %v12 = add i32 %v5, -1
+    %v13 = icmp eq i32 %v12, 0
+    %cgep2 = getelementptr i32, ptr %v6, i32 1
+    br i1 %v13, label %b4, label %b2, !llvm.loop !0
+
+  b4:                                               ; preds = %b2, %b0
+    ret void
+  }
+
+  !0 = distinct !{!0, !1}
+  !1 = !{!"llvm.loop.pipeline.initiationinterval", i32 2}
+...
+---
+name:            test_pragma_ii_fail
+tracksRegLiveness: true
+body:             |
+  bb.0.b0:
+    successors: %bb.1(0x40000000), %bb.3(0x40000000)
+    liveins: $r0, $r1
+  
+    %0:intregs = COPY $r1
+    %1:intregs = COPY $r0
+    %2:predregs = C2_cmpgti %0, 1
+    J2_jumpf %2, %bb.3, implicit-def dead $pc
+    J2_jump %bb.1, implicit-def dead $pc
+  
+  bb.1.b1:
+    successors: %bb.2(0x80000000)
+  
+    %3:intregs, %4:intregs = L2_loadri_pi %1, 4
+    %5:intregs = A2_addi killed %3, 10
+    %6:intregs = A2_addi %0, -1
+    %7:intregs = COPY %6
+    J2_loop0r %bb.2, %7, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+  
+  bb.2.b2 (machine-block-address-taken):
+    successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+  
+    %8:intregs = PHI %4, %bb.1, %9, %bb.2
+    %10:intregs = PHI %5, %bb.1, %11, %bb.2
+    S2_storeri_io %8, 0, %10
+    %11:intregs = A2_addi %10, 10
+    S2_storeri_io %8, -4, %11
+    %9:intregs = A2_addi %8, 4
+    ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+    J2_jump %bb.3, implicit-def dead $pc
+  
+  bb.3.b4:
+    PS_jmpret $r31, implicit-def dead $pc
+
+...

From 06d009789f771e8cef82714549c3136e320312be Mon Sep 17 00:00:00 2001
From: Kai Yan <aklkaiyan@tencent.com>
Date: Thu, 25 Jul 2024 19:16:23 +0800
Subject: [PATCH 226/427] [llvm][CodeGen] Fixed a bug in stall cycle
 calculation for window scheduler (#99451)

Fixed a bug in stall cycle calculation.
When a register defined by an instruction in the current iteration is
used by an instruction in the next iteration, we have modified the
number of stall cycle that need to be inserted.
---
 llvm/lib/CodeGen/WindowScheduler.cpp          |  7 ++-
 .../CodeGen/Hexagon/swp-ws-stall-cycle.mir    | 59 +++++++++++++++++++
 2 files changed, 63 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir

diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp
index 3fe8a1aaafd12..02275afff4e6d 100644
--- a/llvm/lib/CodeGen/WindowScheduler.cpp
+++ b/llvm/lib/CodeGen/WindowScheduler.cpp
@@ -488,6 +488,7 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG,
 // ========================================
 int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
   int MaxStallCycle = 0;
+  int CurrentII = MaxCycle + 1;
   auto Range = getScheduleRange(Offset, SchedInstrNum);
   for (auto &MI : Range) {
     auto *SU = TripleDAG->getSUnit(&MI);
@@ -495,8 +496,8 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
     for (auto &Succ : SU->Succs) {
       if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU)
         continue;
-      // If the expected cycle does not exceed MaxCycle, no check is needed.
-      if (DefCycle + (int)Succ.getLatency() <= MaxCycle)
+      // If the expected cycle does not exceed CurrentII, no check is needed.
+      if (DefCycle + (int)Succ.getLatency() <= CurrentII)
         continue;
       // If the cycle of the scheduled MI A is less than that of the scheduled
       // MI B, the scheduling will fail because the lifetime of the
@@ -506,7 +507,7 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
       if (DefCycle < UseCycle)
         return WindowIILimit;
       // Get the stall cycle introduced by the register between two trips.
-      int StallCycle = DefCycle + (int)Succ.getLatency() - MaxCycle - UseCycle;
+      int StallCycle = DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle;
       MaxStallCycle = std::max(MaxStallCycle, StallCycle);
     }
   }
diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir b/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir
new file mode 100644
index 0000000000000..ddba67d78eb58
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir
@@ -0,0 +1,59 @@
+# REQUIRES: asserts
+# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \
+# RUN: -window-sched=force -filetype=null -verify-machineinstrs \
+# RUN: -window-region-limit=1 -window-search-ratio=100 -window-diff-limit=0 \
+# RUN: 2>&1 | FileCheck %s
+
+# CHECK-LABEL: Start analyzing II
+# CHECK: MaxStallCycle is 0
+# CHECK-LABEL: Start analyzing II
+# CHECK: MaxStallCycle is 0
+# CHECK-LABEL: Start analyzing II
+# CHECK: MaxStallCycle is 0
+
+---
+name:            test_window_stall_cycle
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.3(0x40000000), %bb.1(0x40000000)
+    liveins: $r0, $r1
+  
+    %0:intregs = COPY $r1
+    %1:intregs = COPY $r0
+    %2:intregs = nsw A2_add %0, %1
+    %3:intregs = S2_lsr_i_r_acc %2, %2, 31
+    %4:intregs = S2_asr_i_r killed %3, 1
+    %5:predregs = C2_cmpgt %1, %4
+    %6:intregs = A2_tfrsi 0
+    J2_jumpt killed %5, %bb.3, implicit-def dead $pc
+    J2_jump %bb.1, implicit-def dead $pc
+  
+  bb.1:
+    successors: %bb.2(0x80000000)
+  
+    %7:intregs = A2_addi %4, 2
+    %8:intregs = A2_tfrsi 0
+    %9:intregs = A2_sub %4, %1
+    %10:intregs = A2_addi %9, 1
+    %11:intregs = COPY %10
+    J2_loop0r %bb.2, %11, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+  
+  bb.2 (machine-block-address-taken):
+    successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+  
+    %12:intregs = PHI %7, %bb.1, %13, %bb.2
+    %14:intregs = PHI %8, %bb.1, %15, %bb.2
+    %16:intregs = PHI %8, %bb.1, %17, %bb.2
+    %18:intregs, %13:intregs = L2_loadri_pi %12, -4
+    %17:intregs = nsw A2_add killed %18, %16
+    %15:intregs = A2_max %17, %14
+    ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+    J2_jump %bb.3, implicit-def dead $pc
+  
+  bb.3:
+    %19:intregs = PHI %6, %bb.0, %15, %bb.2
+    $r0 = COPY %19
+    PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+
+...

From 5a164a28e37fe3cda99236595167f7762b47c76d Mon Sep 17 00:00:00 2001
From: Kai Yan <aklkaiyan@tencent.com>
Date: Wed, 24 Jul 2024 12:06:10 +0800
Subject: [PATCH 227/427] [llvm][CodeGen] Fixed max cycle calculation with
 zero-cost instructions for window scheduler (#99454)

We discovered some scheduling failures occurring when zero-cost
instructions were involved. This issue will be addressed by this patch.
---
 llvm/lib/CodeGen/WindowScheduler.cpp          | 16 ++++---
 .../test/CodeGen/Hexagon/swp-ws-zero-cost.mir | 45 +++++++++++++++++++
 2 files changed, 55 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir

diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp
index 02275afff4e6d..6a2c6537470db 100644
--- a/llvm/lib/CodeGen/WindowScheduler.cpp
+++ b/llvm/lib/CodeGen/WindowScheduler.cpp
@@ -440,12 +440,16 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG,
       int PredCycle = getOriCycle(PredMI);
       ExpectCycle = std::max(ExpectCycle, PredCycle + (int)Pred.getLatency());
     }
-    // ResourceManager can be used to detect resource conflicts between the
-    // current MI and the previously inserted MIs.
-    while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) {
-      ++CurCycle;
-      if (CurCycle == (int)WindowIILimit)
-        return CurCycle;
+    // Zero cost instructions do not need to check resource.
+    if (!TII->isZeroCost(MI.getOpcode())) {
+      // ResourceManager can be used to detect resource conflicts between the
+      // current MI and the previously inserted MIs.
+      while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) {
+        ++CurCycle;
+        if (CurCycle == (int)WindowIILimit)
+          return CurCycle;
+      }
+      RM.reserveResources(*SU, CurCycle);
     }
     RM.reserveResources(*SU, CurCycle);
     OriToCycle[getOriMI(&MI)] = CurCycle;
diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir b/llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir
new file mode 100644
index 0000000000000..ecf49a83c69e1
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir
@@ -0,0 +1,45 @@
+# REQUIRES: asserts
+# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \
+# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \
+# RUN: | FileCheck %s
+
+# CHECK-NOT: Can't find a valid II. Keep searching...
+# CHECK: Start analyzing II
+# CHECK: Start scheduling Phis
+# CHECK: Current window Offset is {{[0-9]+}} and II is {{[0-9]+}}
+
+---
+name:            relu
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.2(0x30000000), %bb.1(0x50000000)
+    liveins: $r0, $r1, $r2
+    %0:intregs = COPY $r2
+    %1:intregs = COPY $r1
+    %2:intregs = COPY $r0
+    %3:predregs = C2_cmpeqi %2, 0
+    J2_jumpt killed %3, %bb.2, implicit-def dead $pc
+    J2_jump %bb.1, implicit-def dead $pc
+  bb.1:
+    successors: %bb.3(0x80000000)
+    %4:hvxvr = V6_vd0
+    %5:intregs = A2_addi %2, 31
+    %6:intregs = S2_lsr_i_r %5, 5
+    %7:intregs = COPY %6
+    J2_loop0r %bb.3, %7, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+    J2_jump %bb.3, implicit-def dead $pc
+  bb.2:
+    PS_jmpret $r31, implicit-def dead $pc
+  bb.3 (machine-block-address-taken):
+    successors: %bb.3(0x7c000000), %bb.2(0x04000000)
+    %8:intregs = PHI %1, %bb.1, %9, %bb.3
+    %10:intregs = PHI %0, %bb.1, %14, %bb.3
+    %11:hvxvr, %9:intregs = V6_vL32b_pi %8, 128
+    %12:intregs = COPY %10
+    %13:hvxvr = V6_vmaxw killed %11, %4
+    %14:intregs = V6_vS32b_pi %12, 128, killed %13
+    ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+    J2_jump %bb.2, implicit-def dead $pc
+...
+

From 54579830d81a67cfa52b90c34bcf9a631f53fcc5 Mon Sep 17 00:00:00 2001
From: Kai Yan <aklkaiyan@tencent.com>
Date: Mon, 5 Aug 2024 17:44:05 +0800
Subject: [PATCH 228/427] [llvm][CodeGen] Address the issue of multiple
 resource reservations In window scheduling (#101665)

Address the issue of multiple resource reservations in window
scheduling.
---
 llvm/lib/CodeGen/WindowScheduler.cpp          |   1 -
 .../Hexagon/swp-ws-resource-reserve.mir       | 100 ++++++++++++++++++
 2 files changed, 100 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/Hexagon/swp-ws-resource-reserve.mir

diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp
index 6a2c6537470db..f1658e36ae1e9 100644
--- a/llvm/lib/CodeGen/WindowScheduler.cpp
+++ b/llvm/lib/CodeGen/WindowScheduler.cpp
@@ -451,7 +451,6 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG,
       }
       RM.reserveResources(*SU, CurCycle);
     }
-    RM.reserveResources(*SU, CurCycle);
     OriToCycle[getOriMI(&MI)] = CurCycle;
     LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S."
                       << getOriStage(getOriMI(&MI), Offset) << "]: " << MI);
diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-resource-reserve.mir b/llvm/test/CodeGen/Hexagon/swp-ws-resource-reserve.mir
new file mode 100644
index 0000000000000..4a9a09c4148cb
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-ws-resource-reserve.mir
@@ -0,0 +1,100 @@
+# REQUIRES: asserts
+# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \
+# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \
+# RUN: -window-search-ratio=100 -window-search-num=100 -window-diff-limit=1 \
+# RUN: | FileCheck %s
+
+# We want to verify that all three V6_vaddw instructions are emitted in the same cycle.
+# CHECK-LABEL: Current window Offset is 2
+# CHECK: Cycle [[CycleNum:[0-9]+]] [{{S.[0-9]+}}]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr
+# CHECK: Cycle [[CycleNum]] [{{S.[0-9]+}}]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr
+# CHECK: Cycle [[CycleNum]] [{{S.[0-9]+}}]: {{%[0-9]+}}:hvxvr = V6_vaddw {{%[0-9]+}}:hvxvr, {{%[0-9]+}}:hvxvr
+# CHECK-LABEL: Current window Offset is 3
+
+--- |
+  define void @add_parallel(i32 %N, ptr noalias %x, ptr noalias %y) {
+  entry:
+    %isZeroLength = icmp eq i32 %N, 0
+    br i1 %isZeroLength, label %loop.exit, label %loop.preheader
+
+  loop.preheader:                                   ; preds = %entry
+    %half_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1056964608)
+    %one_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1065353216)
+    %two_splat = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1073741824)
+    br label %loop.body
+
+  loop.exit:                                        ; preds = %loop.body, %entry
+    ret void
+
+  loop.body:                                        ; preds = %loop.body, %loop.preheader
+    %lsr.iv1 = phi ptr [ %cgep2, %loop.body ], [ %x, %loop.preheader ]
+    %lsr.iv = phi ptr [ %cgep1, %loop.body ], [ %y, %loop.preheader ]
+    %index = phi i32 [ 0, %loop.preheader ], [ %index.next, %loop.body ]
+    %vec_x1 = load <32 x i32>, ptr %lsr.iv1, align 128
+    %vec_add_1 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %one_splat, <32 x i32> %vec_x1)
+    %vec_add_2 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %half_splat, <32 x i32> %vec_x1)
+    %vec_add_3 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %two_splat, <32 x i32> %vec_x1)
+    %vec_add_4 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_1, <32 x i32> %vec_add_2)
+    %vec_add_5 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_1, <32 x i32> %vec_add_3)
+    %vec_add_6 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32> %vec_add_5, <32 x i32> %vec_add_4)
+    store <32 x i32> %vec_add_6, ptr %lsr.iv, align 128
+    %index.next = add nuw i32 %index, 32
+    %continue = icmp ult i32 %index.next, %N
+    %cgep1 = getelementptr i8, ptr %lsr.iv, i32 128
+    %cgep2 = getelementptr i8, ptr %lsr.iv1, i32 128
+    br i1 %continue, label %loop.body, label %loop.exit
+  }
+
+  declare <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32)
+  declare <32 x i32> @llvm.hexagon.V6.vaddw.128B(<32 x i32>, <32 x i32>)
+...
+---
+name:            add_parallel
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.2(0x30000000), %bb.1(0x50000000)
+    liveins: $r0, $r1, $r2
+
+    %0:intregs = COPY $r2
+    %1:intregs = COPY $r1
+    %2:intregs = COPY $r0
+    %3:predregs = C2_cmpeqi %2, 0
+    J2_jumpt killed %3, %bb.2, implicit-def dead $pc
+    J2_jump %bb.1, implicit-def dead $pc
+
+  bb.1.loop.preheader:
+    successors: %bb.3(0x80000000)
+
+    %4:intregs = A2_tfrsi 1056964608
+    %5:hvxvr = V6_lvsplatw killed %4
+    %6:intregs = A2_tfrsi 1065353216
+    %7:hvxvr = V6_lvsplatw killed %6
+    %8:intregs = A2_tfrsi 1073741824
+    %9:hvxvr = V6_lvsplatw killed %8
+    %10:intregs = A2_addi %2, 31
+    %11:intregs = S2_lsr_i_r %10, 5
+    %12:intregs = COPY %11
+    J2_loop0r %bb.3, %12, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+    J2_jump %bb.3, implicit-def dead $pc
+
+  bb.2.loop.exit:
+    PS_jmpret $r31, implicit-def dead $pc
+
+  bb.3.loop.body (machine-block-address-taken):
+    successors: %bb.3(0x7c000000), %bb.2(0x04000000)
+
+    %13:intregs = PHI %1, %bb.1, %14, %bb.3
+    %15:intregs = PHI %0, %bb.1, %16, %bb.3
+    %17:hvxvr, %14:intregs = V6_vL32b_pi %13, 128 :: (load (s1024) from %ir.lsr.iv1)
+    %18:hvxvr = V6_vaddw %7, %17
+    %19:hvxvr = V6_vaddw %5, %17
+    %20:hvxvr = V6_vaddw %9, %17
+    %21:hvxvr = V6_vaddw %18, killed %19
+    %22:hvxvr = V6_vaddw %18, killed %20
+    %23:hvxvr = V6_vaddw killed %22, killed %21
+    %16:intregs = V6_vS32b_pi %15, 128, killed %23 :: (store (s1024) into %ir.lsr.iv)
+    ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+    J2_jump %bb.2, implicit-def dead $pc
+
+...

From 78f97e22e5d87f9efc6b2c0ec76f60667458ca8a Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs@gmail.com>
Date: Wed, 21 Aug 2024 14:24:56 +0200
Subject: [PATCH 229/427] [analyzer] Limit `isTainted()` by skipping
 complicated symbols (#105493)

As discussed in

https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570/10

Some `isTainted()` queries can blow up the analysis times, and
effectively halt the analysis under specific workloads.

We don't really have the time now to do a caching re-implementation of
`isTainted()`, so we need to workaround the case.

The workaround with the smallest blast radius was to limit what symbols
`isTainted()` does the query (by walking the SymExpr). So far, the
threshold 10 worked for us, but this value can be overridden using the
"max-tainted-symbol-complexity" config value.

This new option is "deprecated" from the getgo, as I expect this issue
to be fixed within the next few months and I don't want users to
override this value anyways. If they do, this message will let them know
that they are on their own, and the next release may break them (as we
no longer recognize this option if we drop it).

Mitigates #89720

CPP-5414

(cherry picked from commit 848658955a9d2d42ea3e319d191e2dcd5d76c837)
---
 clang/docs/ReleaseNotes.rst                   |  5 ++
 .../StaticAnalyzer/Core/AnalyzerOptions.def   |  5 ++
 clang/lib/StaticAnalyzer/Checkers/Taint.cpp   |  7 +++
 clang/test/Analysis/analyzer-config.c         |  1 +
 clang/test/Analysis/taint-generic.c           | 49 ++++++++++++++++++-
 5 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index b68b823ae6761..ef5a2fda1ea77 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1407,6 +1407,11 @@ Crash and bug fixes
 - Z3 crosschecking (aka. Z3 refutation) is now bounded, and can't consume
   more total time than the eymbolic execution itself. (#GH97298)
 
+- In clang-18, we regressed in terms of analysis time for projects having many
+  nested loops with buffer indexing or shifting or other binary operations.
+  For example, functions computing different hash values. Some of this slowdown
+  was attributed to taint analysis, which is fixed now. (#GH105493)
+
 - ``std::addressof``, ``std::as_const``, ``std::forward``,
   ``std::forward_like``, ``std::move``, ``std::move_if_noexcept``, are now
   modeled just like their builtin counterpart. (#GH94193)
diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
index 29aa6a3b8a16e..737bc8e86cfb6 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
@@ -407,6 +407,11 @@ ANALYZER_OPTION(
 ANALYZER_OPTION(unsigned, MaxSymbolComplexity, "max-symbol-complexity",
                 "The maximum complexity of symbolic constraint.", 35)
 
+// HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570
+// Ideally, we should get rid of this option soon.
+ANALYZER_OPTION(unsigned, MaxTaintedSymbolComplexity, "max-tainted-symbol-complexity",
+                "[DEPRECATED] The maximum complexity of a symbol to carry taint", 9)
+
 ANALYZER_OPTION(unsigned, MaxTimesInlineLarge, "max-times-inline-large",
                 "The maximum times a large function could be inlined.", 32)
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
index 6362c82b009d7..0bb5739db4b75 100644
--- a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
@@ -12,6 +12,7 @@
 
 #include "clang/StaticAnalyzer/Checkers/Taint.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include <optional>
 
@@ -256,6 +257,12 @@ std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
   if (!Sym)
     return TaintedSymbols;
 
+  // HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570
+  if (const auto &Opts = State->getAnalysisManager().getAnalyzerOptions();
+      Sym->computeComplexity() > Opts.MaxTaintedSymbolComplexity) {
+    return {};
+  }
+
   // Traverse all the symbols this symbol depends on to see if any are tainted.
   for (SymbolRef SubSym : Sym->symbols()) {
     if (!isa<SymbolData>(SubSym))
diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c
index 2a4c40005a4dc..1ee0d8e4eeceb 100644
--- a/clang/test/Analysis/analyzer-config.c
+++ b/clang/test/Analysis/analyzer-config.c
@@ -96,6 +96,7 @@
 // CHECK-NEXT: max-inlinable-size = 100
 // CHECK-NEXT: max-nodes = 225000
 // CHECK-NEXT: max-symbol-complexity = 35
+// CHECK-NEXT: max-tainted-symbol-complexity = 9
 // CHECK-NEXT: max-times-inline-large = 32
 // CHECK-NEXT: min-cfg-size-treat-functions-as-large = 14
 // CHECK-NEXT: mode = deep
diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c
index b0df85f237298..1c139312734bc 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -63,6 +63,7 @@ void clang_analyzer_isTainted_char(char);
 void clang_analyzer_isTainted_wchar(wchar_t);
 void clang_analyzer_isTainted_charp(char*);
 void clang_analyzer_isTainted_int(int);
+void clang_analyzer_dump_int(int);
 
 int coin();
 
@@ -459,7 +460,53 @@ unsigned radar11369570_hanging(const unsigned char *arr, int l) {
     longcmp(a, t, c);
     l -= 12;
   }
-  return 5/a; // expected-warning {{Division by a tainted value, possibly zero}}
+  return 5/a; // FIXME: Should be a "div by tainted" warning here.
+}
+
+// This computation used to take a very long time.
+void complex_taint_queries(const int *p) {
+  int tainted = 0;
+  scanf("%d", &tainted);
+
+  // Make "tmp" tainted.
+  int tmp = tainted + tainted;
+  clang_analyzer_isTainted_int(tmp); // expected-warning{{YES}}
+
+  // Make "tmp" SymExpr a lot more complicated by applying computation.
+  // This should balloon the symbol complexity.
+  tmp += p[0] + p[0];
+  tmp += p[1] + p[1];
+  tmp += p[2] + p[2];
+  clang_analyzer_dump_int(tmp); // expected-warning{{((((conj_}} symbol complexity: 8
+  clang_analyzer_isTainted_int(tmp); // expected-warning{{YES}}
+
+  tmp += p[3] + p[3];
+  clang_analyzer_dump_int(tmp); // expected-warning{{(((((conj_}} symbol complexity: 10
+  clang_analyzer_isTainted_int(tmp); // expected-warning{{NO}} 10 is already too complex to be traversed
+
+  tmp += p[4] + p[4];
+  tmp += p[5] + p[5];
+  tmp += p[6] + p[6];
+  tmp += p[7] + p[7];
+  tmp += p[8] + p[8];
+  tmp += p[9] + p[9];
+  tmp += p[10] + p[10];
+  tmp += p[11] + p[11];
+  tmp += p[12] + p[12];
+  tmp += p[13] + p[13];
+  tmp += p[14] + p[14];
+  tmp += p[15] + p[15];
+
+  // The SymExpr still holds the full history of the computation, yet, "isTainted" doesn't traverse the tree as the complexity is over the threshold.
+  clang_analyzer_dump_int(tmp);
+  // expected-warning@-1{{(((((((((((((((((conj_}} symbol complexity: 34
+  clang_analyzer_isTainted_int(tmp); // expected-warning{{NO}} FIXME: Ideally, this should still result in "tainted".
+
+  // By making it even one step more complex, then it would hit the "max-symbol-complexity"
+  // threshold and the engine would cut the SymExpr and replace it by a new conjured symbol.
+  tmp += p[16];
+  clang_analyzer_dump_int(tmp); // expected-warning{{conj_}} symbol complexity: 1
+  clang_analyzer_isTainted_int(tmp); // expected-warning{{NO}}
 }
 
 // Check that we do not assert of the following code.

From 40b076410194df3783b0c9cefa9f018fb190bdff Mon Sep 17 00:00:00 2001
From: alx32 <103613512+alx32@users.noreply.github.com>
Date: Wed, 14 Aug 2024 19:30:41 -0700
Subject: [PATCH 230/427] [lld-macho] Fix crash: ObjC category merge + relative
 method lists (#104081)

A crash was happening when both ObjC Category Merging and Relative
method lists were enabled.

ObjC Category Merging creates new data sections and adds them by calling
`addInputSection`. `addInputSection` uses the symbols within the added
section to determine which container to actually add the section to.

The issue is that ObjC Category merging is calling `addInputSection`
before actually adding the relevant symbols the the added section. This
causes `addInputSection` to add the `InputSection` to the wrong
container, eventually resulting in a crash.

To fix this, we ensure that ObjC Category Merging calls
`addInputSection` only after the symbols have been added to the
`InputSection`.

(cherry picked from commit 0df91893efc752a76c7bbe6b063d66c8a2fa0d55)
---
 lld/MachO/ObjC.cpp                            | 10 +++++-----
 .../MachO/objc-category-merging-minimal.s     | 20 +++++++++----------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 9c056f40aa943..39d885188d34a 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -873,7 +873,6 @@ Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
       infoCategoryWriter.catPtrListInfo.align);
   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
   listSec->live = true;
-  addInputSection(listSec);
 
   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
 
@@ -889,6 +888,7 @@ Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
 
   ptrListSym->used = true;
   parentSym->getObjectFile()->symbols.push_back(ptrListSym);
+  addInputSection(listSec);
 
   createSymbolReference(parentSym, ptrListSym, linkAtOffset,
                         infoCategoryWriter.catBodyInfo.relocTemplate);
@@ -933,7 +933,6 @@ void ObjcCategoryMerger::emitAndLinkPointerList(
       infoCategoryWriter.catPtrListInfo.align);
   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
   listSec->live = true;
-  addInputSection(listSec);
 
   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
 
@@ -949,6 +948,7 @@ void ObjcCategoryMerger::emitAndLinkPointerList(
 
   ptrListSym->used = true;
   parentSym->getObjectFile()->symbols.push_back(ptrListSym);
+  addInputSection(listSec);
 
   createSymbolReference(parentSym, ptrListSym, linkAtOffset,
                         infoCategoryWriter.catBodyInfo.relocTemplate);
@@ -974,7 +974,6 @@ ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,
                                bodyData, infoCategoryWriter.catListInfo.align);
   newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
   newCatList->live = true;
-  addInputSection(newCatList);
 
   newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
 
@@ -990,6 +989,7 @@ ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,
 
   catListSym->used = true;
   objFile->symbols.push_back(catListSym);
+  addInputSection(newCatList);
   return catListSym;
 }
 
@@ -1012,7 +1012,6 @@ Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
                                bodyData, infoCategoryWriter.catBodyInfo.align);
   newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
   newBodySec->live = true;
-  addInputSection(newBodySec);
 
   std::string symName =
       objc::symbol_names::category + baseClassName + "(" + name + ")";
@@ -1025,6 +1024,7 @@ Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
 
   catBodySym->used = true;
   objFile->symbols.push_back(catBodySym);
+  addInputSection(newBodySec);
 
   createSymbolReference(catBodySym, nameSym, catLayout.nameOffset,
                         infoCategoryWriter.catBodyInfo.relocTemplate);
@@ -1245,7 +1245,6 @@ void ObjcCategoryMerger::generateCatListForNonErasedCategories(
           infoCategoryWriter.catListInfo.align);
       listSec->parent = infoCategoryWriter.catListInfo.outputSection;
       listSec->live = true;
-      addInputSection(listSec);
 
       std::string slotSymName = "<__objc_catlist slot for category ";
       slotSymName += nonErasedCatBody->getName();
@@ -1260,6 +1259,7 @@ void ObjcCategoryMerger::generateCatListForNonErasedCategories(
 
       catListSlotSym->used = true;
       objFile->symbols.push_back(catListSlotSym);
+      addInputSection(listSec);
 
       // Now link the category body into the newly created slot
       createSymbolReference(catListSlotSym, nonErasedCatBody, 0,
diff --git a/lld/test/MachO/objc-category-merging-minimal.s b/lld/test/MachO/objc-category-merging-minimal.s
index 527493303c583..b94799a57a4d8 100644
--- a/lld/test/MachO/objc-category-merging-minimal.s
+++ b/lld/test/MachO/objc-category-merging-minimal.s
@@ -9,7 +9,7 @@
 ## Create our main testing dylib - linking against the fake dylib above
 # RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_cat_minimal.o merge_cat_minimal.s
 # RUN: %lld -arch arm64 -dylib -o merge_cat_minimal_no_merge.dylib a64_fakedylib.dylib merge_cat_minimal.o
-# RUN: %lld -arch arm64 -dylib -o merge_cat_minimal_merge.dylib -objc_category_merging a64_fakedylib.dylib merge_cat_minimal.o
+# RUN: %lld -objc_relative_method_lists -arch arm64 -dylib -o merge_cat_minimal_merge.dylib -objc_category_merging a64_fakedylib.dylib merge_cat_minimal.o
 
 ## Now verify that the flag caused category merging to happen appropriatelly
 # RUN: llvm-objdump --objc-meta-data --macho merge_cat_minimal_no_merge.dylib | FileCheck %s --check-prefixes=NO_MERGE_CATS
@@ -17,7 +17,7 @@
 
 ############ Test merging multiple categories into the base class ############
 # RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_base_class_minimal.o merge_base_class_minimal.s
-# RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_yes_merge.dylib -objc_category_merging merge_base_class_minimal.o merge_cat_minimal.o
+# RUN: %lld -arch arm64 -dylib -objc_relative_method_lists -o merge_base_class_minimal_yes_merge.dylib -objc_category_merging merge_base_class_minimal.o merge_cat_minimal.o
 # RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_no_merge.dylib merge_base_class_minimal.o merge_cat_minimal.o
 
 # RUN: llvm-objdump --objc-meta-data --macho merge_base_class_minimal_no_merge.dylib  | FileCheck %s --check-prefixes=NO_MERGE_INTO_BASE
@@ -37,14 +37,14 @@ MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category02
 MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass(Category01|Category02)
 MERGE_CATS-NEXT:   name {{.*}} Category01|Category02
 MERGE_CATS:       instanceMethods
-MERGE_CATS-NEXT:  24
-MERGE_CATS-NEXT:  2
+MERGE_CATS-NEXT:  entsize 12 (relative)
+MERGE_CATS-NEXT:  count 2
 MERGE_CATS-NEXT:   name {{.*}} cat01_InstanceMethod
 MERGE_CATS-NEXT:  types {{.*}} v16@0:8
-MERGE_CATS-NEXT:    imp -[MyBaseClass(Category01) cat01_InstanceMethod]
+MERGE_CATS-NEXT:    imp {{.*}} -[MyBaseClass(Category01) cat01_InstanceMethod]
 MERGE_CATS-NEXT:   name {{.*}} cat02_InstanceMethod
 MERGE_CATS-NEXT:  types {{.*}} v16@0:8
-MERGE_CATS-NEXT:    imp -[MyBaseClass(Category02) cat02_InstanceMethod]
+MERGE_CATS-NEXT:    imp {{.*}} -[MyBaseClass(Category02) cat02_InstanceMethod]
 MERGE_CATS-NEXT:         classMethods 0x0
 MERGE_CATS-NEXT:            protocols 0x0
 MERGE_CATS-NEXT:   instanceProperties 0x0
@@ -69,17 +69,17 @@ YES_MERGE_INTO_BASE-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category02
 YES_MERGE_INTO_BASE: _OBJC_CLASS_$_MyBaseClass
 YES_MERGE_INTO_BASE-NEXT: _OBJC_METACLASS_$_MyBaseClass
 YES_MERGE_INTO_BASE: baseMethods
-YES_MERGE_INTO_BASE-NEXT: entsize 24
+YES_MERGE_INTO_BASE-NEXT: entsize 12 (relative)
 YES_MERGE_INTO_BASE-NEXT: count 3
 YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat01_InstanceMethod
 YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8
-YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass(Category01) cat01_InstanceMethod]
+YES_MERGE_INTO_BASE-NEXT: imp {{.*}} -[MyBaseClass(Category01) cat01_InstanceMethod]
 YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat02_InstanceMethod
 YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8
-YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass(Category02) cat02_InstanceMethod]
+YES_MERGE_INTO_BASE-NEXT: imp {{.*}} -[MyBaseClass(Category02) cat02_InstanceMethod]
 YES_MERGE_INTO_BASE-NEXT: name {{.*}} baseInstanceMethod
 YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8
-YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass baseInstanceMethod]
+YES_MERGE_INTO_BASE-NEXT: imp {{.*}} -[MyBaseClass baseInstanceMethod]
 
 
 #### Check merge swift category into base class ###

From 45b149d2531948d2cc0e9d699a8e5371360a3bdf Mon Sep 17 00:00:00 2001
From: Tim Gymnich <tgymnich@icloud.com>
Date: Thu, 8 Aug 2024 02:51:04 +0200
Subject: [PATCH 231/427] [PowerPC] Respect endianness when bitcasting to fp128
 (#95931)

Fixes #92246

Match the behaviour of `bitcast v2i64 (BUILD_PAIR %lo %hi)` when
encountering `bitcast fp128 (BUILD_PAIR %lo $hi)`.
by inserting a missing swap of the arguments based on endianness.

### Current behaviour:
**fp128**
bitcast fp128 (BUILD_PAIR %lo $hi) => BUILD_FP128 %lo %hi
BUILD_FP128 %lo %hi => MTVSRDD %hi %lo

**v2i64**
bitcast v2i64 (BUILD_PAIR %lo %hi) => BUILD_VECTOR %hi %lo
BUILD_VECTOR %hi %lo => MTVSRDD %lo %hi

(cherry picked from commit 408d82d352eb98e2d0a804c66d359cd7a49228fe)
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp  | 14 +++++++++-----
 llvm/test/CodeGen/PowerPC/f128-aggregates.ll | 12 ++++++------
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index aaf0449a55387..21cf4d9eeac17 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9338,14 +9338,18 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
   SDLoc dl(Op);
   SDValue Op0 = Op->getOperand(0);
 
+  SDValue Lo = Op0.getOperand(0);
+  SDValue Hi = Op0.getOperand(1);
+
   if ((Op.getValueType() != MVT::f128) ||
-      (Op0.getOpcode() != ISD::BUILD_PAIR) ||
-      (Op0.getOperand(0).getValueType() != MVT::i64) ||
-      (Op0.getOperand(1).getValueType() != MVT::i64) || !Subtarget.isPPC64())
+      (Op0.getOpcode() != ISD::BUILD_PAIR) || (Lo.getValueType() != MVT::i64) ||
+      (Hi.getValueType() != MVT::i64) || !Subtarget.isPPC64())
     return SDValue();
 
-  return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
-                     Op0.getOperand(1));
+  if (!Subtarget.isLittleEndian())
+    std::swap(Lo, Hi);
+
+  return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
 }
 
 static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
diff --git a/llvm/test/CodeGen/PowerPC/f128-aggregates.ll b/llvm/test/CodeGen/PowerPC/f128-aggregates.ll
index b3d2457d31eeb..4be855e30ea1d 100644
--- a/llvm/test/CodeGen/PowerPC/f128-aggregates.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-aggregates.ll
@@ -283,7 +283,7 @@ define fp128 @testMixedAggregate([3 x i128] %a.coerce) {
 ;
 ; CHECK-BE-LABEL: testMixedAggregate:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrdd v2, r8, r7
+; CHECK-BE-NEXT:    mtvsrdd v2, r7, r8
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P8-LABEL: testMixedAggregate:
@@ -310,7 +310,7 @@ define fp128 @testMixedAggregate_02([4 x i128] %a.coerce) {
 ;
 ; CHECK-BE-LABEL: testMixedAggregate_02:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrdd v2, r6, r5
+; CHECK-BE-NEXT:    mtvsrdd v2, r5, r6
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P8-LABEL: testMixedAggregate_02:
@@ -344,7 +344,7 @@ define fp128 @testMixedAggregate_03([4 x i128] %sa.coerce) {
 ; CHECK-BE-LABEL: testMixedAggregate_03:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mtvsrwa v2, r4
-; CHECK-BE-NEXT:    mtvsrdd v3, r6, r5
+; CHECK-BE-NEXT:    mtvsrdd v3, r5, r6
 ; CHECK-BE-NEXT:    xscvsdqp v2, v2
 ; CHECK-BE-NEXT:    xsaddqp v2, v3, v2
 ; CHECK-BE-NEXT:    mtvsrd v3, r9
@@ -467,7 +467,7 @@ define fp128 @testUnion_01([1 x i128] %a.coerce) {
 ;
 ; CHECK-BE-LABEL: testUnion_01:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-BE-NEXT:    mtvsrdd v2, r3, r4
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P8-LABEL: testUnion_01:
@@ -494,7 +494,7 @@ define fp128 @testUnion_02([1 x i128] %a.coerce) {
 ;
 ; CHECK-BE-LABEL: testUnion_02:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-BE-NEXT:    mtvsrdd v2, r3, r4
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P8-LABEL: testUnion_02:
@@ -521,7 +521,7 @@ define fp128 @testUnion_03([4 x i128] %a.coerce) {
 ;
 ; CHECK-BE-LABEL: testUnion_03:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mtvsrdd v2, r8, r7
+; CHECK-BE-NEXT:    mtvsrdd v2, r7, r8
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P8-LABEL: testUnion_03:

From 3f768dd6806aeca74bfdf21bde9135d96b137ef3 Mon Sep 17 00:00:00 2001
From: Maciej Gabka <maciej.gabka@arm.com>
Date: Thu, 22 Aug 2024 12:40:01 +0000
Subject: [PATCH 232/427]  Add release note about ABI implementation changes
 for _BitInt on Arm

---
 clang/docs/ReleaseNotes.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index ef5a2fda1ea77..5eed9827343dd 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1221,6 +1221,9 @@ Arm and AArch64 Support
    ``-mabi=pauthtest`` option or via specifying ``pauthtest`` environment part of
    target triple.
 
+ - The C23 ``_BitInt`` implementation has been brought into compliance
+   with AAPCS32 and AAPCS64.
+
 Android Support
 ^^^^^^^^^^^^^^^
 

From daea6b9c40a1ee9d44f5658c182094147bb78340 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@amd.com>
Date: Thu, 22 Aug 2024 11:42:57 +0100
Subject: [PATCH 233/427] [AMDGPU] Add GFX12 test coverage for vmcnt flushing
 in loop headers (#105548)

(cherry picked from commit 61194617ad7862f144e0f6db34175553e8c34763)
---
 .../CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir     | 129 ++++++++++++++++++
 1 file changed, 129 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
index 2417becb7c216..e51174919b8d3 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
@@ -1,5 +1,6 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX9 %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX12 %s
 
 ---
 
@@ -20,6 +21,13 @@
 # GFX10-LABEL: bb.1:
 # GFX10: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop
 body:             |
   bb.0:
@@ -58,6 +66,13 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop_noterm
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop_noterm
 body:             |
   bb.0:
@@ -129,6 +144,13 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop_load
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop_load
 body:             |
   bb.0:
@@ -170,6 +192,13 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop_no_store
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop_no_store
 body:             |
   bb.0:
@@ -212,6 +241,13 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10-NOT: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop_no_use
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop_no_use
 body:             |
   bb.0:
@@ -255,6 +291,14 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10-NOT: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop2
+# GFX12-LABEL: bb.0:
+# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop2
 body:             |
   bb.0:
@@ -294,6 +338,14 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10-NOT: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop2_store
+# GFX12-LABEL: bb.0:
+# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop2_store
 body:             |
   bb.0:
@@ -334,6 +386,13 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop2_use_in_loop
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop2_use_in_loop
 body:             |
   bb.0:
@@ -379,6 +438,15 @@ body:             |
 # GFX10-LABEL: bb.2:
 # GFX10-NOT: S_WAITCNT 16
 # GFX10-LABEL: bb.3:
+
+# GFX12-LABEL: waitcnt_vm_loop2_nowait
+# GFX12-LABEL: bb.0:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.3:
 name:            waitcnt_vm_loop2_nowait
 body:             |
   bb.0:
@@ -427,6 +495,14 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10-NOT: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop2_reginterval
+# GFX12-LABEL: bb.0:
+# GFX12: GLOBAL_LOAD_DWORDX4
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop2_reginterval
 body:             |
   bb.0:
@@ -467,6 +543,13 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop2_reginterval2
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop2_reginterval2
 body:             |
   bb.0:
@@ -513,6 +596,15 @@ body:             |
 # GFX10-NOT: S_WAITCNT 16240
 # GFX10-LABEL: bb.2:
 
+# GFX12-LABEL: waitcnt_vm_zero
+# GFX12-LABEL: bb.0:
+# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
+# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
+
 name:            waitcnt_vm_zero
 body:             |
   bb.0:
@@ -548,6 +640,14 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10-NOT: S_WAITCNT
 
+# GFX12-LABEL: waitcnt_vm_necessary
+# GFX12-LABEL: bb.0:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12: $vgpr4
+# GFX12-NOT: S_WAITCNT
+# GFX12-LABEL: bb.1:
+# GFX12-NOT: S_WAITCNT
+
 # GFX9-LABEL: waitcnt_vm_necessary
 # GFX9-LABEL: bb.0:
 # GFX9: S_WAITCNT 3952
@@ -590,6 +690,13 @@ body:             |
 # GFX10: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
 
+# GFX12-LABEL: waitcnt_vm_loop_global_mem
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
+
 name:            waitcnt_vm_loop_global_mem
 body:             |
   bb.0:
@@ -631,6 +738,13 @@ body:             |
 # GFX10: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
 
+# GFX12-LABEL: waitcnt_vm_loop_scratch_mem
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
+
 name:            waitcnt_vm_loop_scratch_mem
 body:             |
   bb.0:
@@ -671,6 +785,14 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10: S_WAITCNT 11
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop_flat_mem
+# GFX12-LABEL: bb.0:
+# GFX12: FLAT_LOAD_DWORD
+# GFX12-NOT: S_WAIT_LOADCNT_DSCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT_DSCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop_flat_mem
 body:             |
   bb.0:
@@ -713,6 +835,13 @@ body:             |
 # GFX10-LABEL: bb.1:
 # GFX10: S_WAITCNT 16
 # GFX10-LABEL: bb.2:
+
+# GFX12-LABEL: waitcnt_vm_loop_flat_load
+# GFX12-LABEL: bb.0:
+# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop_flat_load
 body:             |
   bb.0:

From 441fb41cb487d286977b7e1cdabc3efe4c2010cf Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@amd.com>
Date: Thu, 22 Aug 2024 11:46:51 +0100
Subject: [PATCH 234/427] [AMDGPU] GFX12 VMEM loads can write VGPR results out
 of order (#105549)

Fix SIInsertWaitcnts to account for this by adding extra waits to avoid
WAW dependencies.

(cherry picked from commit 5506831f7bc8dc04ebe77f4d26940007bfb4ab39)
---
 llvm/lib/Target/AMDGPU/AMDGPU.td              | 23 ++++++++++++++-----
 llvm/lib/Target/AMDGPU/GCNSubtarget.h         |  3 +++
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp   |  7 +++---
 .../buffer-fat-pointer-atomicrmw-fadd.ll      |  3 +++
 .../buffer-fat-pointer-atomicrmw-fmax.ll      |  5 ++++
 .../buffer-fat-pointer-atomicrmw-fmin.ll      |  5 ++++
 ....amdgcn.struct.buffer.load.format.v3f16.ll |  1 +
 llvm/test/CodeGen/AMDGPU/load-constant-i16.ll | 10 +++++++-
 llvm/test/CodeGen/AMDGPU/load-global-i16.ll   | 10 ++++++++
 llvm/test/CodeGen/AMDGPU/load-global-i32.ll   |  2 ++
 .../AMDGPU/spill-csr-frame-ptr-reg-copy.ll    |  1 +
 .../CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir     |  8 +++----
 12 files changed, 64 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 7906e0ee9d785..9efdbd751d96e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -953,6 +953,12 @@ def FeatureRequiredExportPriority : SubtargetFeature<"required-export-priority",
   "Export priority must be explicitly manipulated on GFX11.5"
 >;
 
+def FeatureVmemWriteVgprInOrder : SubtargetFeature<"vmem-write-vgpr-in-order",
+  "HasVmemWriteVgprInOrder",
+  "true",
+  "VMEM instructions of the same type write VGPR results in order"
+>;
+
 //===------------------------------------------------------------===//
 // Subtarget Features (options and debugging)
 //===------------------------------------------------------------===//
@@ -1123,7 +1129,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
   FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel,
   FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
   FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
-  FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts
+  FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
+  FeatureVmemWriteVgprInOrder
   ]
 >;
 
@@ -1136,7 +1143,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
   FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess,
   FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
   FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
-  FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts
+  FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
+  FeatureVmemWriteVgprInOrder
   ]
 >;
 
@@ -1152,7 +1160,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
    FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
    FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
    FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS,
-   FeatureDefaultComponentZero
+   FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder
   ]
 >;
 
@@ -1170,7 +1178,8 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
    FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
    FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
    FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
-   FeatureNegativeScratchOffsetBug, FeatureGWS, FeatureDefaultComponentZero
+   FeatureNegativeScratchOffsetBug, FeatureGWS, FeatureDefaultComponentZero,
+   FeatureVmemWriteVgprInOrder
   ]
 >;
 
@@ -1193,7 +1202,8 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
    FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
    FeatureMaxHardClauseLength63,
    FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
-   FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts
+   FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
+   FeatureVmemWriteVgprInOrder
   ]
 >;
 
@@ -1215,7 +1225,8 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
    FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureGDS,
    FeatureGWS, FeatureDefaultComponentZero,
    FeatureMaxHardClauseLength32,
-   FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts
+   FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
+   FeatureVmemWriteVgprInOrder
   ]
 >;
 
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 902f51ae358d5..9386bcf0d74b2 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -239,6 +239,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool HasVALUTransUseHazard = false;
   bool HasForceStoreSC0SC1 = false;
   bool HasRequiredExportPriority = false;
+  bool HasVmemWriteVgprInOrder = false;
 
   bool RequiresCOV6 = false;
 
@@ -1285,6 +1286,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
 
   bool hasRequiredExportPriority() const { return HasRequiredExportPriority; }
 
+  bool hasVmemWriteVgprInOrder() const { return HasVmemWriteVgprInOrder; }
+
   /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
   /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
   bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 1315aa0855788..9e91548ba598d 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1778,11 +1778,12 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
           if (IsVGPR) {
             // RAW always needs an s_waitcnt. WAW needs an s_waitcnt unless the
             // previous write and this write are the same type of VMEM
-            // instruction, in which case they're guaranteed to write their
-            // results in order anyway.
+            // instruction, in which case they are (in some architectures)
+            // guaranteed to write their results in order anyway.
             if (Op.isUse() || !updateVMCntOnly(MI) ||
                 ScoreBrackets.hasOtherPendingVmemTypes(RegNo,
-                                                       getVmemType(MI))) {
+                                                       getVmemType(MI)) ||
+                !ST->hasVmemWriteVgprInOrder()) {
               ScoreBrackets.determineWait(LOAD_CNT, RegNo, Wait);
               ScoreBrackets.determineWait(SAMPLE_CNT, RegNo, Wait);
               ScoreBrackets.determineWait(BVH_CNT, RegNo, Wait);
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll
index 23e8f98a7861b..dc3fc6529e24f 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll
@@ -1398,6 +1398,7 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__waterfall(ptr a
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX12-NEXT:    s_and_saveexec_b32 s0, s0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    buffer_load_b64 v[13:14], v4, s[4:7], null offen offset:2048
 ; GFX12-NEXT:    ; implicit-def: $vgpr4
 ; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
@@ -2662,6 +2663,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX12-NEXT:    s_and_saveexec_b32 s0, s0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    buffer_load_b32 v7, v10, s[4:7], null offen
 ; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_cbranch_execnz .LBB8_1
@@ -4141,6 +4143,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX12-NEXT:    s_and_saveexec_b32 s0, s0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    buffer_load_b32 v6, v8, s[4:7], null offen
 ; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_cbranch_execnz .LBB11_1
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll
index ec0408236975d..8139f2d2eef3c 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll
@@ -1255,6 +1255,7 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__waterfall(ptr a
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX12-NEXT:    s_and_saveexec_b32 s0, s0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    buffer_load_b64 v[13:14], v4, s[4:7], null offen offset:2048
 ; GFX12-NEXT:    ; implicit-def: $vgpr4
 ; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
@@ -2449,6 +2450,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX12-NEXT:    s_and_saveexec_b32 s0, s0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    buffer_load_b32 v6, v8, s[4:7], null offen
 ; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_cbranch_execnz .LBB8_1
@@ -3949,6 +3951,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX12-NEXT:    s_and_saveexec_b32 s0, s0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    buffer_load_b32 v6, v8, s[4:7], null offen
 ; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_cbranch_execnz .LBB11_1
@@ -5319,6 +5322,7 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__waterfall
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX12-NEXT:    s_and_saveexec_b32 s0, s0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    buffer_load_b32 v6, v4, s[4:7], null offen offset:1024
 ; GFX12-NEXT:    ; implicit-def: $vgpr4
 ; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
@@ -6812,6 +6816,7 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterf
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX12-NEXT:    s_and_saveexec_b32 s0, s0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    buffer_load_b32 v6, v4, s[4:7], null offen offset:1024
 ; GFX12-NEXT:    ; implicit-def: $vgpr4
 ; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll
index cd01cc7309fcd..d029aa6769bab 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll
@@ -1255,6 +1255,7 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__waterfall(ptr a
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX12-NEXT:    s_and_saveexec_b32 s0, s0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    buffer_load_b64 v[13:14], v4, s[4:7], null offen offset:2048
 ; GFX12-NEXT:    ; implicit-def: $vgpr4
 ; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
@@ -2449,6 +2450,7 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX12-NEXT:    s_and_saveexec_b32 s0, s0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    buffer_load_b32 v6, v8, s[4:7], null offen
 ; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_cbranch_execnz .LBB8_1
@@ -3949,6 +3951,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX12-NEXT:    s_and_saveexec_b32 s0, s0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    buffer_load_b32 v6, v8, s[4:7], null offen
 ; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_cbranch_execnz .LBB11_1
@@ -5319,6 +5322,7 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__waterfall
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX12-NEXT:    s_and_saveexec_b32 s0, s0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    buffer_load_b32 v6, v4, s[4:7], null offen offset:1024
 ; GFX12-NEXT:    ; implicit-def: $vgpr4
 ; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
@@ -6812,6 +6816,7 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterf
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX12-NEXT:    s_and_saveexec_b32 s0, s0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    buffer_load_b32 v6, v4, s[4:7], null offen offset:1024
 ; GFX12-NEXT:    ; implicit-def: $vgpr4
 ; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll
index 4c1ae4c228adb..0522d5258b9b5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll
@@ -128,6 +128,7 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX12-NEXT:    s_and_saveexec_b32 s0, s0
+; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    buffer_load_d16_format_xyz v[5:6], v4, s[4:7], null idxen
 ; GFX12-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
 ; GFX12-NEXT:    ; implicit-def: $vgpr4
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
index 355c296d122ff..22b718935738b 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
@@ -745,7 +745,7 @@ define amdgpu_kernel void @constant_load_v16i16_align2(ptr addrspace(4) %ptr0) #
 ; GFX12-NEXT:    s_load_b64 s[0:1], s[2:3], 0x24
 ; GFX12-NEXT:    v_mov_b32_e32 v8, 0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    s_clause 0xf
+; GFX12-NEXT:    s_clause 0x7
 ; GFX12-NEXT:    global_load_u16 v3, v8, s[0:1] offset:28
 ; GFX12-NEXT:    global_load_u16 v2, v8, s[0:1] offset:24
 ; GFX12-NEXT:    global_load_u16 v1, v8, s[0:1] offset:20
@@ -754,13 +754,21 @@ define amdgpu_kernel void @constant_load_v16i16_align2(ptr addrspace(4) %ptr0) #
 ; GFX12-NEXT:    global_load_u16 v6, v8, s[0:1] offset:8
 ; GFX12-NEXT:    global_load_u16 v5, v8, s[0:1] offset:4
 ; GFX12-NEXT:    global_load_u16 v4, v8, s[0:1]
+; GFX12-NEXT:    s_wait_loadcnt 0x7
 ; GFX12-NEXT:    global_load_d16_hi_b16 v3, v8, s[0:1] offset:30
+; GFX12-NEXT:    s_wait_loadcnt 0x7
 ; GFX12-NEXT:    global_load_d16_hi_b16 v2, v8, s[0:1] offset:26
+; GFX12-NEXT:    s_wait_loadcnt 0x7
 ; GFX12-NEXT:    global_load_d16_hi_b16 v1, v8, s[0:1] offset:22
+; GFX12-NEXT:    s_wait_loadcnt 0x7
 ; GFX12-NEXT:    global_load_d16_hi_b16 v0, v8, s[0:1] offset:18
+; GFX12-NEXT:    s_wait_loadcnt 0x7
 ; GFX12-NEXT:    global_load_d16_hi_b16 v7, v8, s[0:1] offset:14
+; GFX12-NEXT:    s_wait_loadcnt 0x7
 ; GFX12-NEXT:    global_load_d16_hi_b16 v6, v8, s[0:1] offset:10
+; GFX12-NEXT:    s_wait_loadcnt 0x7
 ; GFX12-NEXT:    global_load_d16_hi_b16 v5, v8, s[0:1] offset:6
+; GFX12-NEXT:    s_wait_loadcnt 0x7
 ; GFX12-NEXT:    global_load_d16_hi_b16 v4, v8, s[0:1] offset:2
 ; GFX12-NEXT:    s_wait_loadcnt 0x4
 ; GFX12-NEXT:    global_store_b128 v[0:1], v[0:3], off
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
index 142bc37fdeb75..4cc47b09d813d 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
@@ -3563,15 +3563,19 @@ define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(ptr addrspace(1) %ou
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:48
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload
+; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v2, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload
+; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v3, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
+; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
@@ -4371,8 +4375,10 @@ define amdgpu_kernel void @global_sextload_v64i16_to_v64i32(ptr addrspace(1) %ou
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
+; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
@@ -7341,8 +7347,10 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dword v15, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(0)
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v12, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v13, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v14, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
+; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v15, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v13, v39
@@ -7364,8 +7372,10 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:64
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload
+; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v1, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v2, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload
+; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    buffer_load_dword v3, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload
 ; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll
index c0649322c8195..7cdf270810dea 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll
@@ -3091,8 +3091,10 @@ define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(ptr addrspace(1) %ou
 ; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:240
 ; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:192
 ; SI-NOHSA-NEXT:    buffer_load_dword v8, off, s[12:15], 0 ; 4-byte Folded Reload
+; SI-NOHSA-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NOHSA-NEXT:    buffer_load_dword v9, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
 ; SI-NOHSA-NEXT:    buffer_load_dword v10, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
+; SI-NOHSA-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NOHSA-NEXT:    buffer_load_dword v11, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
 ; SI-NOHSA-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NOHSA-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
diff --git a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
index b045dd559aac2..34bcc3f02ac66 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
@@ -15,6 +15,7 @@
 
 ; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 4
 ; GCN: s_xor_saveexec_b64
+; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
 ; GCN-NEXT: s_mov_b64 exec, -1
 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
index e51174919b8d3..bdef55ab956a0 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
@@ -297,7 +297,7 @@ body:             |
 # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
 # GFX12: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.1:
-# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop2
 body:             |
@@ -344,7 +344,7 @@ body:             |
 # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
 # GFX12: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.1:
-# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop2_store
 body:             |
@@ -445,7 +445,7 @@ body:             |
 # GFX12-LABEL: bb.1:
 # GFX12-NOT: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.2:
-# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.3:
 name:            waitcnt_vm_loop2_nowait
 body:             |
@@ -602,7 +602,7 @@ body:             |
 # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
 # GFX12: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.1:
-# GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.2:
 
 name:            waitcnt_vm_zero

From 09cca6b1897d501020c02769f9a937401f13e37a Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@amd.com>
Date: Fri, 23 Aug 2024 10:31:33 +0100
Subject: [PATCH 235/427] [AMDGPU] Remove one case of vmcnt loop header
 flushing for GFX12 (#105550)

When a loop contains a VMEM load whose result is only used outside the
loop, do not bother to flush vmcnt in the loop head on GFX12. A wait for
vmcnt will be required inside the loop anyway, because VMEM instructions
can write their VGPR results out of order.

(cherry picked from commit fa2dccb377d0b712223efe5b62e5fc633580a9e6)
---
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp     |  2 +-
 llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 9e91548ba598d..13130db884dc1 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -2390,7 +2390,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
   }
   if (!ST->hasVscnt() && HasVMemStore && !HasVMemLoad && UsesVgprLoadedOutside)
     return true;
-  return HasVMemLoad && UsesVgprLoadedOutside;
+  return HasVMemLoad && UsesVgprLoadedOutside && ST->hasVmemWriteVgprInOrder();
 }
 
 bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
index bdef55ab956a0..0ddd2aa285b26 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
@@ -295,7 +295,7 @@ body:             |
 # GFX12-LABEL: waitcnt_vm_loop2
 # GFX12-LABEL: bb.0:
 # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
-# GFX12: S_WAIT_LOADCNT 0
+# GFX12-NOT: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.1:
 # GFX12: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.2:
@@ -342,7 +342,7 @@ body:             |
 # GFX12-LABEL: waitcnt_vm_loop2_store
 # GFX12-LABEL: bb.0:
 # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
-# GFX12: S_WAIT_LOADCNT 0
+# GFX12-NOT: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.1:
 # GFX12: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.2:
@@ -499,9 +499,9 @@ body:             |
 # GFX12-LABEL: waitcnt_vm_loop2_reginterval
 # GFX12-LABEL: bb.0:
 # GFX12: GLOBAL_LOAD_DWORDX4
-# GFX12: S_WAIT_LOADCNT 0
-# GFX12-LABEL: bb.1:
 # GFX12-NOT: S_WAIT_LOADCNT 0
+# GFX12-LABEL: bb.1:
+# GFX12: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.2:
 name:            waitcnt_vm_loop2_reginterval
 body:             |
@@ -600,7 +600,7 @@ body:             |
 # GFX12-LABEL: bb.0:
 # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
 # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
-# GFX12: S_WAIT_LOADCNT 0
+# GFX12-NOT: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.1:
 # GFX12: S_WAIT_LOADCNT 0
 # GFX12-LABEL: bb.2:

From 00ff55d61c765467e9a72c0fd570343d3cfb3b43 Mon Sep 17 00:00:00 2001
From: Ian Anderson <iana@apple.com>
Date: Thu, 22 Aug 2024 13:44:58 -0700
Subject: [PATCH 236/427] [libunwind] Stop installing the mach-o module map
 (#105616)

libunwind shouldn't know that compact_unwind_encoding.h is part of a
MachO module that it doesn't own. Delete the mach-o module map, and let
whatever is in charge of the mach-o directory be the one to say how its
module is organized and where compact_unwind_encoding.h fits in.

(cherry picked from commit 172c4a4a147833f1c08df1555f3170aa9ccb6cbe)
---
 libunwind/include/CMakeLists.txt                           | 1 -
 libunwind/include/mach-o/compact_unwind_encoding.modulemap | 4 ----
 2 files changed, 5 deletions(-)
 delete mode 100644 libunwind/include/mach-o/compact_unwind_encoding.modulemap

diff --git a/libunwind/include/CMakeLists.txt b/libunwind/include/CMakeLists.txt
index 51065d68afd4e..6796d67a3354f 100644
--- a/libunwind/include/CMakeLists.txt
+++ b/libunwind/include/CMakeLists.txt
@@ -3,7 +3,6 @@ set(files
     libunwind.h
     libunwind.modulemap
     mach-o/compact_unwind_encoding.h
-    mach-o/compact_unwind_encoding.modulemap
     unwind_arm_ehabi.h
     unwind_itanium.h
     unwind.h
diff --git a/libunwind/include/mach-o/compact_unwind_encoding.modulemap b/libunwind/include/mach-o/compact_unwind_encoding.modulemap
deleted file mode 100644
index 6eae657d31b5c..0000000000000
--- a/libunwind/include/mach-o/compact_unwind_encoding.modulemap
+++ /dev/null
@@ -1,4 +0,0 @@
-module MachO.compact_unwind_encoding [system] {
-  header "compact_unwind_encoding.h"
-  export *
-}

From 1b1ddb767e430a29c35c8b12760b7aa21f508d15 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Sat, 24 Aug 2024 19:12:15 -0700
Subject: [PATCH 237/427] [clang-format] Fix a misannotation of redundant
 r_paren as CastRParen (#105921)

Fixes #105880.

(cherry picked from commit 6bc225e0630f28e83290a43c3d9b25b057fc815a)
---
 clang/lib/Format/TokenAnnotator.cpp           | 2 ++
 clang/unittests/Format/TokenAnnotatorTest.cpp | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 851f79895ac5a..3b1ddf99888ca 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -2874,6 +2874,8 @@ class AnnotatingParser {
     // Search for unexpected tokens.
     for (auto *Prev = BeforeRParen; Prev != LParen; Prev = Prev->Previous) {
       if (Prev->is(tok::r_paren)) {
+        if (Prev->is(TT_CastRParen))
+          return false;
         Prev = Prev->MatchingParen;
         if (!Prev)
           return false;
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index c20b50d14b80b..1f15c00573bad 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -741,6 +741,12 @@ TEST_F(TokenAnnotatorTest, UnderstandsCasts) {
   EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_CastRParen);
   EXPECT_TOKEN(Tokens[10], tok::amp, TT_UnaryOperator);
 
+  Tokens = annotate("int result = ((int)a) - b;");
+  ASSERT_EQ(Tokens.size(), 13u) << Tokens;
+  EXPECT_TOKEN(Tokens[6], tok::r_paren, TT_CastRParen);
+  EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_Unknown);
+  EXPECT_TOKEN(Tokens[9], tok::minus, TT_BinaryOperator);
+
   auto Style = getLLVMStyle();
   Style.TypeNames.push_back("Foo");
   Tokens = annotate("#define FOO(bar) foo((Foo)&bar)", Style);

From d9806ffe4e4d26de9c01f6b8ac0deae169b1d88d Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Sat, 24 Aug 2024 20:10:03 -0700
Subject: [PATCH 238/427] [clang-format] Fix a misannotation of less/greater as
 angle brackets (#105941)

Fixes #105877.

(cherry picked from commit 0916ae49b89db6eb9eee9f6fee4f1a65fd9cdb74)
---
 clang/lib/Format/TokenAnnotator.cpp           | 2 +-
 clang/unittests/Format/TokenAnnotatorTest.cpp | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 3b1ddf99888ca..2f6ffb2e50865 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -249,7 +249,7 @@ class AnnotatingParser {
         if (Precedence > prec::Conditional && Precedence < prec::Relational)
           return false;
       }
-      if (Prev.is(TT_ConditionalExpr))
+      if (Prev.isOneOf(tok::question, tok::colon) && !Style.isProto())
         SeenTernaryOperator = true;
       updateParameterCount(Left, CurrentToken);
       if (Style.Language == FormatStyle::LK_Proto) {
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 1f15c00573bad..51afe1bf2ff9b 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -614,6 +614,11 @@ TEST_F(TokenAnnotatorTest, UnderstandsNonTemplateAngleBrackets) {
   EXPECT_TOKEN(Tokens[2], tok::less, TT_BinaryOperator);
   EXPECT_TOKEN(Tokens[8], tok::greater, TT_BinaryOperator);
 
+  Tokens = annotate("return checklower ? a < b : a > b;");
+  ASSERT_EQ(Tokens.size(), 12u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::less, TT_BinaryOperator);
+  EXPECT_TOKEN(Tokens[8], tok::greater, TT_BinaryOperator);
+
   Tokens = annotate("return A < B ^ A > B;");
   ASSERT_EQ(Tokens.size(), 10u) << Tokens;
   EXPECT_TOKEN(Tokens[2], tok::less, TT_BinaryOperator);

From ed699666de2d82eab266bf41372175da73202834 Mon Sep 17 00:00:00 2001
From: Zaara Syeda <syzaara@ca.ibm.com>
Date: Thu, 22 Aug 2024 09:55:46 -0400
Subject: [PATCH 239/427] [PowerPC] Fix mask for __st[d/w/h/b]cx builtins
 (#104453)

These builtins are currently returning CR0 which will have the format
[0, 0, flag_true_if_saved, XER].
We only want to return flag_true_if_saved. This patch adds a shift to
remove the XER bit before returning.

(cherry picked from commit 327edbe07ab4370ceb20ea7c805f64950871d835)
---
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td             |  4 ++--
 llvm/lib/Target/PowerPC/PPCInstrInfo.td              | 12 ++++++------
 ...-ppc-xlcompat-LoadReserve-StoreCond-64bit-only.ll |  2 +-
 .../builtins-ppc-xlcompat-LoadReserve-StoreCond.ll   |  9 ++++++---
 4 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 8f5afbae01de1..ed39fc67a0a73 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -2014,9 +2014,9 @@ def SLBSYNC : XForm_0<31, 338, (outs), (ins), "slbsync", IIC_SprSLBSYNC, []>;
 } // IsISA3_0
 
 def : Pat<(int_ppc_stdcx ForceXForm:$dst, g8rc:$A),
-          (STDCX g8rc:$A, ForceXForm:$dst)>;
+          (RLWINM (STDCX g8rc:$A, ForceXForm:$dst), 31, 31, 31)>;
 def : Pat<(PPCStoreCond ForceXForm:$dst, g8rc:$A, 8),
-          (STDCX g8rc:$A, ForceXForm:$dst)>;
+          (RLWINM (STDCX g8rc:$A, ForceXForm:$dst), 31, 31, 31)>;
 
 def : Pat<(i64 (int_ppc_mfspr timm:$SPR)),
           (MFSPR8 $SPR)>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 1686249c0f89d..69f8ddc0ea701 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -5286,13 +5286,13 @@ def : Pat<(i64 (bitreverse i64:$A)),
   (OR8 (RLDICR DWBytes7654.DWord, 32, 31), DWBytes3210.DWord)>;
 
 def : Pat<(int_ppc_stwcx ForceXForm:$dst, gprc:$A),
-          (STWCX gprc:$A, ForceXForm:$dst)>;
+          (RLWINM (STWCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
 def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 4),
-          (STWCX gprc:$A, ForceXForm:$dst)>;
+          (RLWINM (STWCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
 def : Pat<(int_ppc_stbcx ForceXForm:$dst, gprc:$A),
-          (STBCX gprc:$A, ForceXForm:$dst)>;
+          (RLWINM (STBCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
 def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 1),
-          (STBCX gprc:$A, ForceXForm:$dst)>;
+          (RLWINM (STBCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
 
 def : Pat<(int_ppc_fcfid f64:$A),
         (XSCVSXDDP $A)>;
@@ -5322,9 +5322,9 @@ def : Pat<(int_ppc_mtmsr gprc:$RS),
 
 let Predicates = [IsISA2_07] in {
   def : Pat<(int_ppc_sthcx ForceXForm:$dst, gprc:$A),
-            (STHCX gprc:$A, ForceXForm:$dst)>;
+            (RLWINM (STHCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
   def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 2),
-            (STHCX gprc:$A, ForceXForm:$dst)>;
+            (RLWINM (STHCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
 }
 def : Pat<(int_ppc_dcbtstt ForceXForm:$dst),
           (DCBTST 16, ForceXForm:$dst)>;
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond-64bit-only.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond-64bit-only.ll
index ddfdcda7a61a7..d765f0845641c 100644
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond-64bit-only.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond-64bit-only.ll
@@ -26,7 +26,7 @@ define dso_local i64 @test_stdcx(ptr %a, i64 %b) {
 ; CHECK-NEXT:    stdcx. 4, 0, 3
 ; CHECK-NEXT:    mfocrf 3, 128
 ; CHECK-NEXT:    srwi 3, 3, 28
-; CHECK-NEXT:    extsw 3, 3
+; CHECK-NEXT:    rlwinm 3, 3, 31, 31, 31
 ; CHECK-NEXT:    blr
 entry:
   %0 = tail call i32 @llvm.ppc.stdcx(ptr %a, i64 %b)
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll
index 8d90c5cb88206..778fd0a37a1ed 100644
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll
@@ -36,7 +36,7 @@ define dso_local signext i32 @test_stwcx(ptr %a, i32 signext %b) {
 ; CHECK-64-NEXT:    stwcx. 4, 0, 3
 ; CHECK-64-NEXT:    mfocrf 3, 128
 ; CHECK-64-NEXT:    srwi 3, 3, 28
-; CHECK-64-NEXT:    extsw 3, 3
+; CHECK-64-NEXT:    rlwinm 3, 3, 31, 31, 31
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: test_stwcx:
@@ -44,6 +44,7 @@ define dso_local signext i32 @test_stwcx(ptr %a, i32 signext %b) {
 ; CHECK-32-NEXT:    stwcx. 4, 0, 3
 ; CHECK-32-NEXT:    mfocrf 3, 128
 ; CHECK-32-NEXT:    srwi 3, 3, 28
+; CHECK-32-NEXT:    rlwinm 3, 3, 31, 31, 31
 ; CHECK-32-NEXT:    blr
 entry:
   %0 = tail call i32 @llvm.ppc.stwcx(ptr %a, i32 %b)
@@ -57,7 +58,7 @@ define dso_local signext i32 @test_sthcx(ptr %a, i16 signext %val) {
 ; CHECK-64-NEXT:    sthcx. 4, 0, 3
 ; CHECK-64-NEXT:    mfocrf 3, 128
 ; CHECK-64-NEXT:    srwi 3, 3, 28
-; CHECK-64-NEXT:    extsw 3, 3
+; CHECK-64-NEXT:    rlwinm 3, 3, 31, 31, 31
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: test_sthcx:
@@ -65,6 +66,7 @@ define dso_local signext i32 @test_sthcx(ptr %a, i16 signext %val) {
 ; CHECK-32-NEXT:    sthcx. 4, 0, 3
 ; CHECK-32-NEXT:    mfocrf 3, 128
 ; CHECK-32-NEXT:    srwi 3, 3, 28
+; CHECK-32-NEXT:    rlwinm 3, 3, 31, 31, 31
 ; CHECK-32-NEXT:    blr
 entry:
   %0 = sext i16 %val to i32
@@ -79,7 +81,7 @@ define signext i32 @test_stbcx(ptr %addr, i8 signext %val) {
 ; CHECK-64-NEXT:    stbcx. 4, 0, 3
 ; CHECK-64-NEXT:    mfocrf 3, 128
 ; CHECK-64-NEXT:    srwi 3, 3, 28
-; CHECK-64-NEXT:    extsw 3, 3
+; CHECK-64-NEXT:    rlwinm 3, 3, 31, 31, 31
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: test_stbcx:
@@ -87,6 +89,7 @@ define signext i32 @test_stbcx(ptr %addr, i8 signext %val) {
 ; CHECK-32-NEXT:    stbcx. 4, 0, 3
 ; CHECK-32-NEXT:    mfocrf 3, 128
 ; CHECK-32-NEXT:    srwi 3, 3, 28
+; CHECK-32-NEXT:    rlwinm 3, 3, 31, 31, 31
 ; CHECK-32-NEXT:    blr
 entry:
   %conv = sext i8 %val to i32

From 456006bc91c3d972f3c549b6296cad5e83630c7d Mon Sep 17 00:00:00 2001
From: SpencerAbson <Spencer.Abson@arm.com>
Date: Fri, 23 Aug 2024 14:27:49 +0100
Subject: [PATCH 240/427] [clang][AArch64] Add SME2.1 feature macros (#105657)

(cherry picked from commit 2617023923175b0fd2a8cb94ad677c061c01627f)
---
 clang/lib/Basic/Targets/AArch64.cpp           | 40 ++++++++++++++-----
 clang/lib/Basic/Targets/AArch64.h             |  3 ++
 .../Preprocessor/aarch64-target-features.c    | 16 ++++++++
 3 files changed, 50 insertions(+), 9 deletions(-)

diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 6ba31cc05a0d7..63fc15f916c55 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -471,23 +471,25 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
   if (HasSVE2 && HasSVE2SM4)
     Builder.defineMacro("__ARM_FEATURE_SVE2_SM4", "1");
 
+  if (HasSVEB16B16)
+    Builder.defineMacro("__ARM_FEATURE_SVE_B16B16", "1");
+
   if (HasSME) {
     Builder.defineMacro("__ARM_FEATURE_SME");
     Builder.defineMacro("__ARM_FEATURE_LOCALLY_STREAMING", "1");
   }
 
-  if (HasSME2) {
-    Builder.defineMacro("__ARM_FEATURE_SME", "1");
+  if (HasSME2)
     Builder.defineMacro("__ARM_FEATURE_SME2", "1");
-    Builder.defineMacro("__ARM_FEATURE_LOCALLY_STREAMING", "1");
-  }
 
-  if (HasSME2p1) {
-    Builder.defineMacro("__ARM_FEATURE_SME", "1");
-    Builder.defineMacro("__ARM_FEATURE_SME2", "1");
+  if (HasSME2p1)
     Builder.defineMacro("__ARM_FEATURE_SME2p1", "1");
-    Builder.defineMacro("__ARM_FEATURE_LOCALLY_STREAMING", "1");
-  }
+
+  if (HasSMEF16F16)
+    Builder.defineMacro("__ARM_FEATURE_SME_F16F16", "1");
+
+  if (HasSMEB16B16)
+    Builder.defineMacro("__ARM_FEATURE_SME_B16B16", "1");
 
   if (HasCRC)
     Builder.defineMacro("__ARM_FEATURE_CRC32", "1");
@@ -749,6 +751,7 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
       .Case("sve", FPU & SveMode)
       .Case("sve-bf16", FPU & SveMode && HasBFloat16)
       .Case("sve-i8mm", FPU & SveMode && HasMatMul)
+      .Case("sve-b16b16", HasSVEB16B16)
       .Case("f32mm", FPU & SveMode && HasMatmulFP32)
       .Case("f64mm", FPU & SveMode && HasMatmulFP64)
       .Case("sve2", FPU & SveMode && HasSVE2)
@@ -763,6 +766,8 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
       .Case("sme-f64f64", HasSMEF64F64)
       .Case("sme-i16i64", HasSMEI16I64)
       .Case("sme-fa64", HasSMEFA64)
+      .Case("sme-f16f16", HasSMEF16F16)
+      .Case("sme-b16b16", HasSMEB16B16)
       .Cases("memtag", "memtag2", HasMTE)
       .Case("sb", HasSB)
       .Case("predres", HasPredRes)
@@ -863,6 +868,8 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasSVE2 = true;
       HasSVE2SM4 = true;
     }
+    if (Feature == "+sve-b16b16")
+      HasSVEB16B16 = true;
     if (Feature == "+sve2-bitperm") {
       FPU |= NeonMode;
       FPU |= SveMode;
@@ -919,6 +926,21 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasSVE2 = true;
       HasSMEFA64 = true;
     }
+    if (Feature == "+sme-f16f16") {
+      HasSME = true;
+      HasSME2 = true;
+      HasBFloat16 = true;
+      HasFullFP16 = true;
+      HasSMEF16F16 = true;
+    }
+    if (Feature == "+sme-b16b16") {
+      HasSME = true;
+      HasSME2 = true;
+      HasBFloat16 = true;
+      HasFullFP16 = true;
+      HasSVEB16B16 = true;
+      HasSMEB16B16 = true;
+    }
     if (Feature == "+sb")
       HasSB = true;
     if (Feature == "+predres")
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 7bdf5a2b4106e..526f7f30a3861 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -53,6 +53,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   bool HasSVE2AES = false;
   bool HasSVE2SHA3 = false;
   bool HasSVE2SM4 = false;
+  bool HasSVEB16B16 = false;
   bool HasSVE2BitPerm = false;
   bool HasMatmulFP64 = false;
   bool HasMatmulFP32 = false;
@@ -71,6 +72,8 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   bool HasSME2 = false;
   bool HasSMEF64F64 = false;
   bool HasSMEI16I64 = false;
+  bool HasSMEF16F16 = false;
+  bool HasSMEB16B16 = false;
   bool HasSME2p1 = false;
   bool HasSB = false;
   bool HasPredRes = false;
diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c
index 87bd3e142d2c4..ae2bdda6f536c 100644
--- a/clang/test/Preprocessor/aarch64-target-features.c
+++ b/clang/test/Preprocessor/aarch64-target-features.c
@@ -709,3 +709,19 @@
 // CHECK-SME2p1: __ARM_FEATURE_SME 1
 // CHECK-SME2p1: __ARM_FEATURE_SME2 1
 // CHECK-SME2p1: __ARM_FEATURE_SME2p1 1
+
+// RUN: %clang --target=aarch64 -march=armv9-a+sve-b16b16 -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVEB16B16 %s
+// CHECK-SVEB16B16: __ARM_FEATURE_SVE_B16B16 1
+
+// RUN: %clang --target=aarch64 -march=armv9-a+sme-f16f16 -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SMEF16F16 %s
+// CHECK-SMEF16F16: __ARM_FEATURE_LOCALLY_STREAMING 1
+// CHECK-SMEF16F16: __ARM_FEATURE_SME 1
+// CHECK-SMEF16F16: __ARM_FEATURE_SME2 1
+// CHECK-SMEF16F16: __ARM_FEATURE_SME_F16F16 1
+
+// RUN: %clang --target=aarch64 -march=armv9-a+sme-b16b16 -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SMEB16B16 %s
+// CHECK-SMEB16B16: __ARM_FEATURE_LOCALLY_STREAMING 1
+// CHECK-SMEB16B16: __ARM_FEATURE_SME 1
+// CHECK-SMEB16B16: __ARM_FEATURE_SME2 1
+// CHECK-SMEB16B16: __ARM_FEATURE_SME_B16B16 1
+// CHECK-SMEB16B16: __ARM_FEATURE_SVE_B16B16 1

From 52ab956704050302926e8afe1c7dbda4578acb9d Mon Sep 17 00:00:00 2001
From: Younan Zhang <zyn7109@gmail.com>
Date: Tue, 27 Aug 2024 09:25:53 +0800
Subject: [PATCH 241/427] [Clang][Sema] Revisit the fix for the lambda within a
 type alias template decl (#89934)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the last patch #82310, we used template depths to tell if such alias
decls contain lambdas, which is wrong because the lambda can also appear
as a part of the default argument, and that would make
`getTemplateInstantiationArgs` provide extra template arguments in
undesired contexts. This leads to issue #89853.

Moreover, our approach
for https://github.com/llvm/llvm-project/issues/82104 was sadly wrong.
We tried to teach `DeduceReturnType` to consider alias template
arguments; however, giving these arguments in the context where they
should have been substituted in a `TransformCallExpr` call is never
correct.

This patch addresses such problems by using a `RecursiveASTVisitor` to
check if the lambda is contained by an alias `Decl`, as well as
twiddling the lambda dependencies - we should also build a dependent
lambda expression if the surrounding alias template arguments were
dependent.

Fixes #89853
Fixes #102760
Fixes #105885

(cherry picked from commit b412ec5d3924c7570c2c96106f95a92403a4e09b)
---
 clang/lib/Sema/SemaTemplateInstantiate.cpp    | 73 +++++++++---------
 .../alias-template-with-lambdas.cpp           | 75 ++++++++++++++++++-
 2 files changed, 110 insertions(+), 38 deletions(-)

diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index 8995d461362d7..a09e3be83c454 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -20,6 +20,7 @@
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprConcepts.h"
 #include "clang/AST/PrettyDeclStackTrace.h"
+#include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Type.h"
 #include "clang/AST/TypeLoc.h"
 #include "clang/AST/TypeVisitor.h"
@@ -87,12 +88,19 @@ struct Response {
 // than lambda classes.
 const FunctionDecl *
 getPrimaryTemplateOfGenericLambda(const FunctionDecl *LambdaCallOperator) {
+  if (!isLambdaCallOperator(LambdaCallOperator))
+    return LambdaCallOperator;
   while (true) {
     if (auto *FTD = dyn_cast_if_present<FunctionTemplateDecl>(
             LambdaCallOperator->getDescribedTemplate());
         FTD && FTD->getInstantiatedFromMemberTemplate()) {
       LambdaCallOperator =
           FTD->getInstantiatedFromMemberTemplate()->getTemplatedDecl();
+    } else if (LambdaCallOperator->getPrimaryTemplate()) {
+      // Cases where the lambda operator is instantiated in
+      // TemplateDeclInstantiator::VisitCXXMethodDecl.
+      LambdaCallOperator =
+          LambdaCallOperator->getPrimaryTemplate()->getTemplatedDecl();
     } else if (auto *Prev = cast<CXXMethodDecl>(LambdaCallOperator)
                                 ->getInstantiatedFromMemberFunction())
       LambdaCallOperator = Prev;
@@ -138,22 +146,28 @@ getEnclosingTypeAliasTemplateDecl(Sema &SemaRef) {
 // Check if we are currently inside of a lambda expression that is
 // surrounded by a using alias declaration. e.g.
 //   template <class> using type = decltype([](auto) { ^ }());
-// By checking if:
-//  1. The lambda expression and the using alias declaration share the
-//  same declaration context.
-//  2. They have the same template depth.
 // We have to do so since a TypeAliasTemplateDecl (or a TypeAliasDecl) is never
 // a DeclContext, nor does it have an associated specialization Decl from which
 // we could collect these template arguments.
 bool isLambdaEnclosedByTypeAliasDecl(
-    const FunctionDecl *PrimaryLambdaCallOperator,
+    const FunctionDecl *LambdaCallOperator,
     const TypeAliasTemplateDecl *PrimaryTypeAliasDecl) {
-  return cast<CXXRecordDecl>(PrimaryLambdaCallOperator->getDeclContext())
-                 ->getTemplateDepth() ==
-             PrimaryTypeAliasDecl->getTemplateDepth() &&
-         getLambdaAwareParentOfDeclContext(
-             const_cast<FunctionDecl *>(PrimaryLambdaCallOperator)) ==
-             PrimaryTypeAliasDecl->getDeclContext();
+  struct Visitor : RecursiveASTVisitor<Visitor> {
+    Visitor(const FunctionDecl *CallOperator) : CallOperator(CallOperator) {}
+    bool VisitLambdaExpr(const LambdaExpr *LE) {
+      // Return true to bail out of the traversal, implying the Decl contains
+      // the lambda.
+      return getPrimaryTemplateOfGenericLambda(LE->getCallOperator()) !=
+             CallOperator;
+    }
+    const FunctionDecl *CallOperator;
+  };
+
+  QualType Underlying =
+      PrimaryTypeAliasDecl->getTemplatedDecl()->getUnderlyingType();
+
+  return !Visitor(getPrimaryTemplateOfGenericLambda(LambdaCallOperator))
+              .TraverseType(Underlying);
 }
 
 // Add template arguments from a variable template instantiation.
@@ -290,23 +304,8 @@ Response HandleFunction(Sema &SemaRef, const FunctionDecl *Function,
 
     // If this function is a generic lambda specialization, we are done.
     if (!ForConstraintInstantiation &&
-        isGenericLambdaCallOperatorOrStaticInvokerSpecialization(Function)) {
-      // TypeAliasTemplateDecls should be taken into account, e.g.
-      // when we're deducing the return type of a lambda.
-      //
-      // template <class> int Value = 0;
-      // template <class T>
-      // using T = decltype([]<int U = 0>() { return Value<T>; }());
-      //
-      if (auto TypeAlias = getEnclosingTypeAliasTemplateDecl(SemaRef)) {
-        if (isLambdaEnclosedByTypeAliasDecl(
-                /*PrimaryLambdaCallOperator=*/getPrimaryTemplateOfGenericLambda(
-                    Function),
-                /*PrimaryTypeAliasDecl=*/TypeAlias.PrimaryTypeAliasDecl))
-          return Response::UseNextDecl(Function);
-      }
+        isGenericLambdaCallOperatorOrStaticInvokerSpecialization(Function))
       return Response::Done();
-    }
 
   } else if (Function->getDescribedFunctionTemplate()) {
     assert(
@@ -418,10 +417,9 @@ Response HandleRecordDecl(Sema &SemaRef, const CXXRecordDecl *Rec,
     // Retrieve the template arguments for a using alias declaration.
     // This is necessary for constraint checking, since we always keep
     // constraints relative to the primary template.
-    if (auto TypeAlias = getEnclosingTypeAliasTemplateDecl(SemaRef)) {
-      const FunctionDecl *PrimaryLambdaCallOperator =
-          getPrimaryTemplateOfGenericLambda(Rec->getLambdaCallOperator());
-      if (isLambdaEnclosedByTypeAliasDecl(PrimaryLambdaCallOperator,
+    if (auto TypeAlias = getEnclosingTypeAliasTemplateDecl(SemaRef);
+        ForConstraintInstantiation && TypeAlias) {
+      if (isLambdaEnclosedByTypeAliasDecl(Rec->getLambdaCallOperator(),
                                           TypeAlias.PrimaryTypeAliasDecl)) {
         Result.addOuterTemplateArguments(TypeAlias.Template,
                                          TypeAlias.AssociatedTemplateArguments,
@@ -1642,12 +1640,17 @@ namespace {
 
     CXXRecordDecl::LambdaDependencyKind
     ComputeLambdaDependency(LambdaScopeInfo *LSI) {
-      auto &CCS = SemaRef.CodeSynthesisContexts.back();
-      if (CCS.Kind ==
-          Sema::CodeSynthesisContext::TypeAliasTemplateInstantiation) {
-        unsigned TypeAliasDeclDepth = CCS.Entity->getTemplateDepth();
+      if (auto TypeAlias =
+              TemplateInstArgsHelpers::getEnclosingTypeAliasTemplateDecl(
+                  getSema());
+          TypeAlias && TemplateInstArgsHelpers::isLambdaEnclosedByTypeAliasDecl(
+                           LSI->CallOperator, TypeAlias.PrimaryTypeAliasDecl)) {
+        unsigned TypeAliasDeclDepth = TypeAlias.Template->getTemplateDepth();
         if (TypeAliasDeclDepth >= TemplateArgs.getNumSubstitutedLevels())
           return CXXRecordDecl::LambdaDependencyKind::LDK_AlwaysDependent;
+        for (const TemplateArgument &TA : TypeAlias.AssociatedTemplateArguments)
+          if (TA.isDependent())
+            return CXXRecordDecl::LambdaDependencyKind::LDK_AlwaysDependent;
       }
       return inherited::ComputeLambdaDependency(LSI);
     }
diff --git a/clang/test/SemaTemplate/alias-template-with-lambdas.cpp b/clang/test/SemaTemplate/alias-template-with-lambdas.cpp
index ff94031e4d86f..5ec93163e4d18 100644
--- a/clang/test/SemaTemplate/alias-template-with-lambdas.cpp
+++ b/clang/test/SemaTemplate/alias-template-with-lambdas.cpp
@@ -91,15 +91,84 @@ void bar() {
 
 namespace GH82104 {
 
-template <typename, typename...> int Zero = 0;
+template <typename, typename... D> constexpr int Value = sizeof...(D);
 
-template <typename T, typename...U>
-using T14 = decltype([]<int V = 0>() { return Zero<T, U...>; }());
+template <typename T, typename... U>
+using T14 = decltype([]<int V = 0>(auto Param) {
+  return Value<T, U...> + V + (int)sizeof(Param);
+}("hello"));
 
 template <typename T> using T15 = T14<T, T>;
 
 static_assert(__is_same(T15<char>, int));
 
+// FIXME: This still crashes because we can't extract template arguments T and U
+// outside of the instantiation context of T16.
+#if 0
+template <typename T, typename... U>
+using T16 = decltype([](auto Param) requires (sizeof(Param) != 1 && sizeof...(U) > 0) {
+  return Value<T, U...> + sizeof(Param);
+});
+static_assert(T16<int, char, float>()(42) == 2 + sizeof(42));
+#endif
 } // namespace GH82104
 
+namespace GH89853 {
+
+template <typename = void>
+static constexpr auto innocuous = []<int m> { return m; };
+
+template <auto Pred = innocuous<>>
+using broken = decltype(Pred.template operator()<42>());
+
+broken<> *boom;
+
+template <auto Pred =
+              []<char c> {
+                (void)static_cast<char>(c);
+              }>
+using broken2 = decltype(Pred.template operator()<42>());
+
+broken2<> *boom2;
+
+template <auto Pred = []<char m> { return m; }>
+using broken3 = decltype(Pred.template operator()<42>());
+
+broken3<> *boom3;
+
+static constexpr auto non_default = []<char c>(True auto) {
+    (void) static_cast<char>(c);
+};
+
+template<True auto Pred>
+using broken4 = decltype(Pred.template operator()<42>(Pred));
+
+broken4<non_default>* boom4;
+
+} // namespace GH89853
+
+namespace GH105885 {
+
+template<int>
+using test = decltype([](auto...) {
+}());
+
+static_assert(__is_same(test<0>, void));
+
+} // namespace GH105885
+
+namespace GH102760 {
+
+auto make_tuple = []< class Tag, class... Captures>(Tag, Captures...) {
+  return []< class _Fun >( _Fun) -> void requires requires { 0; }
+  {};
+};
+
+template < class, class... _As >
+using Result = decltype(make_tuple(0)(_As{}...));
+
+using T = Result<int, int>;
+
+} // namespace GH102760
+
 } // namespace lambda_calls

From 6883c490e04a0f681b95e32eaa74aa82458bdb28 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Tue, 27 Aug 2024 14:22:25 -0400
Subject: [PATCH 242/427] [libc++] Add missing include to
 three_way_comp_ref_type.h

We were using a `_LIBCPP_ASSERT_FOO` macro without including `<__assert>`.

rdar://134425695
(cherry picked from commit 0df78123fdaed39d5135c2e4f4628f515e6d549d)
---
 libcxx/include/__algorithm/three_way_comp_ref_type.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libcxx/include/__algorithm/three_way_comp_ref_type.h b/libcxx/include/__algorithm/three_way_comp_ref_type.h
index 70c5818976f07..5702a1fee0826 100644
--- a/libcxx/include/__algorithm/three_way_comp_ref_type.h
+++ b/libcxx/include/__algorithm/three_way_comp_ref_type.h
@@ -9,6 +9,7 @@
 #ifndef _LIBCPP___ALGORITHM_THREE_WAY_COMP_REF_TYPE_H
 #define _LIBCPP___ALGORITHM_THREE_WAY_COMP_REF_TYPE_H
 
+#include <__assert>
 #include <__compare/ordering.h>
 #include <__config>
 #include <__utility/declval.h>

From 32927ca57e805681fa93ed913c0f0d3c075563b7 Mon Sep 17 00:00:00 2001
From: Alexander Richardson <alexrichardson@google.com>
Date: Tue, 27 Aug 2024 15:37:24 -0700
Subject: [PATCH 243/427] [compiler-rt] Fix definition of `usize` on 32-bit
 Windows

32-bit Windows uses `unsigned int` for uintptr_t and size_t.
Commit 18e06e3e2f3d47433e1ed323b8725c76035fc1ac changed uptr to
unsigned long, so it no longer matches the real size_t/uintptr_t and
therefore the current definition of usize result in:
`error C2821: first formal parameter to 'operator new' must be 'size_t'`

However, the real problem is that uptr is wrong to work around the fact
that we have local SIZE_T and SSIZE_T typedefs that trample on the
basetsd.h definitions of the same name and therefore need to match
exactly. Unlike size_t/ssize_t the uppercase ones always use unsigned
long (even on 32-bit).

This commit works around the build breakage by keeping the existing
definitions of uptr/sptr and just changing usize. A follow-up change
will attempt to fix this properly.

Fixes: https://github.com/llvm/llvm-project/issues/101998

Reviewed By: mstorsjo

Pull Request: https://github.com/llvm/llvm-project/pull/106151

(cherry picked from commit bb27dd853a713866c025a94ead8f03a1e25d1b6e)
---
 .../lib/sanitizer_common/sanitizer_internal_defs.h       | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h
index 294e330c4d561..eebfb00aad7ac 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h
@@ -194,7 +194,16 @@ typedef u64  OFF64_T;
 #ifdef __SIZE_TYPE__
 typedef __SIZE_TYPE__ usize;
 #else
+// Since we use this for operator new, usize must match the real size_t, but on
+// 32-bit Windows the definition of uptr does not actually match uintptr_t or
+// size_t because we are working around typedef mismatches for the (S)SIZE_T
+// types used in interception.h.
+// Until the definition of uptr has been fixed we have to special case Win32.
+#  if SANITIZER_WINDOWS && SANITIZER_WORDSIZE == 32
+typedef unsigned int usize;
+#  else
 typedef uptr usize;
+#  endif
 #endif
 
 typedef u64 tid_t;

From 9ec54c307b6151b1ddb3f7fe3b7cba4d9309b26c Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Tue, 27 Aug 2024 19:13:27 -0700
Subject: [PATCH 244/427] [clang-format] Fix misalignments of pointers in angle
 brackets (#106013)

Fixes #105898.

(cherry picked from commit 656d5aa95825515a55ded61f19d41053c850c82d)
---
 clang/lib/Format/WhitespaceManager.cpp |  4 +++-
 clang/unittests/Format/FormatTest.cpp  | 18 ++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp
index a31874a7c3195..fd4a40a86082e 100644
--- a/clang/lib/Format/WhitespaceManager.cpp
+++ b/clang/lib/Format/WhitespaceManager.cpp
@@ -469,7 +469,9 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End,
     // except if the token is equal, then a space is needed.
     if ((Style.PointerAlignment == FormatStyle::PAS_Right ||
          Style.ReferenceAlignment == FormatStyle::RAS_Right) &&
-        CurrentChange.Spaces != 0 && CurrentChange.Tok->isNot(tok::equal)) {
+        CurrentChange.Spaces != 0 &&
+        !CurrentChange.Tok->isOneOf(tok::equal, tok::r_paren,
+                                    TT_TemplateCloser)) {
       const bool ReferenceNotRightAligned =
           Style.ReferenceAlignment != FormatStyle::RAS_Right &&
           Style.ReferenceAlignment != FormatStyle::RAS_Pointer;
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 82af149e048c9..29200b72d3d00 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -19186,6 +19186,24 @@ TEST_F(FormatTest, AlignConsecutiveAssignments) {
                "X        = func<Type, Type>(looooooooooooooooooooooooong,\n"
                "                            arrrrrrrrrrg);",
                Alignment);
+
+  Alignment.ColumnLimit = 80;
+  Alignment.SpacesInAngles = FormatStyle::SIAS_Always;
+  verifyFormat("void **ptr = reinterpret_cast< void ** >(unkn);\n"
+               "ptr        = reinterpret_cast< void ** >(ptr[0]);",
+               Alignment);
+  verifyFormat("quint32 *dstimg  = reinterpret_cast< quint32 * >(out(i));\n"
+               "quint32 *dstmask = reinterpret_cast< quint32 * >(outmask(i));",
+               Alignment);
+
+  Alignment.SpacesInParens = FormatStyle::SIPO_Custom;
+  Alignment.SpacesInParensOptions.InCStyleCasts = true;
+  verifyFormat("void **ptr = ( void ** )unkn;\n"
+               "ptr        = ( void ** )ptr[0];",
+               Alignment);
+  verifyFormat("quint32 *dstimg  = ( quint32 * )out.scanLine(i);\n"
+               "quint32 *dstmask = ( quint32 * )outmask.scanLine(i);",
+               Alignment);
 }
 
 TEST_F(FormatTest, AlignConsecutiveBitFields) {

From f88180bbc489a587954adfce40cc5c90adc74962 Mon Sep 17 00:00:00 2001
From: Krasimir Georgiev <krasimir@google.com>
Date: Wed, 28 Aug 2024 13:45:17 +0200
Subject: [PATCH 245/427] [clang-format] js handle anonymous classes (#106242)

Addresses a regression in JavaScript when formatting anonymous classes.

---------

Co-authored-by: Owen Pan <owenpiano@gmail.com>
(cherry picked from commit 77d63cfd18aa6643544cf7acd5ee287689d54cca)
---
 clang/lib/Format/UnwrappedLineParser.cpp      | 8 ++++++--
 clang/unittests/Format/FormatTestJS.cpp       | 8 ++++++++
 clang/unittests/Format/TokenAnnotatorTest.cpp | 6 ++++++
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 688c7c5b1e977..5e56a6944a816 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -3978,6 +3978,9 @@ void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
   auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
     return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
   };
+  // JavaScript/TypeScript supports anonymous classes like:
+  // a = class extends foo { }
+  bool JSPastExtendsOrImplements = false;
   // The actual identifier can be a nested name specifier, and in macros
   // it is often token-pasted.
   // An [[attribute]] can be before the identifier.
@@ -3988,6 +3991,7 @@ void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
           FormatTok->isOneOf(tok::period, tok::comma))) {
     if (Style.isJavaScript() &&
         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
+      JSPastExtendsOrImplements = true;
       // JavaScript/TypeScript supports inline object types in
       // extends/implements positions:
       //     class Foo implements {bar: number} { }
@@ -4013,8 +4017,8 @@ void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
     case tok::coloncolon:
       break;
     default:
-      if (!ClassName && Previous->is(tok::identifier) &&
-          Previous->isNot(TT_AttributeMacro)) {
+      if (!JSPastExtendsOrImplements && !ClassName &&
+          Previous->is(tok::identifier) && Previous->isNot(TT_AttributeMacro)) {
         ClassName = Previous;
       }
     }
diff --git a/clang/unittests/Format/FormatTestJS.cpp b/clang/unittests/Format/FormatTestJS.cpp
index b910ce620de7a..4b29ba720f682 100644
--- a/clang/unittests/Format/FormatTestJS.cpp
+++ b/clang/unittests/Format/FormatTestJS.cpp
@@ -579,6 +579,14 @@ TEST_F(FormatTestJS, GoogScopes) {
                "});");
 }
 
+TEST_F(FormatTestJS, GoogAnonymousClass) {
+  verifyFormat("a = class extends goog.structs.a {\n"
+               "  a() {\n"
+               "    return 0;\n"
+               "  }\n"
+               "};");
+}
+
 TEST_F(FormatTestJS, IIFEs) {
   // Internal calling parens; no semi.
   verifyFormat("(function() {\n"
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 51afe1bf2ff9b..fbc7186cac1b8 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -3200,6 +3200,12 @@ TEST_F(TokenAnnotatorTest, BraceKind) {
   EXPECT_BRACE_KIND(Tokens[8], BK_BracedInit);
   EXPECT_BRACE_KIND(Tokens[11], BK_BracedInit);
   EXPECT_BRACE_KIND(Tokens[13], BK_Block);
+
+  Tokens = annotate("a = class extends goog.a {};",
+                    getGoogleStyle(FormatStyle::LK_JavaScript));
+  ASSERT_EQ(Tokens.size(), 11u) << Tokens;
+  EXPECT_TOKEN(Tokens[7], tok::l_brace, TT_ClassLBrace);
+  EXPECT_BRACE_KIND(Tokens[7], BK_Block);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandsElaboratedTypeSpecifier) {

From 491375504831aae3da85ffb288ca3c8b3c94b1ea Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn@outlook.com>
Date: Tue, 6 Aug 2024 21:33:25 -0500
Subject: [PATCH 246/427] Revert "[LinkerWrapper] Extend with usual pass
 options (#96704)" (#102226)

This reverts commit 90ccf2187332ff900d46a58a27cb0353577d37cb.

Fixes: https://github.com/llvm/llvm-project/issues/100212
(cherry picked from commit 030ee841a9c9fbbd6e7c001e751737381da01f7b)

Conflicts:
	clang/test/Driver/linker-wrapper-passes.c
---
 clang/test/Driver/linker-wrapper-passes.c | 71 -----------------------
 clang/test/lit.cfg.py                     | 12 ----
 clang/test/lit.site.cfg.py.in             |  4 --
 3 files changed, 87 deletions(-)
 delete mode 100644 clang/test/Driver/linker-wrapper-passes.c

diff --git a/clang/test/Driver/linker-wrapper-passes.c b/clang/test/Driver/linker-wrapper-passes.c
deleted file mode 100644
index b257c942afa07..0000000000000
--- a/clang/test/Driver/linker-wrapper-passes.c
+++ /dev/null
@@ -1,71 +0,0 @@
-// Check various clang-linker-wrapper pass options after -offload-opt.
-
-// REQUIRES: llvm-plugins, llvm-examples
-// REQUIRES: x86-registered-target
-// REQUIRES: amdgpu-registered-target
-// Setup.
-// RUN: mkdir -p %t
-// RUN: %clang -cc1 -emit-llvm-bc -o %t/host-x86_64-unknown-linux-gnu.bc \
-// RUN:     -disable-O0-optnone -triple=x86_64-unknown-linux-gnu %s
-// RUN: %clang -cc1 -emit-llvm-bc -o %t/openmp-amdgcn-amd-amdhsa.bc \
-// RUN:     -disable-O0-optnone -triple=amdgcn-amd-amdhsa %s
-// RUN: opt %t/openmp-amdgcn-amd-amdhsa.bc -o %t/openmp-amdgcn-amd-amdhsa.bc \
-// RUN:     -passes=forceattrs -force-remove-attribute=f:noinline
-// RUN: clang-offload-packager -o %t/openmp-x86_64-unknown-linux-gnu.out \
-// RUN:     --image=file=%t/openmp-amdgcn-amd-amdhsa.bc,triple=amdgcn-amd-amdhsa
-// RUN: %clang -cc1 -S -o %t/host-x86_64-unknown-linux-gnu.s \
-// RUN:     -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
-// RUN:     -fembed-offload-object=%t/openmp-x86_64-unknown-linux-gnu.out \
-// RUN:     %t/host-x86_64-unknown-linux-gnu.bc
-// RUN: %clang -cc1as -o %t/host-x86_64-unknown-linux-gnu.o \
-// RUN:     -triple x86_64-unknown-linux-gnu -filetype obj -target-cpu x86-64 \
-// RUN:     %t/host-x86_64-unknown-linux-gnu.s
-
-// Check plugin, -passes, and no remarks.
-// RUN: clang-linker-wrapper -o a.out --embed-bitcode \
-// RUN:     --linker-path=/usr/bin/true %t/host-x86_64-unknown-linux-gnu.o \
-// RUN:     %offload-opt-loadbye --offload-opt=-wave-goodbye \
-// RUN:     --offload-opt=-passes="function(goodbye),module(inline)" 2>&1 | \
-// RUN:   FileCheck -match-full-lines -check-prefixes=OUT %s
-
-// Check plugin, -p, and remarks.
-// RUN: clang-linker-wrapper -o a.out --embed-bitcode \
-// RUN:     --linker-path=/usr/bin/true %t/host-x86_64-unknown-linux-gnu.o \
-// RUN:     %offload-opt-loadbye --offload-opt=-wave-goodbye \
-// RUN:     --offload-opt=-p="function(goodbye),module(inline)" \
-// RUN:     --offload-opt=-pass-remarks=inline \
-// RUN:     --offload-opt=-pass-remarks-output=%t/remarks.yml \
-// RUN:     --offload-opt=-pass-remarks-filter=inline \
-// RUN:     --offload-opt=-pass-remarks-format=yaml 2>&1 | \
-// RUN:   FileCheck -match-full-lines -check-prefixes=OUT,REM %s
-// RUN: FileCheck -input-file=%t/remarks.yml -match-full-lines \
-// RUN:     -check-prefixes=YML %s
-
-// Check handling of bad plugin.
-// RUN: not clang-linker-wrapper \
-// RUN:     --offload-opt=-load-pass-plugin=%t/nonexistent.so 2>&1 | \
-// RUN:   FileCheck -match-full-lines -check-prefixes=BAD-PLUGIN %s
-
-//  OUT-NOT: {{.}}
-//      OUT: Bye: f
-// OUT-NEXT: Bye: test
-// REM-NEXT: remark: {{.*}} 'f' inlined into 'test' {{.*}}
-//  OUT-NOT: {{.}}
-
-//  YML-NOT: {{.}}
-//      YML: --- !Passed
-// YML-NEXT: Pass: inline
-// YML-NEXT: Name: Inlined
-// YML-NEXT: Function: test
-// YML-NEXT: Args:
-//      YML:  - Callee: f
-//      YML:  - Caller: test
-//      YML: ...
-//  YML-NOT: {{.}}
-
-// BAD-PLUGIN-NOT: {{.}}
-//     BAD-PLUGIN: {{.*}}Could not load library {{.*}}nonexistent.so{{.*}}
-// BAD-PLUGIN-NOT: {{.}}
-
-void f() {}
-void test() { f(); }
diff --git a/clang/test/lit.cfg.py b/clang/test/lit.cfg.py
index 2bd7501136a10..92a3361ce672e 100644
--- a/clang/test/lit.cfg.py
+++ b/clang/test/lit.cfg.py
@@ -110,15 +110,6 @@
 if config.clang_examples:
     config.available_features.add("examples")
 
-if config.llvm_examples:
-    config.available_features.add("llvm-examples")
-
-if config.llvm_linked_bye_extension:
-    config.substitutions.append(("%offload-opt-loadbye", ""))
-else:
-    loadbye = f"-load-pass-plugin={config.llvm_shlib_dir}/Bye{config.llvm_shlib_ext}"
-    config.substitutions.append(("%offload-opt-loadbye", f"--offload-opt={loadbye}"))
-
 
 def have_host_jit_feature_support(feature_name):
     clang_repl_exe = lit.util.which("clang-repl", config.clang_tools_dir)
@@ -223,9 +214,6 @@ def have_host_clang_repl_cuda():
 if config.has_plugins and config.llvm_plugin_ext:
     config.available_features.add("plugins")
 
-if config.llvm_has_plugins and config.llvm_plugin_ext:
-    config.available_features.add("llvm-plugins")
-
 if config.clang_default_pie_on_linux:
     config.available_features.add("default-pie-on-linux")
 
diff --git a/clang/test/lit.site.cfg.py.in b/clang/test/lit.site.cfg.py.in
index 2cc70e52f1aa1..1cbd876ac5bb9 100644
--- a/clang/test/lit.site.cfg.py.in
+++ b/clang/test/lit.site.cfg.py.in
@@ -7,7 +7,6 @@ config.llvm_obj_root = path(r"@LLVM_BINARY_DIR@")
 config.llvm_tools_dir = lit_config.substitute(path(r"@LLVM_TOOLS_DIR@"))
 config.llvm_libs_dir = lit_config.substitute(path(r"@LLVM_LIBS_DIR@"))
 config.llvm_shlib_dir = lit_config.substitute(path(r"@SHLIBDIR@"))
-config.llvm_shlib_ext = "@SHLIBEXT@"
 config.llvm_plugin_ext = "@LLVM_PLUGIN_EXT@"
 config.lit_tools_dir = path(r"@LLVM_LIT_TOOLS_DIR@")
 config.errc_messages = "@LLVM_LIT_ERRC_MESSAGES@"
@@ -40,10 +39,7 @@ config.python_executable = "@Python3_EXECUTABLE@"
 config.use_z3_solver = lit_config.params.get('USE_Z3_SOLVER', "@USE_Z3_SOLVER@")
 config.has_plugins = @CLANG_PLUGIN_SUPPORT@
 config.clang_vendor_uti = "@CLANG_VENDOR_UTI@"
-config.llvm_examples = @LLVM_BUILD_EXAMPLES@
-config.llvm_linked_bye_extension = @LLVM_BYE_LINK_INTO_TOOLS@
 config.llvm_external_lit = path(r"@LLVM_EXTERNAL_LIT@")
-config.llvm_has_plugins = @LLVM_ENABLE_PLUGINS@
 config.standalone_build = @CLANG_BUILT_STANDALONE@
 config.ppc_linux_default_ieeelongdouble = @PPC_LINUX_DEFAULT_IEEELONGDOUBLE@
 config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@

From bac3db3c8b41c921d8ec895a9dc89ce310a670cb Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Wed, 28 Aug 2024 18:23:54 -0700
Subject: [PATCH 247/427] =?UTF-8?q?[clang-format]=20Revert=20"[clang-forma?=
 =?UTF-8?q?t][NFC]=20Delete=20TT=5FLambdaArrow=20(#70=E2=80=A6=20(#105923)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

…519)"

This reverts commit e00d32afb9d33a1eca48e2b041c9688436706c5b and adds a
test for lambda arrow SplitPenalty.

Fixes #105480.
---
 clang/lib/Format/ContinuationIndenter.cpp     | 10 +++---
 clang/lib/Format/FormatToken.h                |  3 +-
 clang/lib/Format/TokenAnnotator.cpp           | 33 ++++++++++--------
 clang/lib/Format/UnwrappedLineParser.cpp      |  2 +-
 clang/unittests/Format/TokenAnnotatorTest.cpp | 34 ++++++++++++++-----
 5 files changed, 50 insertions(+), 32 deletions(-)

diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index b07360425ca6e..7d89f0e63dd22 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -842,10 +842,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
     CurrentState.ContainsUnwrappedBuilder = true;
   }
 
-  if (Current.is(TT_TrailingReturnArrow) &&
-      Style.Language == FormatStyle::LK_Java) {
+  if (Current.is(TT_LambdaArrow) && Style.Language == FormatStyle::LK_Java)
     CurrentState.NoLineBreak = true;
-  }
   if (Current.isMemberAccess() && Previous.is(tok::r_paren) &&
       (Previous.MatchingParen &&
        (Previous.TotalLength - Previous.MatchingParen->TotalLength > 10))) {
@@ -1000,7 +998,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
   //
   // is common and should be formatted like a free-standing function. The same
   // goes for wrapping before the lambda return type arrow.
-  if (Current.isNot(TT_TrailingReturnArrow) &&
+  if (Current.isNot(TT_LambdaArrow) &&
       (!Style.isJavaScript() || Current.NestingLevel != 0 ||
        !PreviousNonComment || PreviousNonComment->isNot(tok::equal) ||
        !Current.isOneOf(Keywords.kw_async, Keywords.kw_function))) {
@@ -1257,7 +1255,7 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
     }
     return CurrentState.Indent;
   }
-  if (Current.is(TT_TrailingReturnArrow) &&
+  if (Current.is(TT_LambdaArrow) &&
       Previous.isOneOf(tok::kw_noexcept, tok::kw_mutable, tok::kw_constexpr,
                        tok::kw_consteval, tok::kw_static, TT_AttributeSquare)) {
     return ContinuationIndent;
@@ -1590,7 +1588,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
   }
   if (Current.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && Newline)
     CurrentState.NestedBlockIndent = State.Column + Current.ColumnWidth + 1;
-  if (Current.isOneOf(TT_LambdaLSquare, TT_TrailingReturnArrow))
+  if (Current.isOneOf(TT_LambdaLSquare, TT_LambdaArrow))
     CurrentState.LastSpace = State.Column;
   if (Current.is(TT_RequiresExpression) &&
       Style.RequiresExpressionIndentation == FormatStyle::REI_Keyword) {
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index cc45d5a8c5c1e..9bfeb2052164e 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -102,6 +102,7 @@ namespace format {
   TYPE(JsTypeColon)                                                            \
   TYPE(JsTypeOperator)                                                         \
   TYPE(JsTypeOptionalQuestion)                                                 \
+  TYPE(LambdaArrow)                                                            \
   TYPE(LambdaLBrace)                                                           \
   TYPE(LambdaLSquare)                                                          \
   TYPE(LeadingJavaAnnotation)                                                  \
@@ -725,7 +726,7 @@ struct FormatToken {
   bool isMemberAccess() const {
     return isOneOf(tok::arrow, tok::period, tok::arrowstar) &&
            !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow,
-                    TT_LeadingJavaAnnotation);
+                    TT_LambdaArrow, TT_LeadingJavaAnnotation);
   }
 
   bool isPointerOrReference() const {
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 2f6ffb2e50865..3f00a28e62988 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -831,7 +831,7 @@ class AnnotatingParser {
         }
         // An arrow after an ObjC method expression is not a lambda arrow.
         if (CurrentToken->is(TT_ObjCMethodExpr) && CurrentToken->Next &&
-            CurrentToken->Next->is(TT_TrailingReturnArrow)) {
+            CurrentToken->Next->is(TT_LambdaArrow)) {
           CurrentToken->Next->overwriteFixedType(TT_Unknown);
         }
         Left->MatchingParen = CurrentToken;
@@ -1769,8 +1769,10 @@ class AnnotatingParser {
       }
       break;
     case tok::arrow:
-      if (Tok->Previous && Tok->Previous->is(tok::kw_noexcept))
+      if (Tok->isNot(TT_LambdaArrow) && Tok->Previous &&
+          Tok->Previous->is(tok::kw_noexcept)) {
         Tok->setType(TT_TrailingReturnArrow);
+      }
       break;
     case tok::equal:
       // In TableGen, there must be a value after "=";
@@ -2056,11 +2058,11 @@ class AnnotatingParser {
             TT_LambdaLSquare, TT_LambdaLBrace, TT_AttributeMacro, TT_IfMacro,
             TT_ForEachMacro, TT_TypenameMacro, TT_FunctionLBrace,
             TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_FatArrow,
-            TT_NamespaceMacro, TT_OverloadedOperator, TT_RegexLiteral,
-            TT_TemplateString, TT_ObjCStringLiteral, TT_UntouchableMacroFunc,
-            TT_StatementAttributeLikeMacro, TT_FunctionLikeOrFreestandingMacro,
-            TT_ClassLBrace, TT_EnumLBrace, TT_RecordLBrace, TT_StructLBrace,
-            TT_UnionLBrace, TT_RequiresClause,
+            TT_LambdaArrow, TT_NamespaceMacro, TT_OverloadedOperator,
+            TT_RegexLiteral, TT_TemplateString, TT_ObjCStringLiteral,
+            TT_UntouchableMacroFunc, TT_StatementAttributeLikeMacro,
+            TT_FunctionLikeOrFreestandingMacro, TT_ClassLBrace, TT_EnumLBrace,
+            TT_RecordLBrace, TT_StructLBrace, TT_UnionLBrace, TT_RequiresClause,
             TT_RequiresClauseInARequiresExpression, TT_RequiresExpression,
             TT_RequiresExpressionLParen, TT_RequiresExpressionLBrace,
             TT_BracedListLBrace)) {
@@ -2246,7 +2248,7 @@ class AnnotatingParser {
       Contexts.back().IsExpression = true;
     } else if (Current.is(TT_TrailingReturnArrow)) {
       Contexts.back().IsExpression = false;
-    } else if (Current.is(Keywords.kw_assert)) {
+    } else if (Current.isOneOf(TT_LambdaArrow, Keywords.kw_assert)) {
       Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
     } else if (Current.Previous &&
                Current.Previous->is(TT_CtorInitializerColon)) {
@@ -2381,7 +2383,7 @@ class AnnotatingParser {
       AutoFound = true;
     } else if (Current.is(tok::arrow) &&
                Style.Language == FormatStyle::LK_Java) {
-      Current.setType(TT_TrailingReturnArrow);
+      Current.setType(TT_LambdaArrow);
     } else if (Current.is(tok::arrow) && Style.isVerilog()) {
       // The implication operator.
       Current.setType(TT_BinaryOperator);
@@ -3278,7 +3280,7 @@ class ExpressionParser {
       }
       if (Current->is(TT_JsComputedPropertyName))
         return prec::Assignment;
-      if (Current->is(TT_TrailingReturnArrow))
+      if (Current->is(TT_LambdaArrow))
         return prec::Comma;
       if (Current->is(TT_FatArrow))
         return prec::Assignment;
@@ -4202,7 +4204,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
   }
   if (Right.is(TT_PointerOrReference))
     return 190;
-  if (Right.is(TT_TrailingReturnArrow))
+  if (Right.is(TT_LambdaArrow))
     return 110;
   if (Left.is(tok::equal) && Right.is(tok::l_brace))
     return 160;
@@ -5280,9 +5282,10 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
     return false;
   }
 
-  if (Right.is(TT_TrailingReturnArrow) || Left.is(TT_TrailingReturnArrow))
+  if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
+      Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) {
     return true;
-
+  }
   if (Left.is(tok::comma) && Right.isNot(TT_OverloadedOperatorLParen) &&
       // In an unexpanded macro call we only find the parentheses and commas
       // in a line; the commas and closing parenthesis do not require a space.
@@ -6300,8 +6303,8 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
   return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
                       tok::kw_class, tok::kw_struct, tok::comment) ||
          Right.isMemberAccess() ||
-         Right.isOneOf(TT_TrailingReturnArrow, tok::lessless, tok::colon,
-                       tok::l_square, tok::at) ||
+         Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
+                       tok::colon, tok::l_square, tok::at) ||
          (Left.is(tok::r_paren) &&
           Right.isOneOf(tok::identifier, tok::kw_const)) ||
          (Left.is(tok::l_paren) && Right.isNot(tok::r_paren)) ||
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 5e56a6944a816..1a1cb65c43637 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -2322,7 +2322,7 @@ bool UnwrappedLineParser::tryToParseLambda() {
       // This might or might not actually be a lambda arrow (this could be an
       // ObjC method invocation followed by a dereferencing arrow). We might
       // reset this back to TT_Unknown in TokenAnnotator.
-      FormatTok->setFinalizedType(TT_TrailingReturnArrow);
+      FormatTok->setFinalizedType(TT_LambdaArrow);
       SeenArrow = true;
       nextToken();
       break;
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index fbc7186cac1b8..8d71f43dada50 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -42,6 +42,8 @@ class TokenAnnotatorTest : public testing::Test {
   EXPECT_EQ((FormatTok)->getPrecedence(), Prec) << *(FormatTok)
 #define EXPECT_BRACE_KIND(FormatTok, Kind)                                     \
   EXPECT_EQ(FormatTok->getBlockKind(), Kind) << *(FormatTok)
+#define EXPECT_SPLIT_PENALTY(FormatTok, Penalty)                               \
+  EXPECT_EQ(FormatTok->SplitPenalty, Penalty) << *(FormatTok)
 #define EXPECT_TOKEN(FormatTok, Kind, Type)                                    \
   do {                                                                         \
     EXPECT_TOKEN_KIND(FormatTok, Kind);                                        \
@@ -1706,19 +1708,19 @@ TEST_F(TokenAnnotatorTest, UnderstandsLambdas) {
   Tokens = annotate("[]() -> auto {}");
   ASSERT_EQ(Tokens.size(), 9u) << Tokens;
   EXPECT_TOKEN(Tokens[0], tok::l_square, TT_LambdaLSquare);
-  EXPECT_TOKEN(Tokens[4], tok::arrow, TT_TrailingReturnArrow);
+  EXPECT_TOKEN(Tokens[4], tok::arrow, TT_LambdaArrow);
   EXPECT_TOKEN(Tokens[6], tok::l_brace, TT_LambdaLBrace);
 
   Tokens = annotate("[]() -> auto & {}");
   ASSERT_EQ(Tokens.size(), 10u) << Tokens;
   EXPECT_TOKEN(Tokens[0], tok::l_square, TT_LambdaLSquare);
-  EXPECT_TOKEN(Tokens[4], tok::arrow, TT_TrailingReturnArrow);
+  EXPECT_TOKEN(Tokens[4], tok::arrow, TT_LambdaArrow);
   EXPECT_TOKEN(Tokens[7], tok::l_brace, TT_LambdaLBrace);
 
   Tokens = annotate("[]() -> auto * {}");
   ASSERT_EQ(Tokens.size(), 10u) << Tokens;
   EXPECT_TOKEN(Tokens[0], tok::l_square, TT_LambdaLSquare);
-  EXPECT_TOKEN(Tokens[4], tok::arrow, TT_TrailingReturnArrow);
+  EXPECT_TOKEN(Tokens[4], tok::arrow, TT_LambdaArrow);
   EXPECT_TOKEN(Tokens[7], tok::l_brace, TT_LambdaLBrace);
 
   Tokens = annotate("[] {}");
@@ -1734,19 +1736,19 @@ TEST_F(TokenAnnotatorTest, UnderstandsLambdas) {
   Tokens = annotate("[] -> auto {}");
   ASSERT_EQ(Tokens.size(), 7u) << Tokens;
   EXPECT_TOKEN(Tokens[0], tok::l_square, TT_LambdaLSquare);
-  EXPECT_TOKEN(Tokens[2], tok::arrow, TT_TrailingReturnArrow);
+  EXPECT_TOKEN(Tokens[2], tok::arrow, TT_LambdaArrow);
   EXPECT_TOKEN(Tokens[4], tok::l_brace, TT_LambdaLBrace);
 
   Tokens = annotate("[] -> struct S { return {}; }");
   ASSERT_EQ(Tokens.size(), 12u) << Tokens;
   EXPECT_TOKEN(Tokens[0], tok::l_square, TT_LambdaLSquare);
-  EXPECT_TOKEN(Tokens[2], tok::arrow, TT_TrailingReturnArrow);
+  EXPECT_TOKEN(Tokens[2], tok::arrow, TT_LambdaArrow);
   EXPECT_TOKEN(Tokens[5], tok::l_brace, TT_LambdaLBrace);
 
   Tokens = annotate("foo([&](u32 bar) __attribute__((attr)) -> void {});");
   ASSERT_EQ(Tokens.size(), 22u) << Tokens;
   EXPECT_TOKEN(Tokens[2], tok::l_square, TT_LambdaLSquare);
-  EXPECT_TOKEN(Tokens[15], tok::arrow, TT_TrailingReturnArrow);
+  EXPECT_TOKEN(Tokens[15], tok::arrow, TT_LambdaArrow);
   EXPECT_TOKEN(Tokens[17], tok::l_brace, TT_LambdaLBrace);
 
   Tokens = annotate("[] <typename T> () {}");
@@ -1827,7 +1829,7 @@ TEST_F(TokenAnnotatorTest, UnderstandsLambdas) {
   ASSERT_EQ(Tokens.size(), 20u) << Tokens;
   EXPECT_TOKEN(Tokens[0], tok::l_square, TT_LambdaLSquare);
   EXPECT_TOKEN(Tokens[2], tok::less, TT_TemplateOpener);
-  EXPECT_TOKEN(Tokens[10], tok::arrow, TT_TrailingReturnArrow);
+  EXPECT_TOKEN(Tokens[10], tok::arrow, TT_LambdaArrow);
   EXPECT_TOKEN(Tokens[12], tok::kw_requires, TT_RequiresClause);
   EXPECT_TRUE(Tokens[16]->ClosesRequiresClause);
   EXPECT_TOKEN(Tokens[17], tok::l_brace, TT_LambdaLBrace);
@@ -1888,7 +1890,7 @@ TEST_F(TokenAnnotatorTest, UnderstandsLambdas) {
   EXPECT_TOKEN(Tokens[2], tok::less, TT_TemplateOpener);
   EXPECT_TOKEN(Tokens[6], tok::kw_requires, TT_RequiresClause);
   EXPECT_TRUE(Tokens[10]->ClosesRequiresClause);
-  EXPECT_TOKEN(Tokens[11], tok::arrow, TT_TrailingReturnArrow);
+  EXPECT_TOKEN(Tokens[11], tok::arrow, TT_LambdaArrow);
   EXPECT_TOKEN(Tokens[13], tok::l_brace, TT_LambdaLBrace);
 
   Tokens = annotate("[] <typename T> requires Foo<T> (T t) requires Bar<T> {}");
@@ -3321,7 +3323,7 @@ TEST_F(TokenAnnotatorTest, FunctionTryBlock) {
   EXPECT_TOKEN(Tokens[3], tok::l_paren, TT_FunctionDeclarationLParen);
   EXPECT_TOKEN(Tokens[11], tok::colon, TT_CtorInitializerColon);
   EXPECT_TOKEN(Tokens[14], tok::l_square, TT_LambdaLSquare);
-  EXPECT_TOKEN(Tokens[16], tok::arrow, TT_TrailingReturnArrow);
+  EXPECT_TOKEN(Tokens[16], tok::arrow, TT_LambdaArrow);
   EXPECT_TOKEN(Tokens[20], tok::l_brace, TT_LambdaLBrace);
   EXPECT_TOKEN(Tokens[31], tok::comma, TT_CtorInitializerComma);
   EXPECT_TOKEN(Tokens[36], tok::l_brace, TT_FunctionLBrace);
@@ -3339,6 +3341,20 @@ TEST_F(TokenAnnotatorTest, TypenameMacro) {
   EXPECT_TOKEN(Tokens[6], tok::l_brace, TT_Unknown);
 }
 
+TEST_F(TokenAnnotatorTest, SplitPenalty) {
+  auto Style = getLLVMStyle();
+  Style.ColumnLimit = 20;
+
+  auto Tokens = annotate("class foo {\n"
+                         "  auto bar()\n"
+                         "      -> bool;\n"
+                         "};",
+                         Style);
+  ASSERT_EQ(Tokens.size(), 13u) << Tokens;
+  EXPECT_TOKEN(Tokens[7], tok::arrow, TT_TrailingReturnArrow);
+  EXPECT_SPLIT_PENALTY(Tokens[7], 23u);
+}
+
 } // namespace
 } // namespace format
 } // namespace clang

From 72a74e44ef6f27c10a2da55fa67bde22d52516c6 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Wed, 28 Aug 2024 22:18:08 -0700
Subject: [PATCH 248/427] workflows/release-tasks: Pass required secrets to all
 called workflows (#106286)

Called workflows don't have access to secrets by default, so we need to
explicitly pass secrets that we use.

(cherry picked from commit 9d81e7e36e33aecdee05fef551c0652abafaa052)
---
 .github/workflows/release-doxygen.yml |  7 ++++++-
 .github/workflows/release-lit.yml     |  7 ++++++-
 .github/workflows/release-sources.yml |  4 ++++
 .github/workflows/release-tasks.yml   | 12 ++++++++++++
 4 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/release-doxygen.yml b/.github/workflows/release-doxygen.yml
index ef00a438ce7ac..ea95e5bb12b2b 100644
--- a/.github/workflows/release-doxygen.yml
+++ b/.github/workflows/release-doxygen.yml
@@ -25,6 +25,10 @@ on:
         description: 'Upload documentation'
         required: false
         type: boolean
+    secrets:
+      RELEASE_TASKS_USER_TOKEN:
+        description: "Secret used to check user permissions."
+        required: false
 
 jobs:
   release-doxygen:
@@ -63,5 +67,6 @@ jobs:
         if: env.upload
         env:
           GITHUB_TOKEN: ${{ github.token }}
+          USER_TOKEN: ${{ secrets.RELEASE_TASKS_USER_TOKEN }}
         run: |
-          ./llvm/utils/release/github-upload-release.py --token "$GITHUB_TOKEN" --release "${{ inputs.release-version }}" --user "${{ github.actor }}" upload --files ./*doxygen*.tar.xz
+          ./llvm/utils/release/github-upload-release.py --token "$GITHUB_TOKEN" --release "${{ inputs.release-version }}" --user "${{ github.actor }}" --user-token "$USER_TOKEN" upload --files ./*doxygen*.tar.xz
diff --git a/.github/workflows/release-lit.yml b/.github/workflows/release-lit.yml
index 0316ba406041d..9d6f3140e6883 100644
--- a/.github/workflows/release-lit.yml
+++ b/.github/workflows/release-lit.yml
@@ -17,6 +17,10 @@ on:
         description: 'Release Version'
         required: true
         type: string
+    secrets:
+      RELEASE_TASKS_USER_TOKEN:
+        description: "Secret used to check user permissions."
+        required: false
 
 jobs:
   release-lit:
@@ -36,8 +40,9 @@ jobs:
       - name: Check Permissions
         env:
           GITHUB_TOKEN: ${{ github.token }}
+          USER_TOKEN: ${{ secrets.RELEASE_TASKS_USER_TOKEN }}
         run: |
-          ./llvm/utils/release/./github-upload-release.py --token "$GITHUB_TOKEN" --user ${{ github.actor }} check-permissions
+          ./llvm/utils/release/./github-upload-release.py --token "$GITHUB_TOKEN" --user ${{ github.actor }} --user-token "$USER_TOKEN" check-permissions
 
       - name: Setup Cpp
         uses: aminya/setup-cpp@v1
diff --git a/.github/workflows/release-sources.yml b/.github/workflows/release-sources.yml
index 9c5b1a9f01709..edb0449ef7e2c 100644
--- a/.github/workflows/release-sources.yml
+++ b/.github/workflows/release-sources.yml
@@ -16,6 +16,10 @@ on:
         description: Release Version
         required: true
         type: string
+    secrets:
+      RELEASE_TASKS_USER_TOKEN:
+        description: "Secret used to check user permissions."
+        required: false
   # Run on pull_requests for testing purposes.
   pull_request:
     paths:
diff --git a/.github/workflows/release-tasks.yml b/.github/workflows/release-tasks.yml
index cf42730aaf817..780dd0ff6325c 100644
--- a/.github/workflows/release-tasks.yml
+++ b/.github/workflows/release-tasks.yml
@@ -66,6 +66,9 @@ jobs:
     with:
       release-version: ${{ needs.validate-tag.outputs.release-version }}
       upload: true
+    # Called workflows don't have access to secrets by default, so we need to explicitly pass secrets that we use.
+    secrets:
+      RELEASE_TASKS_USER_TOKEN: ${{ secrets.RELEASE_TASKS_USER_TOKEN }}
 
   release-lit:
     name: Release Lit
@@ -73,6 +76,9 @@ jobs:
     uses: ./.github/workflows/release-lit.yml
     with:
       release-version: ${{ needs.validate-tag.outputs.release-version }}
+    # Called workflows don't have access to secrets by default, so we need to explicitly pass secrets that we use.
+    secrets:
+      RELEASE_TASKS_USER_TOKEN: ${{ secrets.RELEASE_TASKS_USER_TOKEN }}
 
   release-binaries:
     name: Build Release Binaries
@@ -97,6 +103,9 @@ jobs:
       release-version: ${{ needs.validate-tag.outputs.release-version }}
       upload: true
       runs-on: ${{ matrix.runs-on }}
+    # Called workflows don't have access to secrets by default, so we need to explicitly pass secrets that we use.
+    secrets:
+      RELEASE_TASKS_USER_TOKEN: ${{ secrets.RELEASE_TASKS_USER_TOKEN }}
 
   release-sources:
     name: Package Release Sources
@@ -109,3 +118,6 @@ jobs:
     uses: ./.github/workflows/release-sources.yml
     with:
       release-version: ${{ needs.validate-tag.outputs.release-version }}
+    # Called workflows don't have access to secrets by default, so we need to explicitly pass secrets that we use.
+    secrets:
+      RELEASE_TASKS_USER_TOKEN: ${{ secrets.RELEASE_TASKS_USER_TOKEN }}

From c8c66e01d83323a3db57fade24befb26b0e6fe84 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Thu, 29 Aug 2024 15:42:57 +0800
Subject: [PATCH 249/427] [C++20] [Modules] Don't insert class not in named
 modules to PendingEmittingVTables (#106501)

Close https://github.com/llvm/llvm-project/issues/102933

The root cause of the issue is an oversight in
https://github.com/llvm/llvm-project/pull/102287 that I didn't notice
that PendingEmittingVTables should only accept classes in named modules.

(cherry picked from commit 47615ff2347a8be429404285de3b1c03b411e7af)
---
 clang/include/clang/Serialization/ASTWriter.h |  4 +-
 clang/lib/Serialization/ASTWriter.cpp         |  3 ++
 clang/test/Modules/pr106483.cppm              | 41 +++++++++++++++++++
 3 files changed, 46 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/Modules/pr106483.cppm

diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 71a7c28047e31..700f0ad001116 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -500,8 +500,8 @@ class ASTWriter : public ASTDeserializationListener,
   std::vector<SourceRange> NonAffectingRanges;
   std::vector<SourceLocation::UIntTy> NonAffectingOffsetAdjustments;
 
-  /// A list of classes which need to emit the VTable in the corresponding
-  /// object file.
+  /// A list of classes in named modules which need to emit the VTable in
+  /// the corresponding object file.
   llvm::SmallVector<CXXRecordDecl *> PendingEmittingVTables;
 
   /// Computes input files that didn't affect compilation of the current module,
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 7c0636962459e..cb63dec92e331 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -3963,6 +3963,9 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
 }
 
 void ASTWriter::handleVTable(CXXRecordDecl *RD) {
+  if (!RD->isInNamedModule())
+    return;
+
   PendingEmittingVTables.push_back(RD);
 }
 
diff --git a/clang/test/Modules/pr106483.cppm b/clang/test/Modules/pr106483.cppm
new file mode 100644
index 0000000000000..a19316b9dd50c
--- /dev/null
+++ b/clang/test/Modules/pr106483.cppm
@@ -0,0 +1,41 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++23 %t/a.cppm -emit-module-interface -o %t/a.pcm
+// RUN: %clang_cc1 -std=c++23 %t/b.cppm -emit-module-interface -o %t/b.pcm \
+// RUN:   -fprebuilt-module-path=%t
+// RUN: %clang_cc1 -std=c++23 -fprebuilt-module-path=%t %t/b.pcm -emit-llvm \
+// RUN:     -disable-llvm-passes -o - | FileCheck %t/b.cppm
+
+//--- a.cppm
+module;
+
+struct base {
+    virtual void f() const;
+};
+
+inline void base::f() const {
+}
+
+export module a;
+export using ::base;
+
+//--- b.cppm
+module;
+
+struct base {
+    virtual void f() const;
+};
+
+inline void base::f() const {
+}
+
+export module b;
+import a;
+export using ::base;
+
+export extern "C" void func() {}
+
+// We only need to check that the IR are successfully emitted instead of crash.
+// CHECK: func

From 03cc174e0307ec90091c31c621bd6cee4338c4da Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinjabot@gmail.com>
Date: Thu, 29 Aug 2024 11:11:39 +0200
Subject: [PATCH 250/427] Revert "[clang] fix broken canonicalization of
 DeducedTemplateSpecializationType (#95202)"

This reverts commit 2e1ad93961a3f444659c5d02d800e3144acccdb4.

Reverting #95202 in the 19.x branch

Fixes #106182

The change in #95202 causes code to crash and there is
no good way to backport a fix for that as there are ABI-impacting
changes at play.
Instead we revert #95202 in the 19x branch, fixing the regression
and preserving the 18.x behavior (which is GCC's behavior)

https://github.com/llvm/llvm-project/pull/106335#discussion_r1735174841
---
 clang/include/clang/AST/ASTContext.h   |  7 ---
 clang/include/clang/AST/TemplateName.h |  4 +-
 clang/include/clang/AST/Type.h         | 11 ++--
 clang/lib/AST/ASTContext.cpp           | 25 +++------
 clang/lib/AST/TemplateName.cpp         |  9 ++++
 clang/unittests/AST/CMakeLists.txt     |  1 -
 clang/unittests/AST/ProfilingTest.cpp  | 75 --------------------------
 7 files changed, 23 insertions(+), 109 deletions(-)
 delete mode 100644 clang/unittests/AST/ProfilingTest.cpp

diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index 6d1c8ca8a2f96..16a19645d7f36 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -1805,13 +1805,6 @@ class ASTContext : public RefCountedBase<ASTContext> {
                                                 QualType DeducedType,
                                                 bool IsDependent) const;
 
-private:
-  QualType getDeducedTemplateSpecializationTypeInternal(TemplateName Template,
-                                                        QualType DeducedType,
-                                                        bool IsDependent,
-                                                        QualType Canon) const;
-
-public:
   /// Return the unique reference to the type for the specified TagDecl
   /// (struct/union/class/enum) decl.
   QualType getTagDeclType(const TagDecl *Decl) const;
diff --git a/clang/include/clang/AST/TemplateName.h b/clang/include/clang/AST/TemplateName.h
index e3b7dd261535d..e7313dee01281 100644
--- a/clang/include/clang/AST/TemplateName.h
+++ b/clang/include/clang/AST/TemplateName.h
@@ -347,9 +347,7 @@ class TemplateName {
   /// error.
   void dump() const;
 
-  void Profile(llvm::FoldingSetNodeID &ID) {
-    ID.AddPointer(Storage.getOpaqueValue());
-  }
+  void Profile(llvm::FoldingSetNodeID &ID);
 
   /// Retrieve the template name as a void pointer.
   void *getAsVoidPointer() const { return Storage.getOpaqueValue(); }
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 25defea58c2dc..9a711030cff9c 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -6421,27 +6421,30 @@ class DeducedTemplateSpecializationType : public DeducedType,
 
   DeducedTemplateSpecializationType(TemplateName Template,
                                     QualType DeducedAsType,
-                                    bool IsDeducedAsDependent, QualType Canon)
+                                    bool IsDeducedAsDependent)
       : DeducedType(DeducedTemplateSpecialization, DeducedAsType,
                     toTypeDependence(Template.getDependence()) |
                         (IsDeducedAsDependent
                              ? TypeDependence::DependentInstantiation
                              : TypeDependence::None),
-                    Canon),
+                    DeducedAsType.isNull() ? QualType(this, 0)
+                                           : DeducedAsType.getCanonicalType()),
         Template(Template) {}
 
 public:
   /// Retrieve the name of the template that we are deducing.
   TemplateName getTemplateName() const { return Template;}
 
-  void Profile(llvm::FoldingSetNodeID &ID) const {
+  void Profile(llvm::FoldingSetNodeID &ID) {
     Profile(ID, getTemplateName(), getDeducedType(), isDependentType());
   }
 
   static void Profile(llvm::FoldingSetNodeID &ID, TemplateName Template,
                       QualType Deduced, bool IsDependent) {
     Template.Profile(ID);
-    Deduced.Profile(ID);
+    QualType CanonicalType =
+        Deduced.isNull() ? Deduced : Deduced.getCanonicalType();
+    ID.AddPointer(CanonicalType.getAsOpaquePtr());
     ID.AddBoolean(IsDependent || Template.isDependent());
   }
 
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 3da5e888f2517..1064507f34616 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -6269,9 +6269,11 @@ QualType ASTContext::getUnconstrainedType(QualType T) const {
   return T;
 }
 
-QualType ASTContext::getDeducedTemplateSpecializationTypeInternal(
-    TemplateName Template, QualType DeducedType, bool IsDependent,
-    QualType Canon) const {
+/// Return the uniqued reference to the deduced template specialization type
+/// which has been deduced to the given type, or to the canonical undeduced
+/// such type, or the canonical deduced-but-dependent such type.
+QualType ASTContext::getDeducedTemplateSpecializationType(
+    TemplateName Template, QualType DeducedType, bool IsDependent) const {
   // Look in the folding set for an existing type.
   void *InsertPos = nullptr;
   llvm::FoldingSetNodeID ID;
@@ -6282,8 +6284,7 @@ QualType ASTContext::getDeducedTemplateSpecializationTypeInternal(
     return QualType(DTST, 0);
 
   auto *DTST = new (*this, alignof(DeducedTemplateSpecializationType))
-      DeducedTemplateSpecializationType(Template, DeducedType, IsDependent,
-                                        Canon);
+      DeducedTemplateSpecializationType(Template, DeducedType, IsDependent);
   llvm::FoldingSetNodeID TempID;
   DTST->Profile(TempID);
   assert(ID == TempID && "ID does not match");
@@ -6292,20 +6293,6 @@ QualType ASTContext::getDeducedTemplateSpecializationTypeInternal(
   return QualType(DTST, 0);
 }
 
-/// Return the uniqued reference to the deduced template specialization type
-/// which has been deduced to the given type, or to the canonical undeduced
-/// such type, or the canonical deduced-but-dependent such type.
-QualType ASTContext::getDeducedTemplateSpecializationType(
-    TemplateName Template, QualType DeducedType, bool IsDependent) const {
-  QualType Canon = DeducedType.isNull()
-                       ? getDeducedTemplateSpecializationTypeInternal(
-                             getCanonicalTemplateName(Template), QualType(),
-                             IsDependent, QualType())
-                       : DeducedType.getCanonicalType();
-  return getDeducedTemplateSpecializationTypeInternal(Template, DeducedType,
-                                                      IsDependent, Canon);
-}
-
 /// getAtomicType - Return the uniqued reference to the atomic type for
 /// the given value type.
 QualType ASTContext::getAtomicType(QualType T) const {
diff --git a/clang/lib/AST/TemplateName.cpp b/clang/lib/AST/TemplateName.cpp
index d4e8a8971a971..11544dbb56e31 100644
--- a/clang/lib/AST/TemplateName.cpp
+++ b/clang/lib/AST/TemplateName.cpp
@@ -264,6 +264,15 @@ bool TemplateName::containsUnexpandedParameterPack() const {
   return getDependence() & TemplateNameDependence::UnexpandedPack;
 }
 
+void TemplateName::Profile(llvm::FoldingSetNodeID &ID) {
+  if (const auto* USD = getAsUsingShadowDecl())
+    ID.AddPointer(USD->getCanonicalDecl());
+  else if (const auto *TD = getAsTemplateDecl())
+    ID.AddPointer(TD->getCanonicalDecl());
+  else
+    ID.AddPointer(Storage.getOpaqueValue());
+}
+
 void TemplateName::print(raw_ostream &OS, const PrintingPolicy &Policy,
                          Qualified Qual) const {
   auto handleAnonymousTTP = [](TemplateDecl *TD, raw_ostream &OS) {
diff --git a/clang/unittests/AST/CMakeLists.txt b/clang/unittests/AST/CMakeLists.txt
index dcc9bc0f39ac2..29d2b39cff8b1 100644
--- a/clang/unittests/AST/CMakeLists.txt
+++ b/clang/unittests/AST/CMakeLists.txt
@@ -31,7 +31,6 @@ add_clang_unittest(ASTTests
   EvaluateAsRValueTest.cpp
   ExternalASTSourceTest.cpp
   NamedDeclPrinterTest.cpp
-  ProfilingTest.cpp
   RandstructTest.cpp
   RecursiveASTVisitorTest.cpp
   SizelessTypesTest.cpp
diff --git a/clang/unittests/AST/ProfilingTest.cpp b/clang/unittests/AST/ProfilingTest.cpp
deleted file mode 100644
index 27a4a197f1cbf..0000000000000
--- a/clang/unittests/AST/ProfilingTest.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-//===- unittests/AST/ProfilingTest.cpp --- Tests for Profiling ------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "clang/AST/ASTContext.h"
-#include "clang/ASTMatchers/ASTMatchFinder.h"
-#include "clang/ASTMatchers/ASTMatchers.h"
-#include "clang/Tooling/Tooling.h"
-#include "gtest/gtest.h"
-#include <utility>
-
-namespace clang {
-namespace {
-using namespace ast_matchers;
-
-static auto getClassTemplateRedecls() {
-  std::string Code = R"cpp(
-    template <class> struct A;
-    template <class> struct A;
-    template <class> struct A;
-  )cpp";
-  auto AST = tooling::buildASTFromCode(Code);
-  ASTContext &Ctx = AST->getASTContext();
-
-  auto MatchResults = match(classTemplateDecl().bind("id"), Ctx);
-  SmallVector<ClassTemplateDecl *, 3> Res;
-  for (BoundNodes &N : MatchResults) {
-    if (auto *CTD = const_cast<ClassTemplateDecl *>(
-            N.getNodeAs<ClassTemplateDecl>("id")))
-      Res.push_back(CTD);
-  }
-  assert(Res.size() == 3);
-#ifndef NDEBUG
-  for (auto &&I : Res)
-    assert(I->getCanonicalDecl() == Res[0]);
-#endif
-  return std::make_tuple(std::move(AST), Res[1], Res[2]);
-}
-
-template <class T> static void testTypeNode(const T *T1, const T *T2) {
-  {
-    llvm::FoldingSetNodeID ID1, ID2;
-    T1->Profile(ID1);
-    T2->Profile(ID2);
-    ASSERT_NE(ID1, ID2);
-  }
-  auto *CT1 = cast<T>(T1->getCanonicalTypeInternal());
-  auto *CT2 = cast<T>(T2->getCanonicalTypeInternal());
-  {
-    llvm::FoldingSetNodeID ID1, ID2;
-    CT1->Profile(ID1);
-    CT2->Profile(ID2);
-    ASSERT_EQ(ID1, ID2);
-  }
-}
-
-TEST(Profiling, DeducedTemplateSpecializationType_Name) {
-  auto [AST, CTD1, CTD2] = getClassTemplateRedecls();
-  ASTContext &Ctx = AST->getASTContext();
-
-  auto *T1 = cast<DeducedTemplateSpecializationType>(
-      Ctx.getDeducedTemplateSpecializationType(TemplateName(CTD1), QualType(),
-                                               false));
-  auto *T2 = cast<DeducedTemplateSpecializationType>(
-      Ctx.getDeducedTemplateSpecializationType(TemplateName(CTD2), QualType(),
-                                               false));
-  testTypeNode(T1, T2);
-}
-
-} // namespace
-} // namespace clang

From 894ec4e3a1d56a5dd5a8205b4fd734136db87cfd Mon Sep 17 00:00:00 2001
From: Luke Shingles <luke.shingles@gmail.com>
Date: Thu, 29 Aug 2024 11:09:07 +0100
Subject: [PATCH 251/427] [analyzer] Add missing include <unordered_map> to
 llvm/lib/Support/Z3Solver.cpp (#106410)

Resolves #106361. Adding #include <unordered_map> to
llvm/lib/Support/Z3Solver.cpp fixes compilation errors for homebrew
build on macOS with Xcode 14.
https://github.com/Homebrew/homebrew-core/actions/runs/10604291631/job/29390993615?pr=181351
shows that this is resolved when the include is patched in (Linux CI
failure is due to unrelated timeout).

(cherry picked from commit fcb3a0485857c749d04ea234a8c3d629c62ab211)
---
 llvm/lib/Support/Z3Solver.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Support/Z3Solver.cpp b/llvm/lib/Support/Z3Solver.cpp
index 5a34ff160f6cf..9aece099b0629 100644
--- a/llvm/lib/Support/Z3Solver.cpp
+++ b/llvm/lib/Support/Z3Solver.cpp
@@ -19,6 +19,7 @@ using namespace llvm;
 #include "llvm/ADT/Twine.h"
 
 #include <set>
+#include <unordered_map>
 
 #include <z3.h>
 

From e1be8cf8723e8577abaeef586ec4c39f30053913 Mon Sep 17 00:00:00 2001
From: Orlando Cazalet-Hyams <orlando.hyams@sony.com>
Date: Wed, 28 Aug 2024 14:20:33 +0100
Subject: [PATCH 252/427] [RemoveDIs] Simplify spliceDebugInfo, fixing
 splice-to-end edge case (#105670)

Not quite NFC, fixes splitBasicBlockBefore case when we split before an
instruction with debug records (but without the headBit set, i.e., we are
splitting before the instruction but after the debug records that come before
it). splitBasicBlockBefore splices the instructions before the split point into
a new block. Prior to this patch, the debug records would get shifted up to the
front of the spliced instructions (as seen in the modified unittest - I believe
the unittest was checking erroneous behaviour). We instead want to leave those
debug records at the end of the spliced instructions.

The functionality of the deleted `else if` branch is covered by the remaining
`if` now that `DestMarker` is set to the trailing marker if `Dest` is `end()`.
Previously the "===" markers were sometimes detached, now we always detach
them and always reattach them.

Note: `deleteTrailingDbgRecords` only "unlinks" the tailing marker from the
block, it doesn't delete anything. The trailing marker is still cleaned up
properly inside the final `if` body with `DestMarker->eraseFromParent();`.

Part 1 of 2 needed for #105571

(cherry picked from commit f5815534d180c544bffd46f09c28b6fc334260fb)
---
 llvm/lib/IR/BasicBlock.cpp                  | 24 +++++++++------------
 llvm/unittests/IR/BasicBlockDbgInfoTest.cpp |  4 ++--
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index bf19934da047c..0a9498f051cb5 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -961,9 +961,13 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
   // Detach the marker at Dest -- this lets us move the "====" DbgRecords
   // around.
   DbgMarker *DestMarker = nullptr;
-  if (Dest != end()) {
-    if ((DestMarker = getMarker(Dest)))
+  if ((DestMarker = getMarker(Dest))) {
+    if (Dest == end()) {
+      assert(DestMarker == getTrailingDbgRecords());
+      deleteTrailingDbgRecords();
+    } else {
       DestMarker->removeFromParent();
+    }
   }
 
   // If we're moving the tail range of DbgRecords (":::"), absorb them into the
@@ -1005,22 +1009,14 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
     } else {
       // Insert them right at the start of the range we moved, ahead of First
       // and the "++++" DbgRecords.
+      // This also covers the rare circumstance where we insert at end(), and we
+      // did not generate the iterator with begin() / getFirstInsertionPt(),
+      // meaning any trailing debug-info at the end of the block would
+      // "normally" have been pushed in front of "First". We move it there now.
       DbgMarker *FirstMarker = createMarker(First);
       FirstMarker->absorbDebugValues(*DestMarker, true);
     }
     DestMarker->eraseFromParent();
-  } else if (Dest == end() && !InsertAtHead) {
-    // In the rare circumstance where we insert at end(), and we did not
-    // generate the iterator with begin() / getFirstInsertionPt(), it means
-    // any trailing debug-info at the end of the block would "normally" have
-    // been pushed in front of "First". Move it there now.
-    DbgMarker *TrailingDbgRecords = getTrailingDbgRecords();
-    if (TrailingDbgRecords) {
-      DbgMarker *FirstMarker = createMarker(First);
-      FirstMarker->absorbDebugValues(*TrailingDbgRecords, true);
-      TrailingDbgRecords->eraseFromParent();
-      deleteTrailingDbgRecords();
-    }
   }
 }
 
diff --git a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
index 91a0745a0cc76..835780e63aaf4 100644
--- a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
+++ b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
@@ -141,11 +141,11 @@ TEST(BasicBlockDbgInfoTest, SplitBasicBlockBefore) {
   Function *F = M->getFunction("func");
 
   BasicBlock &BB = F->getEntryBlock();
-  auto I = std::prev(BB.end(), 2);
+  auto I = std::prev(BB.end(), 2); // store i32 2, ptr %1.
   BB.splitBasicBlockBefore(I, "before");
 
   BasicBlock &BBBefore = F->getEntryBlock();
-  auto I2 = std::prev(BBBefore.end(), 2);
+  auto I2 = std::prev(BBBefore.end()); // br label %1 (new).
   ASSERT_TRUE(I2->hasDbgRecords());
 }
 

From 6f623478d48c171d59e95b25ea2aca49dca8f135 Mon Sep 17 00:00:00 2001
From: Ties Stuij <ties.stuij@arm.com>
Date: Tue, 23 Jul 2024 14:09:34 +0100
Subject: [PATCH 253/427] [libcxx] don't `#include <cwchar>` if wide chars
 aren't enabled (#99911)

Pull request #96032 unconditionall adds the `cwchar` include in the
`format` umbrella header. However support for wchar_t can be disabled in
the build system (LIBCXX_ENABLE_WIDE_CHARACTERS).

This patch guards against inclusion of `cwchar` in `format` by checking
the `_LIBCPP_HAS_NO_WIDE_CHARACTERS` define.

For clarity I've also merged the include header section that `cwchar`
was in with the one above as they were both guarded by the same `#if`
logic.

(cherry picked from commit ec56790c3b27df4fa1513594ca9a74fd8ad5bf7f)
---
 libcxx/include/format | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libcxx/include/format b/libcxx/include/format
index c3f2b45f0f730..a88b3ef8528e2 100644
--- a/libcxx/include/format
+++ b/libcxx/include/format
@@ -237,21 +237,21 @@ namespace std {
 #  include <cstdint>
 #  include <cstdlib>
 #  include <cstring>
-#  include <cwchar>
 #  include <initializer_list>
 #  include <limits>
+#  include <locale>
 #  include <new>
 #  include <optional>
+#  include <queue>
+#  include <stack>
 #  include <stdexcept>
 #  include <string>
 #  include <string_view>
 #  include <tuple>
-#endif
 
-#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
-#  include <locale>
-#  include <queue>
-#  include <stack>
+#  if !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS)
+#    include <cwchar>
+#  endif
 #endif
 
 #endif // _LIBCPP_FORMAT

From 1ccd19c4b297e9a2bd1b2bb6bbb9d9ad2acbab40 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Fri, 30 Aug 2024 19:23:45 -0700
Subject: [PATCH 254/427] [clang-format] Correctly annotate braces in ObjC
 square brackets (#106654)

See
https://github.com/llvm/llvm-project/pull/88238#issuecomment-2316954781.

(cherry picked from commit e0f2368cdeb7312973a92fb2d22199d1de540db8)
---
 clang/lib/Format/UnwrappedLineParser.cpp      | 1 +
 clang/unittests/Format/TokenAnnotatorTest.cpp | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 1a1cb65c43637..53ddb710d2143 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -2668,6 +2668,7 @@ void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
       break;
     }
     case tok::at:
+    case tok::colon:
       nextToken();
       if (FormatTok->is(tok::l_brace)) {
         nextToken();
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 8d71f43dada50..81e2628f28e58 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -3238,6 +3238,15 @@ TEST_F(TokenAnnotatorTest, BlockLBrace) {
   EXPECT_BRACE_KIND(Tokens[4], BK_Block);
   EXPECT_TOKEN(Tokens[5], tok::l_brace, TT_BlockLBrace);
   EXPECT_BRACE_KIND(Tokens[5], BK_Block);
+
+  Tokens = annotate("[foo bar:{{0, 1}} baz:baz];",
+                    getLLVMStyle(FormatStyle::LK_ObjC));
+  ASSERT_EQ(Tokens.size(), 17u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::l_brace, TT_Unknown); // Not TT_BlockLBrace.
+  EXPECT_BRACE_KIND(Tokens[4], BK_Unknown);          // Not BK_Block.
+  EXPECT_BRACE_KIND(Tokens[5], BK_BracedInit);
+  EXPECT_BRACE_KIND(Tokens[9], BK_Unknown);  // Not BK_Block.
+  EXPECT_BRACE_KIND(Tokens[10], BK_Unknown); // Not BK_Block.
 }
 
 TEST_F(TokenAnnotatorTest, SwitchExpression) {

From f131edf6fbe8e2ab7306aba72698daa6153ec91e Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@scylladb.com>
Date: Mon, 26 Aug 2024 17:56:45 +0300
Subject: [PATCH 255/427] [Instrumentation] Fix EdgeCounts vector size in
 SetBranchWeights (#99064)

(cherry picked from commit 46a4132e167aa44d8ec7776262ce2a0e6d47de59)
---
 .../Instrumentation/PGOInstrumentation.cpp    | 14 +++++--
 .../Coroutines/coro-pgo-setbranchweights.ll   | 42 +++++++++++++++++++
 2 files changed, 52 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Transforms/Coroutines/coro-pgo-setbranchweights.ll

diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 1ce8f58c1aa14..4924d5a317478 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -1625,11 +1625,17 @@ void PGOUseFunc::setBranchWeights() {
       continue;
 
     // We have a non-zero Branch BB.
-    unsigned Size = BBCountInfo.OutEdges.size();
-    SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
+
+    // SuccessorCount can be greater than OutEdgesCount, because
+    // removed edges don't appear in OutEdges.
+    unsigned OutEdgesCount = BBCountInfo.OutEdges.size();
+    unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors();
+    assert(OutEdgesCount <= SuccessorCount);
+
+    SmallVector<uint64_t, 2> EdgeCounts(SuccessorCount, 0);
     uint64_t MaxCount = 0;
-    for (unsigned s = 0; s < Size; s++) {
-      const PGOUseEdge *E = BBCountInfo.OutEdges[s];
+    for (unsigned It = 0; It < OutEdgesCount; It++) {
+      const PGOUseEdge *E = BBCountInfo.OutEdges[It];
       const BasicBlock *SrcBB = E->SrcBB;
       const BasicBlock *DestBB = E->DestBB;
       if (DestBB == nullptr)
diff --git a/llvm/test/Transforms/Coroutines/coro-pgo-setbranchweights.ll b/llvm/test/Transforms/Coroutines/coro-pgo-setbranchweights.ll
new file mode 100644
index 0000000000000..4f5f936606ca3
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-pgo-setbranchweights.ll
@@ -0,0 +1,42 @@
+; RUN: rm -rf %t && split-file %s %t
+
+; RUN: llvm-profdata merge %t/a.proftext -o %t/a.profdata
+; RUN: opt < %t/a.ll --passes=pgo-instr-use -pgo-test-profile-file=%t/a.profdata
+
+;--- a.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-redhat-linux-gnu"
+
+define void @_bar() presplitcoroutine personality ptr null {
+  %1 = call token @llvm.coro.save(ptr null)
+  %2 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %2, label %5 [
+    i8 0, label %3
+    i8 1, label %4
+  ]
+
+3:                                                ; preds = %0
+  ret void
+
+4:                                                ; preds = %0
+  ret void
+
+5:                                                ; preds = %0
+  ret void
+}
+
+declare token @llvm.coro.save(ptr)
+
+declare i8 @llvm.coro.suspend(token, i1)
+
+;--- a.proftext
+# IR level Instrumentation Flag
+:ir
+
+_bar
+# Func Hash:
+1063705160175073211
+# Num Counters:
+2
+1
+0

From 6d7e428df611861fb1f5151dea938ebfcc7b1363 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty@gmail.com>
Date: Fri, 30 Aug 2024 12:59:05 +0200
Subject: [PATCH 256/427] [builtins] Fix missing main() function in
 float16/bfloat16 support checks (#104478)

The CMake docs state that `check_c_source_compiles()` checks whether the
supplied code "can be compiled as a C source file and linked as an
executable (so it must contain at least a `main()` function)."

https://cmake.org/cmake/help/v3.30/module/CheckCSourceCompiles.html

In practice, this command is a wrapper around `try_compile()`:

- https://gitlab.kitware.com/cmake/cmake/blob/2904ce00d2ed6ad5dac6d3459af62d8223e06ce0/Modules/CheckCSourceCompiles.cmake#L54
- https://gitlab.kitware.com/cmake/cmake/blob/2904ce00d2ed6ad5dac6d3459af62d8223e06ce0/Modules/Internal/CheckSourceCompiles.cmake#L101

When `CMAKE_SOURCE_DIR` is compiler-rt/lib/builtins/,
`CMAKE_TRY_COMPILE_TARGET_TYPE` is set to `STATIC_LIBRARY`, so the
checks for `float16` and `bfloat16` support work as intended in a
Clang + compiler-rt runtime build for example, as it runs CMake
recursively from that directory.

However, when using llvm/ or compiler-rt/ as CMake source directory, as
`CMAKE_TRY_COMPILE_TARGET_TYPE` defaults to `EXECUTABLE`, these checks
will indeed fail if the code doesn't have a `main()` function. This
results in LLVM using x86 SIMD registers when generating calls to
builtins that, with Arch Linux's compiler-rt package for example,
actually use a GPR for their argument or return value as they use
`uint16_t` instead of `_Float16`.

This had been caught in post-commit review:
https://reviews.llvm.org/D145237#4521152. Use of the internal
`CMAKE_C_COMPILER_WORKS` variable is not what hides the issue, however.

PR #69842 tried to fix this by unconditionally setting
`CMAKE_TRY_COMPILE_TARGET_TYPE` to `STATIC_LIBRARY`, but it apparently
caused other issues, so it was reverted. This PR just adds a `main()`
function in the checks, as per the CMake docs.

(cherry picked from commit 68d8b3846ab1e6550910f2a9a685690eee558af2)
---
 compiler-rt/lib/builtins/CMakeLists.txt | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index abea8c498f7bd..e0b2d08c20775 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -867,10 +867,12 @@ else ()
           endif()
         endif()
       endif()
-      check_c_source_compiles("_Float16 foo(_Float16 x) { return x; }"
+      check_c_source_compiles("_Float16 foo(_Float16 x) { return x; }
+                               int main(void) { return 0; }"
                               COMPILER_RT_HAS_${arch}_FLOAT16)
       append_list_if(COMPILER_RT_HAS_${arch}_FLOAT16 -DCOMPILER_RT_HAS_FLOAT16 BUILTIN_CFLAGS_${arch})
-      check_c_source_compiles("__bf16 foo(__bf16 x) { return x; }"
+      check_c_source_compiles("__bf16 foo(__bf16 x) { return x; }
+                               int main(void) { return 0; }"
                               COMPILER_RT_HAS_${arch}_BFLOAT16)
       # Build BF16 files only when "__bf16" is available.
       if(COMPILER_RT_HAS_${arch}_BFLOAT16)

From 95fa0bee9314a878b3a58d748998c3b3ef42bd75 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Thu, 29 Aug 2024 19:14:19 -0700
Subject: [PATCH 257/427] [clang-format] Correctly identify token-pasted record
 names (#106484)

See
https://github.com/llvm/llvm-project/pull/89706#issuecomment-2315549955.

(cherry picked from commit 7579787e05966f21684dd4b4a15b9deac13d09e1)
---
 clang/lib/Format/UnwrappedLineParser.cpp      |  1 +
 clang/unittests/Format/TokenAnnotatorTest.cpp | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 53ddb710d2143..7f5d157ae9589 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -4016,6 +4016,7 @@ void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
       }
       break;
     case tok::coloncolon:
+    case tok::hashhash:
       break;
     default:
       if (!JSPastExtendsOrImplements && !ClassName &&
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 81e2628f28e58..f0533c92f6538 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -3208,6 +3208,16 @@ TEST_F(TokenAnnotatorTest, BraceKind) {
   ASSERT_EQ(Tokens.size(), 11u) << Tokens;
   EXPECT_TOKEN(Tokens[7], tok::l_brace, TT_ClassLBrace);
   EXPECT_BRACE_KIND(Tokens[7], BK_Block);
+  EXPECT_TOKEN(Tokens[8], tok::r_brace, TT_ClassRBrace);
+  EXPECT_BRACE_KIND(Tokens[8], BK_Block);
+
+  Tokens = annotate("#define FOO(X) \\\n"
+                    "  struct X##_tag_ {};");
+  ASSERT_EQ(Tokens.size(), 14u) << Tokens;
+  EXPECT_TOKEN(Tokens[10], tok::l_brace, TT_StructLBrace);
+  EXPECT_BRACE_KIND(Tokens[10], BK_Block);
+  EXPECT_TOKEN(Tokens[11], tok::r_brace, TT_StructRBrace);
+  EXPECT_BRACE_KIND(Tokens[11], BK_Block);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandsElaboratedTypeSpecifier) {

From d9cb501ec0012de5d4e1c6310df55f4e8af011a9 Mon Sep 17 00:00:00 2001
From: Hans <hans@hanshq.net>
Date: Mon, 2 Sep 2024 15:04:13 +0200
Subject: [PATCH 258/427] Win release packaging: Don't try to use rpmalloc for
 32-bit x86 (#106969)

because that doesn't work (results in `LINK : error LNK2001: unresolved
external symbol malloc`).
Based on the title of #91862 it was only intended for use in 64-bit
builds.

(cherry picked from commit ef26afcb88dcb5f2de79bfc3cf88a8ea10f230ec)
---
 llvm/utils/release/build_llvm_release.bat | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/release/build_llvm_release.bat b/llvm/utils/release/build_llvm_release.bat
index 64ae2d41ab2b0..3508748c1d540 100755
--- a/llvm/utils/release/build_llvm_release.bat
+++ b/llvm/utils/release/build_llvm_release.bat
@@ -193,6 +193,7 @@ REM Stage0 binaries directory; used in stage1.
 set "stage0_bin_dir=%build_dir%/build32_stage0/bin"
 set cmake_flags=^
   %common_cmake_flags% ^
+  -DLLVM_ENABLE_RPMALLOC=OFF ^
   -DLLDB_TEST_COMPILER=%stage0_bin_dir%/clang.exe ^
   -DPYTHON_HOME=%PYTHONHOME% ^
   -DPython3_ROOT_DIR=%PYTHONHOME% ^

From 9b6180ed2ecbbb54f26caa78082e7b955a634117 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?kadir=20=C3=A7etinkaya?= <kadircet@google.com>
Date: Mon, 2 Sep 2024 15:25:26 +0200
Subject: [PATCH 259/427] [clangd] Update TidyFastChecks for release/19.x
 (#106354)

Run for clang-tidy checks available in release/19.x branch.

Some notable findings:
- altera-id-dependent-backward-branch, stays slow with 13%.
- misc-const-correctness become faster, going from 261% to 67%, but
still above
  8% threshold.
- misc-header-include-cycle is a new SLOW check with 10% runtime
implications
- readability-container-size-empty went from 16% to 13%, still SLOW.

(cherry picked from commit b47d7ce8121b1cb1923e879d58eaa1d63aeaaae2)
---
 clang-tools-extra/clangd/TidyFastChecks.inc | 669 +++++++++++---------
 1 file changed, 367 insertions(+), 302 deletions(-)

diff --git a/clang-tools-extra/clangd/TidyFastChecks.inc b/clang-tools-extra/clangd/TidyFastChecks.inc
index 9050ce16127ff..de1a025602fa9 100644
--- a/clang-tools-extra/clangd/TidyFastChecks.inc
+++ b/clang-tools-extra/clangd/TidyFastChecks.inc
@@ -7,370 +7,435 @@
 #define SLOW(CHECK, DELTA)
 #endif
 
-FAST(abseil-cleanup-ctad, -1.0)
+FAST(abseil-cleanup-ctad, -2.0)
 FAST(abseil-duration-addition, 0.0)
-FAST(abseil-duration-comparison, 1.0)
-FAST(abseil-duration-conversion-cast, 3.0)
-FAST(abseil-duration-division, -0.0)
-FAST(abseil-duration-factory-float, 1.0)
-FAST(abseil-duration-factory-scale, -0.0)
-FAST(abseil-duration-subtraction, 1.0)
-FAST(abseil-duration-unnecessary-conversion, 4.0)
-FAST(abseil-faster-strsplit-delimiter, 2.0)
-FAST(abseil-no-internal-dependencies, -1.0)
-FAST(abseil-no-namespace, -1.0)
-FAST(abseil-redundant-strcat-calls, 2.0)
-FAST(abseil-str-cat-append, 1.0)
-FAST(abseil-string-find-startswith, 1.0)
-FAST(abseil-string-find-str-contains, 1.0)
-FAST(abseil-time-comparison, -0.0)
-FAST(abseil-time-subtraction, 0.0)
+FAST(abseil-duration-comparison, -1.0)
+FAST(abseil-duration-conversion-cast, -1.0)
+FAST(abseil-duration-division, 0.0)
+FAST(abseil-duration-factory-float, 2.0)
+FAST(abseil-duration-factory-scale, 1.0)
+FAST(abseil-duration-subtraction, -1.0)
+FAST(abseil-duration-unnecessary-conversion, -0.0)
+FAST(abseil-faster-strsplit-delimiter, 3.0)
+FAST(abseil-no-internal-dependencies, 1.0)
+FAST(abseil-no-namespace, -0.0)
+FAST(abseil-redundant-strcat-calls, 1.0)
+FAST(abseil-str-cat-append, -0.0)
+FAST(abseil-string-find-startswith, -1.0)
+FAST(abseil-string-find-str-contains, 4.0)
+FAST(abseil-time-comparison, -1.0)
+FAST(abseil-time-subtraction, 1.0)
 FAST(abseil-upgrade-duration-conversions, 2.0)
 SLOW(altera-id-dependent-backward-branch, 13.0)
-FAST(altera-kernel-name-restriction, -1.0)
-FAST(altera-single-work-item-barrier, -1.0)
-FAST(altera-struct-pack-align, -1.0)
+FAST(altera-kernel-name-restriction, 4.0)
+FAST(altera-single-work-item-barrier, 1.0)
+FAST(altera-struct-pack-align, -0.0)
 FAST(altera-unroll-loops, 2.0)
-FAST(android-cloexec-accept, -1.0)
-FAST(android-cloexec-accept4, 3.0)
-FAST(android-cloexec-creat, 0.0)
-FAST(android-cloexec-dup, 3.0)
-FAST(android-cloexec-epoll-create, -2.0)
-FAST(android-cloexec-epoll-create1, -1.0)
-FAST(android-cloexec-fopen, -0.0)
-FAST(android-cloexec-inotify-init, 1.0)
-FAST(android-cloexec-inotify-init1, 2.0)
-FAST(android-cloexec-memfd-create, 2.0)
-FAST(android-cloexec-open, -1.0)
-FAST(android-cloexec-pipe, -1.0)
+FAST(android-cloexec-accept, 0.0)
+FAST(android-cloexec-accept4, 1.0)
+FAST(android-cloexec-creat, 1.0)
+FAST(android-cloexec-dup, 0.0)
+FAST(android-cloexec-epoll-create, 2.0)
+FAST(android-cloexec-epoll-create1, 0.0)
+FAST(android-cloexec-fopen, -1.0)
+FAST(android-cloexec-inotify-init, 2.0)
+FAST(android-cloexec-inotify-init1, -0.0)
+FAST(android-cloexec-memfd-create, -1.0)
+FAST(android-cloexec-open, 1.0)
+FAST(android-cloexec-pipe, -0.0)
 FAST(android-cloexec-pipe2, 0.0)
 FAST(android-cloexec-socket, 1.0)
-FAST(android-comparison-in-temp-failure-retry, 0.0)
-FAST(boost-use-to-string, 1.0)
-FAST(bugprone-argument-comment, 2.0)
+FAST(android-comparison-in-temp-failure-retry, 1.0)
+FAST(boost-use-ranges, 2.0)
+FAST(boost-use-to-string, 2.0)
+FAST(bugprone-argument-comment, 4.0)
 FAST(bugprone-assert-side-effect, 1.0)
-FAST(bugprone-assignment-in-if-condition, -0.0)
-FAST(bugprone-bad-signal-to-kill-thread, -1.0)
+FAST(bugprone-assignment-in-if-condition, 2.0)
+FAST(bugprone-bad-signal-to-kill-thread, 1.0)
 FAST(bugprone-bool-pointer-implicit-conversion, 0.0)
-FAST(bugprone-branch-clone, -0.0)
+FAST(bugprone-branch-clone, 1.0)
+FAST(bugprone-casting-through-void, 1.0)
+FAST(bugprone-chained-comparison, 1.0)
+FAST(bugprone-compare-pointer-to-member-virtual-function, -0.0)
 FAST(bugprone-copy-constructor-init, 1.0)
-FAST(bugprone-dangling-handle, 0.0)
-FAST(bugprone-dynamic-static-initializers, 1.0)
+FAST(bugprone-crtp-constructor-accessibility, 0.0)
+FAST(bugprone-dangling-handle, -0.0)
+FAST(bugprone-dynamic-static-initializers, 0.0)
 FAST(bugprone-easily-swappable-parameters, 2.0)
-FAST(bugprone-exception-escape, 1.0)
-FAST(bugprone-fold-init-type, 2.0)
+FAST(bugprone-empty-catch, 1.0)
+FAST(bugprone-exception-escape, 0.0)
+FAST(bugprone-fold-init-type, 1.0)
 FAST(bugprone-forward-declaration-namespace, 0.0)
-FAST(bugprone-forwarding-reference-overload, -0.0)
-FAST(bugprone-implicit-widening-of-multiplication-result, 3.0)
+FAST(bugprone-forwarding-reference-overload, -1.0)
+FAST(bugprone-implicit-widening-of-multiplication-result, 2.0)
 FAST(bugprone-inaccurate-erase, -0.0)
+FAST(bugprone-inc-dec-in-conditions, 3.0)
+FAST(bugprone-incorrect-enable-if, -1.0)
 FAST(bugprone-incorrect-roundings, 1.0)
-FAST(bugprone-infinite-loop, 4.0)
-FAST(bugprone-integer-division, -3.0)
-FAST(bugprone-lambda-function-name, 1.0)
-FAST(bugprone-macro-parentheses, 8.0)
+FAST(bugprone-infinite-loop, 1.0)
+FAST(bugprone-integer-division, -0.0)
+FAST(bugprone-lambda-function-name, 0.0)
+FAST(bugprone-macro-parentheses, 1.0)
 FAST(bugprone-macro-repeated-side-effects, 1.0)
-FAST(bugprone-misplaced-operator-in-strlen-in-alloc, -0.0)
-FAST(bugprone-misplaced-pointer-arithmetic-in-alloc, 2.0)
-FAST(bugprone-misplaced-widening-cast, -2.0)
-FAST(bugprone-move-forwarding-reference, -2.0)
-FAST(bugprone-multiple-statement-macro, 1.0)
-FAST(bugprone-narrowing-conversions, -1.0)
-FAST(bugprone-no-escape, 3.0)
-FAST(bugprone-not-null-terminated-result, 4.0)
-FAST(bugprone-parent-virtual-call, 2.0)
-FAST(bugprone-posix-return, 0.0)
-FAST(bugprone-redundant-branch-condition, 0.0)
-FAST(bugprone-reserved-identifier, 2.0)
+FAST(bugprone-misplaced-operator-in-strlen-in-alloc, 0.0)
+FAST(bugprone-misplaced-pointer-arithmetic-in-alloc, -0.0)
+FAST(bugprone-misplaced-widening-cast, -1.0)
+FAST(bugprone-move-forwarding-reference, -1.0)
+FAST(bugprone-multi-level-implicit-pointer-conversion, -1.0)
+FAST(bugprone-multiple-new-in-one-expression, 0.0)
+FAST(bugprone-multiple-statement-macro, 2.0)
+FAST(bugprone-narrowing-conversions, 2.0)
+FAST(bugprone-no-escape, 1.0)
+FAST(bugprone-non-zero-enum-to-bool-conversion, 0.0)
+FAST(bugprone-not-null-terminated-result, 0.0)
+FAST(bugprone-optional-value-conversion, 1.0)
+FAST(bugprone-parent-virtual-call, 1.0)
+FAST(bugprone-pointer-arithmetic-on-polymorphic-object, 0.0)
+FAST(bugprone-posix-return, -0.0)
+FAST(bugprone-redundant-branch-condition, -0.0)
+FAST(bugprone-reserved-identifier, -1.0)
+FAST(bugprone-return-const-ref-from-parameter, -2.0)
 FAST(bugprone-shared-ptr-array-mismatch, 0.0)
-FAST(bugprone-signal-handler, -0.0)
-FAST(bugprone-signed-char-misuse, 1.0)
-FAST(bugprone-sizeof-container, -0.0)
-FAST(bugprone-sizeof-expression, 0.0)
-FAST(bugprone-spuriously-wake-up-functions, 3.0)
-FAST(bugprone-string-constructor, 1.0)
-FAST(bugprone-string-integer-assignment, 1.0)
-FAST(bugprone-string-literal-with-embedded-nul, 0.0)
-FAST(bugprone-stringview-nullptr, 2.0)
-FAST(bugprone-suspicious-enum-usage, -0.0)
-FAST(bugprone-suspicious-include, 1.0)
+FAST(bugprone-signal-handler, -1.0)
+FAST(bugprone-signed-char-misuse, -2.0)
+FAST(bugprone-sizeof-container, -1.0)
+FAST(bugprone-sizeof-expression, 1.0)
+FAST(bugprone-spuriously-wake-up-functions, 1.0)
+FAST(bugprone-standalone-empty, 7.0)
+FAST(bugprone-string-constructor, 3.0)
+FAST(bugprone-string-integer-assignment, -0.0)
+FAST(bugprone-string-literal-with-embedded-nul, 1.0)
+FAST(bugprone-stringview-nullptr, 4.0)
+FAST(bugprone-suspicious-enum-usage, 2.0)
+FAST(bugprone-suspicious-include, 0.0)
 FAST(bugprone-suspicious-memory-comparison, 0.0)
-FAST(bugprone-suspicious-memset-usage, 2.0)
-FAST(bugprone-suspicious-missing-comma, 0.0)
-FAST(bugprone-suspicious-realloc-usage, 1.0)
-FAST(bugprone-suspicious-semicolon, 0.0)
-FAST(bugprone-suspicious-string-compare, 2.0)
-FAST(bugprone-swapped-arguments, 0.0)
-FAST(bugprone-terminating-continue, 2.0)
-FAST(bugprone-throw-keyword-missing, -1.0)
+FAST(bugprone-suspicious-memset-usage, 0.0)
+FAST(bugprone-suspicious-missing-comma, -2.0)
+FAST(bugprone-suspicious-realloc-usage, -0.0)
+FAST(bugprone-suspicious-semicolon, 6.0)
+FAST(bugprone-suspicious-string-compare, 1.0)
+FAST(bugprone-suspicious-stringview-data-usage, 1.0)
+FAST(bugprone-swapped-arguments, 1.0)
+FAST(bugprone-switch-missing-default-case, 2.0)
+FAST(bugprone-terminating-continue, -1.0)
+FAST(bugprone-throw-keyword-missing, 0.0)
 FAST(bugprone-too-small-loop-variable, 0.0)
-FAST(bugprone-unchecked-optional-access, 1.0)
+FAST(bugprone-unchecked-optional-access, 2.0)
 FAST(bugprone-undefined-memory-manipulation, 1.0)
-FAST(bugprone-undelegated-constructor, 0.0)
-FAST(bugprone-unhandled-exception-at-new, 0.0)
+FAST(bugprone-undelegated-constructor, 1.0)
+FAST(bugprone-unhandled-exception-at-new, -1.0)
 FAST(bugprone-unhandled-self-assignment, 0.0)
-FAST(bugprone-unused-raii, 2.0)
-FAST(bugprone-unused-return-value, 0.0)
-FAST(bugprone-use-after-move, 5.0)
+FAST(bugprone-unique-ptr-array-mismatch, 0.0)
+FAST(bugprone-unsafe-functions, 1.0)
+FAST(bugprone-unused-local-non-trivial-variable, -1.0)
+FAST(bugprone-unused-raii, 1.0)
+FAST(bugprone-unused-return-value, 4.0)
+FAST(bugprone-use-after-move, 4.0)
 FAST(bugprone-virtual-near-miss, 0.0)
-FAST(cert-con36-c, 2.0)
-FAST(cert-con54-cpp, 3.0)
-FAST(cert-dcl03-c, 2.0)
-FAST(cert-dcl16-c, -1.0)
-FAST(cert-dcl37-c, 3.0)
+FAST(cert-con36-c, 1.0)
+FAST(cert-con54-cpp, 2.0)
+FAST(cert-ctr56-cpp, 0.0)
+FAST(cert-dcl03-c, 0.0)
+FAST(cert-dcl16-c, 1.0)
+FAST(cert-dcl37-c, 1.0)
 FAST(cert-dcl50-cpp, -1.0)
-FAST(cert-dcl51-cpp, 1.0)
+FAST(cert-dcl51-cpp, -1.0)
 FAST(cert-dcl54-cpp, 0.0)
-FAST(cert-dcl58-cpp, 1.0)
-FAST(cert-dcl59-cpp, 0.0)
-FAST(cert-env33-c, -1.0)
-FAST(cert-err09-cpp, 1.0)
+FAST(cert-dcl58-cpp, -0.0)
+FAST(cert-dcl59-cpp, 1.0)
+FAST(cert-env33-c, 1.0)
+FAST(cert-err09-cpp, -0.0)
 FAST(cert-err33-c, 4.0)
-FAST(cert-err34-c, 0.0)
-FAST(cert-err52-cpp, 1.0)
-FAST(cert-err58-cpp, 0.0)
-FAST(cert-err60-cpp, 0.0)
-FAST(cert-err61-cpp, -0.0)
-FAST(cert-exp42-c, 0.0)
+FAST(cert-err34-c, -1.0)
+FAST(cert-err52-cpp, -1.0)
+FAST(cert-err58-cpp, -0.0)
+FAST(cert-err60-cpp, -0.0)
+FAST(cert-err61-cpp, 2.0)
+FAST(cert-exp42-c, 1.0)
 FAST(cert-fio38-c, 1.0)
-FAST(cert-flp30-c, 0.0)
+FAST(cert-flp30-c, 3.0)
 FAST(cert-flp37-c, 1.0)
+FAST(cert-int09-c, -1.0)
 FAST(cert-mem57-cpp, 0.0)
-FAST(cert-msc30-c, -0.0)
-FAST(cert-msc32-c, 1.0)
-FAST(cert-msc50-cpp, -1.0)
-FAST(cert-msc51-cpp, 1.0)
-FAST(cert-msc54-cpp, -1.0)
-FAST(cert-oop11-cpp, 1.0)
-FAST(cert-oop54-cpp, -0.0)
-FAST(cert-oop57-cpp, 1.0)
+FAST(cert-msc24-c, 0.0)
+FAST(cert-msc30-c, 0.0)
+FAST(cert-msc32-c, -0.0)
+FAST(cert-msc33-c, 2.0)
+FAST(cert-msc50-cpp, -0.0)
+FAST(cert-msc51-cpp, 2.0)
+FAST(cert-msc54-cpp, -0.0)
+FAST(cert-oop11-cpp, -0.0)
+FAST(cert-oop54-cpp, 2.0)
+FAST(cert-oop57-cpp, -0.0)
 FAST(cert-oop58-cpp, 0.0)
-FAST(cert-pos44-c, 0.0)
-FAST(cert-pos47-c, -0.0)
-FAST(cert-sig30-c, 2.0)
-FAST(cert-str34-c, 0.0)
+FAST(cert-pos44-c, 2.0)
+FAST(cert-pos47-c, 0.0)
+FAST(cert-sig30-c, 1.0)
+FAST(cert-str34-c, 2.0)
 FAST(concurrency-mt-unsafe, 3.0)
 FAST(concurrency-thread-canceltype-asynchronous, 1.0)
-FAST(cppcoreguidelines-avoid-c-arrays, 2.0)
-FAST(cppcoreguidelines-avoid-const-or-ref-data-members, 1.0)
-FAST(cppcoreguidelines-avoid-do-while, -2.0)
-FAST(cppcoreguidelines-avoid-goto, 2.0)
-FAST(cppcoreguidelines-avoid-magic-numbers, 1.0)
+FAST(cppcoreguidelines-avoid-c-arrays, 0.0)
+FAST(cppcoreguidelines-avoid-capturing-lambda-coroutines, -1.0)
+FAST(cppcoreguidelines-avoid-const-or-ref-data-members, -2.0)
+FAST(cppcoreguidelines-avoid-do-while, -1.0)
+FAST(cppcoreguidelines-avoid-goto, -1.0)
+FAST(cppcoreguidelines-avoid-magic-numbers, -2.0)
 FAST(cppcoreguidelines-avoid-non-const-global-variables, -0.0)
+FAST(cppcoreguidelines-avoid-reference-coroutine-parameters, -0.0)
 FAST(cppcoreguidelines-c-copy-assignment-signature, 1.0)
-FAST(cppcoreguidelines-explicit-virtual-functions, -1.0)
-FAST(cppcoreguidelines-init-variables, 2.0)
-FAST(cppcoreguidelines-interfaces-global-init, -1.0)
-FAST(cppcoreguidelines-macro-usage, 2.0)
-FAST(cppcoreguidelines-narrowing-conversions, -2.0)
+FAST(cppcoreguidelines-explicit-virtual-functions, 0.0)
+FAST(cppcoreguidelines-init-variables, 1.0)
+FAST(cppcoreguidelines-interfaces-global-init, 1.0)
+FAST(cppcoreguidelines-macro-to-enum, 0.0)
+FAST(cppcoreguidelines-macro-usage, -0.0)
+FAST(cppcoreguidelines-misleading-capture-default-by-value, -1.0)
+FAST(cppcoreguidelines-missing-std-forward, 0.0)
+FAST(cppcoreguidelines-narrowing-conversions, 2.0)
 FAST(cppcoreguidelines-no-malloc, -1.0)
-FAST(cppcoreguidelines-non-private-member-variables-in-classes, -0.0)
+FAST(cppcoreguidelines-no-suspend-with-lock, 1.0)
+FAST(cppcoreguidelines-noexcept-destructor, -0.0)
+FAST(cppcoreguidelines-noexcept-move-operations, 2.0)
+FAST(cppcoreguidelines-noexcept-swap, -2.0)
+FAST(cppcoreguidelines-non-private-member-variables-in-classes, 1.0)
 FAST(cppcoreguidelines-owning-memory, 3.0)
-FAST(cppcoreguidelines-prefer-member-initializer, 1.0)
-FAST(cppcoreguidelines-pro-bounds-array-to-pointer-decay, 3.0)
-FAST(cppcoreguidelines-pro-bounds-constant-array-index, 3.0)
+FAST(cppcoreguidelines-prefer-member-initializer, 2.0)
+FAST(cppcoreguidelines-pro-bounds-array-to-pointer-decay, 2.0)
+FAST(cppcoreguidelines-pro-bounds-constant-array-index, 1.0)
 FAST(cppcoreguidelines-pro-bounds-pointer-arithmetic, 0.0)
-FAST(cppcoreguidelines-pro-type-const-cast, 1.0)
-FAST(cppcoreguidelines-pro-type-cstyle-cast, -1.0)
+FAST(cppcoreguidelines-pro-type-const-cast, -1.0)
+FAST(cppcoreguidelines-pro-type-cstyle-cast, 2.0)
 FAST(cppcoreguidelines-pro-type-member-init, 1.0)
-FAST(cppcoreguidelines-pro-type-reinterpret-cast, 2.0)
+FAST(cppcoreguidelines-pro-type-reinterpret-cast, -1.0)
 FAST(cppcoreguidelines-pro-type-static-cast-downcast, 0.0)
-FAST(cppcoreguidelines-pro-type-union-access, 1.0)
-FAST(cppcoreguidelines-pro-type-vararg, 1.0)
-FAST(cppcoreguidelines-slicing, 3.0)
+FAST(cppcoreguidelines-pro-type-union-access, 0.0)
+FAST(cppcoreguidelines-pro-type-vararg, -1.0)
+FAST(cppcoreguidelines-rvalue-reference-param-not-moved, 1.0)
+FAST(cppcoreguidelines-slicing, 1.0)
 FAST(cppcoreguidelines-special-member-functions, -1.0)
-FAST(cppcoreguidelines-virtual-class-destructor, 0.0)
-FAST(darwin-avoid-spinlock, 1.0)
-FAST(darwin-dispatch-once-nonstatic, -0.0)
-FAST(fuchsia-default-arguments-calls, 2.0)
+FAST(cppcoreguidelines-use-default-member-init, 0.0)
+FAST(cppcoreguidelines-virtual-class-destructor, -0.0)
+FAST(darwin-avoid-spinlock, 2.0)
+FAST(darwin-dispatch-once-nonstatic, 0.0)
+FAST(fuchsia-default-arguments-calls, 1.0)
 FAST(fuchsia-default-arguments-declarations, -0.0)
 FAST(fuchsia-header-anon-namespaces, -0.0)
-FAST(fuchsia-multiple-inheritance, -1.0)
-FAST(fuchsia-overloaded-operator, -0.0)
+FAST(fuchsia-multiple-inheritance, 0.0)
+FAST(fuchsia-overloaded-operator, 4.0)
 FAST(fuchsia-statically-constructed-objects, -0.0)
-FAST(fuchsia-trailing-return, 2.0)
+FAST(fuchsia-trailing-return, 1.0)
 FAST(fuchsia-virtual-inheritance, 1.0)
-FAST(google-build-explicit-make-pair, 2.0)
+FAST(google-build-explicit-make-pair, 3.0)
 FAST(google-build-namespaces, -1.0)
-FAST(google-build-using-namespace, 0.0)
-FAST(google-default-arguments, 1.0)
+FAST(google-build-using-namespace, -0.0)
+FAST(google-default-arguments, 0.0)
 FAST(google-explicit-constructor, 2.0)
-FAST(google-global-names-in-headers, -1.0)
+FAST(google-global-names-in-headers, 0.0)
 FAST(google-objc-avoid-nsobject-new, 1.0)
-FAST(google-objc-avoid-throwing-exception, 1.0)
-FAST(google-objc-function-naming, 1.0)
-FAST(google-objc-global-variable-declaration, -0.0)
-FAST(google-readability-avoid-underscore-in-googletest-name, -0.0)
-FAST(google-readability-braces-around-statements, 1.0)
-FAST(google-readability-casting, 1.0)
-FAST(google-readability-function-size, 0.0)
-FAST(google-readability-namespace-comments, 0.0)
+FAST(google-objc-avoid-throwing-exception, -0.0)
+FAST(google-objc-function-naming, -1.0)
+FAST(google-objc-global-variable-declaration, 0.0)
+FAST(google-readability-avoid-underscore-in-googletest-name, 1.0)
+FAST(google-readability-braces-around-statements, 0.0)
+FAST(google-readability-casting, -0.0)
+FAST(google-readability-function-size, 3.0)
+FAST(google-readability-namespace-comments, -0.0)
 FAST(google-readability-todo, 1.0)
-FAST(google-runtime-int, -1.0)
-FAST(google-runtime-operator, -0.0)
-FAST(google-upgrade-googletest-case, 0.0)
+FAST(google-runtime-int, 0.0)
+FAST(google-runtime-operator, 0.0)
+FAST(google-upgrade-googletest-case, 1.0)
 FAST(hicpp-avoid-c-arrays, 1.0)
-FAST(hicpp-avoid-goto, 0.0)
-FAST(hicpp-braces-around-statements, 1.0)
-FAST(hicpp-deprecated-headers, -1.0)
-FAST(hicpp-exception-baseclass, 0.0)
-FAST(hicpp-explicit-conversions, -1.0)
+FAST(hicpp-avoid-goto, -0.0)
+FAST(hicpp-braces-around-statements, 0.0)
+FAST(hicpp-deprecated-headers, 1.0)
+FAST(hicpp-exception-baseclass, -1.0)
+FAST(hicpp-explicit-conversions, 1.0)
 FAST(hicpp-function-size, 1.0)
-FAST(hicpp-invalid-access-moved, 6.0)
+FAST(hicpp-ignored-remove-result, 3.0)
+FAST(hicpp-invalid-access-moved, 4.0)
 FAST(hicpp-member-init, 2.0)
-FAST(hicpp-move-const-arg, 1.0)
-FAST(hicpp-multiway-paths-covered, 3.0)
-FAST(hicpp-named-parameter, 1.0)
-FAST(hicpp-new-delete-operators, 0.0)
-FAST(hicpp-no-array-decay, 3.0)
-FAST(hicpp-no-assembler, -0.0)
-FAST(hicpp-no-malloc, 1.0)
+FAST(hicpp-move-const-arg, 3.0)
+FAST(hicpp-multiway-paths-covered, 0.0)
+FAST(hicpp-named-parameter, 2.0)
+FAST(hicpp-new-delete-operators, -0.0)
+FAST(hicpp-no-array-decay, 4.0)
+FAST(hicpp-no-assembler, 1.0)
+FAST(hicpp-no-malloc, 2.0)
 FAST(hicpp-noexcept-move, -0.0)
-FAST(hicpp-signed-bitwise, 0.0)
-FAST(hicpp-special-member-functions, 0.0)
-FAST(hicpp-static-assert, 1.0)
-FAST(hicpp-undelegated-constructor, 3.0)
-FAST(hicpp-uppercase-literal-suffix, 3.0)
-FAST(hicpp-use-auto, 3.0)
-FAST(hicpp-use-emplace, 1.0)
-FAST(hicpp-use-equals-default, 1.0)
-FAST(hicpp-use-equals-delete, 0.0)
-FAST(hicpp-use-noexcept, -1.0)
-FAST(hicpp-use-nullptr, 2.0)
-FAST(hicpp-use-override, -0.0)
-FAST(hicpp-vararg, -2.0)
-FAST(linuxkernel-must-check-errs, 1.0)
-FAST(llvm-else-after-return, 0.0)
-FAST(llvm-header-guard, 2.0)
-FAST(llvm-include-order, 3.0)
-FAST(llvm-namespace-comment, 1.0)
-FAST(llvm-prefer-isa-or-dyn-cast-in-conditionals, 0.0)
-FAST(llvm-prefer-register-over-unsigned, 1.0)
-FAST(llvm-qualified-auto, 0.0)
-FAST(llvm-twine-local, -2.0)
-FAST(llvmlibc-callee-namespace, 2.0)
-FAST(llvmlibc-implementation-in-namespace, 2.0)
-FAST(llvmlibc-restrict-system-libc-headers, -0.0)
-FAST(misc-confusable-identifiers, 1.0)
-SLOW(misc-const-correctness, 261.0)
+FAST(hicpp-signed-bitwise, -1.0)
+FAST(hicpp-special-member-functions, -2.0)
+FAST(hicpp-static-assert, 4.0)
+FAST(hicpp-undelegated-constructor, 6.0)
+FAST(hicpp-uppercase-literal-suffix, 5.0)
+FAST(hicpp-use-auto, 0.0)
+FAST(hicpp-use-emplace, 3.0)
+FAST(hicpp-use-equals-default, 2.0)
+FAST(hicpp-use-equals-delete, 1.0)
+FAST(hicpp-use-noexcept, -0.0)
+FAST(hicpp-use-nullptr, 1.0)
+FAST(hicpp-use-override, -1.0)
+FAST(hicpp-vararg, 0.0)
+FAST(linuxkernel-must-check-errs, -0.0)
+FAST(llvm-else-after-return, -1.0)
+FAST(llvm-header-guard, 3.0)
+FAST(llvm-include-order, 0.0)
+FAST(llvm-namespace-comment, 0.0)
+FAST(llvm-prefer-isa-or-dyn-cast-in-conditionals, 3.0)
+FAST(llvm-prefer-register-over-unsigned, -0.0)
+FAST(llvm-qualified-auto, 4.0)
+FAST(llvm-twine-local, -0.0)
+FAST(llvmlibc-callee-namespace, -0.0)
+FAST(llvmlibc-implementation-in-namespace, 1.0)
+FAST(llvmlibc-inline-function-decl, 3.0)
+FAST(llvmlibc-restrict-system-libc-headers, 0.0)
+FAST(misc-confusable-identifiers, -1.0)
+SLOW(misc-const-correctness, 67.0)
+FAST(misc-coroutine-hostile-raii, 1.0)
 FAST(misc-definitions-in-headers, -1.0)
-FAST(misc-misleading-bidirectional, -0.0)
-FAST(misc-misleading-identifier, -1.0)
-FAST(misc-misplaced-const, 1.0)
-FAST(misc-new-delete-overloads, -1.0)
-FAST(misc-no-recursion, -2.0)
-FAST(misc-non-copyable-objects, 1.0)
-FAST(misc-non-private-member-variables-in-classes, 2.0)
-FAST(misc-redundant-expression, 0.0)
-FAST(misc-static-assert, 1.0)
-FAST(misc-throw-by-value-catch-by-reference, -1.0)
+SLOW(misc-header-include-cycle, 10.0)
+FAST(misc-include-cleaner, 5.0)
+FAST(misc-misleading-bidirectional, 1.0)
+FAST(misc-misleading-identifier, 3.0)
+FAST(misc-misplaced-const, -2.0)
+FAST(misc-new-delete-overloads, 1.0)
+FAST(misc-no-recursion, 0.0)
+FAST(misc-non-copyable-objects, 0.0)
+FAST(misc-non-private-member-variables-in-classes, -1.0)
+FAST(misc-redundant-expression, 1.0)
+FAST(misc-static-assert, 3.0)
+FAST(misc-throw-by-value-catch-by-reference, -0.0)
 FAST(misc-unconventional-assign-operator, 1.0)
-FAST(misc-uniqueptr-reset-release, 0.0)
-FAST(misc-unused-alias-decls, 1.0)
-FAST(misc-unused-parameters, 0.0)
-FAST(misc-unused-using-decls, 4.0)
+FAST(misc-uniqueptr-reset-release, 2.0)
+FAST(misc-unused-alias-decls, 2.0)
+FAST(misc-unused-parameters, 3.0)
+FAST(misc-unused-using-decls, 1.0)
+FAST(misc-use-anonymous-namespace, 1.0)
+FAST(misc-use-internal-linkage, 1.0)
 FAST(modernize-avoid-bind, 1.0)
 FAST(modernize-avoid-c-arrays, 2.0)
-FAST(modernize-concat-nested-namespaces, -1.0)
-FAST(modernize-deprecated-headers, -0.0)
-FAST(modernize-deprecated-ios-base-aliases, 1.0)
+FAST(modernize-concat-nested-namespaces, -0.0)
+FAST(modernize-deprecated-headers, 0.0)
+FAST(modernize-deprecated-ios-base-aliases, 0.0)
 FAST(modernize-loop-convert, 2.0)
-FAST(modernize-macro-to-enum, 1.0)
-FAST(modernize-make-shared, -0.0)
-FAST(modernize-make-unique, 0.0)
-FAST(modernize-pass-by-value, 1.0)
-FAST(modernize-raw-string-literal, 1.0)
-FAST(modernize-redundant-void-arg, -1.0)
-FAST(modernize-replace-auto-ptr, -1.0)
-FAST(modernize-replace-disallow-copy-and-assign-macro, -2.0)
-FAST(modernize-replace-random-shuffle, 0.0)
-FAST(modernize-return-braced-init-list, -1.0)
-FAST(modernize-shrink-to-fit, 2.0)
-FAST(modernize-unary-static-assert, -1.0)
-FAST(modernize-use-auto, 2.0)
+FAST(modernize-macro-to-enum, 0.0)
+FAST(modernize-make-shared, 2.0)
+FAST(modernize-make-unique, 1.0)
+FAST(modernize-min-max-use-initializer-list, 1.0)
+FAST(modernize-pass-by-value, 0.0)
+FAST(modernize-raw-string-literal, 2.0)
+FAST(modernize-redundant-void-arg, 1.0)
+FAST(modernize-replace-auto-ptr, 0.0)
+FAST(modernize-replace-disallow-copy-and-assign-macro, -0.0)
+FAST(modernize-replace-random-shuffle, 1.0)
+FAST(modernize-return-braced-init-list, 1.0)
+FAST(modernize-shrink-to-fit, 1.0)
+FAST(modernize-type-traits, 1.0)
+FAST(modernize-unary-static-assert, 1.0)
+FAST(modernize-use-auto, 0.0)
 FAST(modernize-use-bool-literals, 1.0)
-FAST(modernize-use-default-member-init, 2.0)
-FAST(modernize-use-emplace, 1.0)
-FAST(modernize-use-equals-default, 2.0)
-FAST(modernize-use-equals-delete, 0.0)
-FAST(modernize-use-nodiscard, 1.0)
-FAST(modernize-use-noexcept, 1.0)
-FAST(modernize-use-nullptr, 2.0)
-FAST(modernize-use-override, 1.0)
-FAST(modernize-use-trailing-return-type, -0.0)
-FAST(modernize-use-transparent-functors, -1.0)
-FAST(modernize-use-uncaught-exceptions, 1.0)
+FAST(modernize-use-constraints, 1.0)
+FAST(modernize-use-default-member-init, -0.0)
+FAST(modernize-use-designated-initializers, 1.0)
+FAST(modernize-use-emplace, 2.0)
+FAST(modernize-use-equals-default, 1.0)
+FAST(modernize-use-equals-delete, 2.0)
+FAST(modernize-use-nodiscard, -2.0)
+FAST(modernize-use-noexcept, -2.0)
+FAST(modernize-use-nullptr, 1.0)
+FAST(modernize-use-override, 0.0)
+FAST(modernize-use-ranges, 0.0)
+FAST(modernize-use-starts-ends-with, 0.0)
+FAST(modernize-use-std-format, -1.0)
+FAST(modernize-use-std-numbers, 0.0)
+FAST(modernize-use-std-print, -0.0)
+FAST(modernize-use-trailing-return-type, 3.0)
+FAST(modernize-use-transparent-functors, 0.0)
+FAST(modernize-use-uncaught-exceptions, 0.0)
 FAST(modernize-use-using, 1.0)
-FAST(objc-assert-equals, -1.0)
-FAST(objc-avoid-nserror-init, -2.0)
-FAST(objc-dealloc-in-category, -0.0)
-FAST(objc-forbidden-subclassing, 0.0)
-FAST(objc-missing-hash, 1.0)
-FAST(objc-nsdate-formatter, 1.0)
-FAST(objc-nsinvocation-argument-lifetime, -2.0)
-FAST(objc-property-declaration, -1.0)
-FAST(objc-super-self, -0.0)
-FAST(openmp-exception-escape, 1.0)
-FAST(openmp-use-default-none, 1.0)
-FAST(performance-faster-string-find, 2.0)
-FAST(performance-for-range-copy, 2.0)
-FAST(performance-implicit-conversion-in-loop, 1.0)
-FAST(performance-inefficient-algorithm, 0.0)
-FAST(performance-inefficient-string-concatenation, -0.0)
-FAST(performance-inefficient-vector-operation, 1.0)
-FAST(performance-move-const-arg, 3.0)
-FAST(performance-move-constructor-init, 1.0)
-FAST(performance-no-automatic-move, -1.0)
-FAST(performance-no-int-to-ptr, 2.0)
+FAST(objc-assert-equals, 1.0)
+FAST(objc-avoid-nserror-init, -0.0)
+FAST(objc-dealloc-in-category, 0.0)
+FAST(objc-forbidden-subclassing, 2.0)
+FAST(objc-missing-hash, -0.0)
+FAST(objc-nsdate-formatter, 0.0)
+FAST(objc-nsinvocation-argument-lifetime, 0.0)
+FAST(objc-property-declaration, -0.0)
+FAST(objc-super-self, -2.0)
+FAST(openmp-exception-escape, -1.0)
+FAST(openmp-use-default-none, 2.0)
+FAST(performance-avoid-endl, 2.0)
+FAST(performance-enum-size, -1.0)
+FAST(performance-faster-string-find, 1.0)
+FAST(performance-for-range-copy, 1.0)
+FAST(performance-implicit-conversion-in-loop, 0.0)
+FAST(performance-inefficient-algorithm, 1.0)
+FAST(performance-inefficient-string-concatenation, 1.0)
+FAST(performance-inefficient-vector-operation, -0.0)
+FAST(performance-move-const-arg, 2.0)
+FAST(performance-move-constructor-init, 2.0)
+FAST(performance-no-automatic-move, 2.0)
+FAST(performance-no-int-to-ptr, 0.0)
+FAST(performance-noexcept-destructor, -2.0)
 FAST(performance-noexcept-move-constructor, 1.0)
-FAST(performance-trivially-destructible, -1.0)
-FAST(performance-type-promotion-in-math-fn, 4.0)
-FAST(performance-unnecessary-copy-initialization, 4.0)
+FAST(performance-noexcept-swap, -2.0)
+FAST(performance-trivially-destructible, 3.0)
+FAST(performance-type-promotion-in-math-fn, 2.0)
+FAST(performance-unnecessary-copy-initialization, 2.0)
 FAST(performance-unnecessary-value-param, 2.0)
-FAST(portability-restrict-system-includes, 2.0)
-FAST(portability-simd-intrinsics, 2.0)
-FAST(portability-std-allocator-const, 2.0)
+FAST(portability-restrict-system-includes, 1.0)
+FAST(portability-simd-intrinsics, 1.0)
+FAST(portability-std-allocator-const, 3.0)
 FAST(readability-avoid-const-params-in-decls, -0.0)
-FAST(readability-braces-around-statements, 2.0)
-FAST(readability-const-return-type, -0.0)
-FAST(readability-container-contains, -0.0)
-FAST(readability-container-data-pointer, 0.0)
-SLOW(readability-container-size-empty, 16.0)
-FAST(readability-convert-member-functions-to-static, 0.0)
-FAST(readability-delete-null-pointer, 0.0)
-FAST(readability-duplicate-include, -0.0)
-FAST(readability-else-after-return, 1.0)
+FAST(readability-avoid-nested-conditional-operator, -1.0)
+FAST(readability-avoid-return-with-void-value, 0.0)
+FAST(readability-avoid-unconditional-preprocessor-if, -1.0)
+FAST(readability-braces-around-statements, 1.0)
+FAST(readability-const-return-type, -1.0)
+FAST(readability-container-contains, 3.0)
+FAST(readability-container-data-pointer, -1.0)
+SLOW(readability-container-size-empty, 13.0)
+FAST(readability-convert-member-functions-to-static, 4.0)
+FAST(readability-delete-null-pointer, -1.0)
+FAST(readability-duplicate-include, 2.0)
+FAST(readability-else-after-return, 0.0)
+FAST(readability-enum-initial-value, 0.0)
 FAST(readability-function-cognitive-complexity, 0.0)
-FAST(readability-function-size, 3.0)
-FAST(readability-identifier-length, -1.0)
-FAST(readability-identifier-naming, 5.0)
-FAST(readability-implicit-bool-conversion, 2.0)
-FAST(readability-inconsistent-declaration-parameter-name, 1.0)
-FAST(readability-isolate-declaration, 1.0)
-FAST(readability-magic-numbers, -1.0)
-FAST(readability-make-member-function-const, 2.0)
-FAST(readability-misleading-indentation, 0.0)
-FAST(readability-misplaced-array-index, -0.0)
+FAST(readability-function-size, 0.0)
+FAST(readability-identifier-length, 2.0)
+FAST(readability-identifier-naming, 1.0)
+FAST(readability-implicit-bool-conversion, 3.0)
+FAST(readability-inconsistent-declaration-parameter-name, -0.0)
+FAST(readability-isolate-declaration, 0.0)
+FAST(readability-magic-numbers, 4.0)
+FAST(readability-make-member-function-const, 1.0)
+FAST(readability-math-missing-parentheses, 1.0)
+FAST(readability-misleading-indentation, 1.0)
+FAST(readability-misplaced-array-index, 0.0)
 FAST(readability-named-parameter, -0.0)
-FAST(readability-non-const-parameter, 1.0)
-FAST(readability-qualified-auto, -0.0)
-FAST(readability-redundant-access-specifiers, -1.0)
-FAST(readability-redundant-control-flow, -1.0)
-FAST(readability-redundant-declaration, -0.0)
-FAST(readability-redundant-function-ptr-dereference, -1.0)
-FAST(readability-redundant-member-init, 0.0)
-FAST(readability-redundant-preprocessor, 0.0)
-FAST(readability-redundant-smartptr-get, 6.0)
-FAST(readability-redundant-string-cstr, 0.0)
-FAST(readability-redundant-string-init, 1.0)
-FAST(readability-simplify-boolean-expr, 1.0)
-FAST(readability-simplify-subscript-expr, -0.0)
-FAST(readability-static-accessed-through-instance, 1.0)
-FAST(readability-static-definition-in-anonymous-namespace, -0.0)
+FAST(readability-non-const-parameter, 2.0)
+FAST(readability-operators-representation, 0.0)
+FAST(readability-qualified-auto, 0.0)
+FAST(readability-redundant-access-specifiers, 1.0)
+FAST(readability-redundant-casting, -0.0)
+FAST(readability-redundant-control-flow, 1.0)
+FAST(readability-redundant-declaration, 1.0)
+FAST(readability-redundant-function-ptr-dereference, 0.0)
+FAST(readability-redundant-inline-specifier, 0.0)
+FAST(readability-redundant-member-init, 1.0)
+FAST(readability-redundant-preprocessor, -0.0)
+FAST(readability-redundant-smartptr-get, 4.0)
+FAST(readability-redundant-string-cstr, 1.0)
+FAST(readability-redundant-string-init, -0.0)
+FAST(readability-reference-to-constructed-temporary, 3.0)
+FAST(readability-simplify-boolean-expr, -0.0)
+FAST(readability-simplify-subscript-expr, 1.0)
+FAST(readability-static-accessed-through-instance, 0.0)
+FAST(readability-static-definition-in-anonymous-namespace, 0.0)
 FAST(readability-string-compare, -0.0)
-FAST(readability-suspicious-call-argument, 0.0)
-FAST(readability-uniqueptr-delete-release, 1.0)
-FAST(readability-uppercase-literal-suffix, 3.0)
-FAST(readability-use-anyofallof, 1.0)
+FAST(readability-suspicious-call-argument, -1.0)
+FAST(readability-uniqueptr-delete-release, 3.0)
+FAST(readability-uppercase-literal-suffix, 0.0)
+FAST(readability-use-anyofallof, 2.0)
+FAST(readability-use-std-min-max, -1.0)
 FAST(zircon-temporary-objects, 1.0)
 
 #undef FAST

From e3abd19242dd908e6186639d091f6ecc219963f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Thu, 8 Aug 2024 13:51:07 +0300
Subject: [PATCH 260/427] [compiler-rt] Support building runtimes for Windows
 on arm32 (#101462)

In these environments, the architecture name is armv7; recognize that
and enable the relevant runtimes.

Fix building the sanitizer_common library for this target, by using the
right registers for the architecture - this is similar to what
0c391133c9201ef29273554a1505ef855ce17668 did for aarch64.

(Still, address sanitizer doesn't support hooking functions at runtime
on armv7 or aarch64 - but other runtimes such as ubsan do work.)

(cherry picked from commit 5ea9dd8c7076270695a1d90b9c73718e7d95e0bf)
---
 compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake      | 4 ++++
 compiler-rt/lib/sanitizer_common/sanitizer_unwind_win.cpp | 7 +++++++
 compiler-rt/lib/sanitizer_common/sanitizer_win.cpp        | 5 +++++
 3 files changed, 16 insertions(+)

diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
index 29e5beb6182ba..37ad48bef818a 100644
--- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
+++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
@@ -24,6 +24,10 @@ if(APPLE)
   set(X86_64 x86_64 x86_64h)
 endif()
 
+if(WIN32)
+  set(ARM32 ${ARM32} armv7)
+endif()
+
 set(ALL_SANITIZER_COMMON_SUPPORTED_ARCH ${X86} ${X86_64} ${PPC64} ${RISCV64}
     ${ARM32} ${ARM64} ${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9}
     ${HEXAGON} ${LOONGARCH64})
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_unwind_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_unwind_win.cpp
index afcd01dae0b7a..6fc18396ca63b 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_unwind_win.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_unwind_win.cpp
@@ -70,10 +70,17 @@ void BufferedStackTrace::UnwindSlow(uptr pc, void *context, u32 max_depth) {
   stack_frame.AddrStack.Offset = ctx.Rsp;
 #      endif
 #    else
+#      if SANITIZER_ARM
+  int machine_type = IMAGE_FILE_MACHINE_ARM;
+  stack_frame.AddrPC.Offset = ctx.Pc;
+  stack_frame.AddrFrame.Offset = ctx.R11;
+  stack_frame.AddrStack.Offset = ctx.Sp;
+#      else
   int machine_type = IMAGE_FILE_MACHINE_I386;
   stack_frame.AddrPC.Offset = ctx.Eip;
   stack_frame.AddrFrame.Offset = ctx.Ebp;
   stack_frame.AddrStack.Offset = ctx.Esp;
+#      endif
 #    endif
   stack_frame.AddrPC.Mode = AddrModeFlat;
   stack_frame.AddrFrame.Mode = AddrModeFlat;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp
index 995f00eddc38a..8a80d54751364 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp
@@ -992,8 +992,13 @@ void SignalContext::InitPcSpBp() {
   sp = (uptr)context_record->Rsp;
 #    endif
 #  else
+#    if SANITIZER_ARM
+  bp = (uptr)context_record->R11;
+  sp = (uptr)context_record->Sp;
+#    else
   bp = (uptr)context_record->Ebp;
   sp = (uptr)context_record->Esp;
+#    endif
 #  endif
 }
 

From e594b284810c73b09da9436fdc6f1cbbfb4a7924 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Wed, 28 Aug 2024 12:54:14 +0200
Subject: [PATCH 261/427] [IndVars] Check if WideInc available before trying to
 use it

WideInc/WideIncExpr can be null. Previously this worked out
because the comparison with WideIncExpr would fail. Now we have
accesses to WideInc prior to that. Avoid the issue with an
explicit check.

Fixes https://github.com/llvm/llvm-project/issues/106239.

(cherry picked from commit c9a5e1b665dbba898e9981fd7d48881947e6560e)
---
 llvm/lib/Transforms/Utils/SimplifyIndVar.cpp  | 28 +++++++++------
 .../Transforms/IndVarSimplify/pr106239.ll     | 36 +++++++++++++++++++
 2 files changed, 53 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/Transforms/IndVarSimplify/pr106239.ll

diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 5bda7c50c62c6..0b4a75e0bc52d 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -1928,18 +1928,24 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU,
     if (!WideAddRec.first)
       return nullptr;
 
-    // Reuse the IV increment that SCEVExpander created. Recompute flags, unless
-    // the flags for both increments agree and it is safe to use the ones from
-    // the original inc. In that case, the new use of the wide increment won't
-    // be more poisonous.
-    bool NeedToRecomputeFlags =
-        !SCEVExpander::canReuseFlagsFromOriginalIVInc(OrigPhi, WidePhi,
-                                                      DU.NarrowUse, WideInc) ||
-        DU.NarrowUse->hasNoUnsignedWrap() != WideInc->hasNoUnsignedWrap() ||
-        DU.NarrowUse->hasNoSignedWrap() != WideInc->hasNoSignedWrap();
+    auto CanUseWideInc = [&]() {
+      if (!WideInc)
+        return false;
+      // Reuse the IV increment that SCEVExpander created. Recompute flags,
+      // unless the flags for both increments agree and it is safe to use the
+      // ones from the original inc. In that case, the new use of the wide
+      // increment won't be more poisonous.
+      bool NeedToRecomputeFlags =
+          !SCEVExpander::canReuseFlagsFromOriginalIVInc(
+              OrigPhi, WidePhi, DU.NarrowUse, WideInc) ||
+          DU.NarrowUse->hasNoUnsignedWrap() != WideInc->hasNoUnsignedWrap() ||
+          DU.NarrowUse->hasNoSignedWrap() != WideInc->hasNoSignedWrap();
+      return WideAddRec.first == WideIncExpr &&
+             Rewriter.hoistIVInc(WideInc, DU.NarrowUse, NeedToRecomputeFlags);
+    };
+
     Instruction *WideUse = nullptr;
-    if (WideAddRec.first == WideIncExpr &&
-        Rewriter.hoistIVInc(WideInc, DU.NarrowUse, NeedToRecomputeFlags))
+    if (CanUseWideInc())
       WideUse = WideInc;
     else {
       WideUse = cloneIVUser(DU, WideAddRec.first);
diff --git a/llvm/test/Transforms/IndVarSimplify/pr106239.ll b/llvm/test/Transforms/IndVarSimplify/pr106239.ll
new file mode 100644
index 0000000000000..8d5aa99539a5a
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/pr106239.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=indvars < %s | FileCheck %s
+
+target datalayout = "n8:16:32:64"
+
+; Make sure it does not crash.
+
+define i32 @m() {
+; CHECK-LABEL: define i32 @m() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[FOR_BODY_I6:.*]]
+; CHECK:       [[FOR_BODY_I6]]:
+; CHECK-NEXT:    br i1 true, label %[[I_EXIT:.*]], label %[[IF_END_I:.*]]
+; CHECK:       [[IF_END_I]]:
+; CHECK-NEXT:    store i64 0, ptr null, align 8
+; CHECK-NEXT:    br label %[[FOR_BODY_I6]]
+; CHECK:       [[I_EXIT]]:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %div.i4 = sdiv i32 1, 0
+  br label %for.body.i6
+
+for.body.i6:                                      ; preds = %if.end.i, %entry
+  %add57.i = phi i32 [ %add.i7, %if.end.i ], [ 0, %entry ]
+  br i1 true, label %i.exit, label %if.end.i
+
+if.end.i:                                         ; preds = %for.body.i6
+  %add.i7 = add i32 %add57.i, %div.i4
+  %conv.i = zext i32 %add57.i to i64
+  store i64 %conv.i, ptr null, align 8
+  br label %for.body.i6
+
+i.exit:                                           ; preds = %for.body.i6
+  ret i32 0
+}

From 2d90e8f7402b0a8114978b6f014cfe76c96c94a1 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Mon, 2 Sep 2024 01:40:13 -0700
Subject: [PATCH 262/427] [clang-format] Correctly annotate braces in macro
 definition (#106662)

Fixes #106418.

(cherry picked from commit 0fa78b6c7bd43c2498700a98c47a02cf4fd06388)
---
 clang/lib/Format/UnwrappedLineParser.cpp      | 3 +--
 clang/unittests/Format/TokenAnnotatorTest.cpp | 5 +++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 7f5d157ae9589..60e65aaa83e9c 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -609,9 +609,8 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
             ProbablyBracedList = NextTok->isNot(tok::l_square);
           }
 
-          // Cpp macro definition body that is a nonempty braced list or block:
+          // Cpp macro definition body containing nonempty braced list or block:
           if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
-              !FormatTok->Previous && NextTok->is(tok::eof) &&
               // A statement can end with only `;` (simple statement), a block
               // closing brace (compound statement), or `:` (label statement).
               // If PrevTok is a block opening brace, Tok ends an empty block.
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index f0533c92f6538..db580d7005881 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -3218,6 +3218,11 @@ TEST_F(TokenAnnotatorTest, BraceKind) {
   EXPECT_BRACE_KIND(Tokens[10], BK_Block);
   EXPECT_TOKEN(Tokens[11], tok::r_brace, TT_StructRBrace);
   EXPECT_BRACE_KIND(Tokens[11], BK_Block);
+
+  Tokens = annotate("#define MEMBER(NAME) NAME{\"\"}");
+  ASSERT_EQ(Tokens.size(), 11u) << Tokens;
+  EXPECT_BRACE_KIND(Tokens[7], BK_BracedInit);
+  EXPECT_BRACE_KIND(Tokens[9], BK_BracedInit);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandsElaboratedTypeSpecifier) {

From f3da9af3fd2696fbbe437dea599eda088fcb5592 Mon Sep 17 00:00:00 2001
From: Jeremy Morse <jeremy.morse@sony.com>
Date: Mon, 2 Sep 2024 11:56:40 +0100
Subject: [PATCH 263/427] [DebugInfo][RemoveDIs] Find types hidden in
 DbgRecords (#106547)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When serialising to textual IR, there can be constant Values referred to
by DbgRecords that don't appear anywhere else, and have types hidden
even deeper in side them. Enumerate these when enumerating all types.

Test by Mikael Holmén.

(cherry picked from commit 25f87f2d703178bb4bc13a62cb3df001b186cba2)
---
 llvm/lib/IR/DebugProgramInstruction.cpp       |  5 +-
 llvm/lib/IR/TypeFinder.cpp                    | 14 +++++
 .../DebugInfo/type-finder-w-dbg-records.ll    | 51 +++++++++++++++++++
 3 files changed, 68 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/DebugInfo/type-finder-w-dbg-records.ll

diff --git a/llvm/lib/IR/DebugProgramInstruction.cpp b/llvm/lib/IR/DebugProgramInstruction.cpp
index 362d467beeb11..5d2189b54204f 100644
--- a/llvm/lib/IR/DebugProgramInstruction.cpp
+++ b/llvm/lib/IR/DebugProgramInstruction.cpp
@@ -473,11 +473,12 @@ DbgLabelRecord::createDebugIntrinsic(Module *M,
 
 Value *DbgVariableRecord::getAddress() const {
   auto *MD = getRawAddress();
-  if (auto *V = dyn_cast<ValueAsMetadata>(MD))
+  if (auto *V = dyn_cast_or_null<ValueAsMetadata>(MD))
     return V->getValue();
 
   // When the value goes to null, it gets replaced by an empty MDNode.
-  assert(!cast<MDNode>(MD)->getNumOperands() && "Expected an empty MDNode");
+  assert(!MD ||
+         !cast<MDNode>(MD)->getNumOperands() && "Expected an empty MDNode");
   return nullptr;
 }
 
diff --git a/llvm/lib/IR/TypeFinder.cpp b/llvm/lib/IR/TypeFinder.cpp
index 003155a4af487..963f4b4806e1f 100644
--- a/llvm/lib/IR/TypeFinder.cpp
+++ b/llvm/lib/IR/TypeFinder.cpp
@@ -88,6 +88,20 @@ void TypeFinder::run(const Module &M, bool onlyNamed) {
         for (const auto &MD : MDForInst)
           incorporateMDNode(MD.second);
         MDForInst.clear();
+
+        // Incorporate types hiding in variable-location information.
+        for (const auto &Dbg : I.getDbgRecordRange()) {
+          // Pick out records that have Values.
+          if (const DbgVariableRecord *DVI =
+                  dyn_cast<DbgVariableRecord>(&Dbg)) {
+            for (Value *V : DVI->location_ops())
+              incorporateValue(V);
+            if (DVI->isDbgAssign()) {
+              if (Value *Addr = DVI->getAddress())
+                incorporateValue(Addr);
+            }
+          }
+        }
       }
   }
 
diff --git a/llvm/test/DebugInfo/type-finder-w-dbg-records.ll b/llvm/test/DebugInfo/type-finder-w-dbg-records.ll
new file mode 100644
index 0000000000000..8259b4a9f1c3a
--- /dev/null
+++ b/llvm/test/DebugInfo/type-finder-w-dbg-records.ll
@@ -0,0 +1,51 @@
+; RUN: opt --passes=verify %s -o - -S | FileCheck %s
+
+;; Test that the type definitions are discovered when serialising to LLVM-IR,
+;; even if they're only present inside a DbgRecord, and thus not normally
+;; visible.
+
+; CHECK: %union.anon = type { %struct.a }
+; CHECK: %struct.a = type { i32 }
+; CHECK: %union.anon2 = type { %struct.a2 }
+; CHECK: %struct.a2 = type { i32 }
+
+; ModuleID = 'bbi-98372.ll'
+source_filename = "bbi-98372.ll"
+
+%union.anon = type { %struct.a }
+%struct.a = type { i32 }
+%union.anon2 = type { %struct.a2 }
+%struct.a2 = type { i32 }
+
+@d = global [1 x { i16, i16 }] [{ i16, i16 } { i16 0, i16 undef }], align 1
+@e = global [1 x { i16, i16 }] [{ i16, i16 } { i16 0, i16 undef }], align 1
+
+define void @f() {
+entry:
+    #dbg_value(ptr getelementptr inbounds ([1 x %union.anon], ptr @d, i32 0, i32 3), !7, !DIExpression(), !14)
+    #dbg_assign(ptr null, !7, !DIExpression(), !16, ptr getelementptr inbounds ([1 x %union.anon2], ptr @e, i32 0, i32 3), !17, !14)
+  ret void, !dbg !15
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "foo.c", directory: "/bar")
+!2 = !{i32 7, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 1}
+!5 = !{i32 7, !"frame-pointer", i32 2}
+!6 = !{!"clang"}
+!7 = !DILocalVariable(name: "f", scope: !8, file: !1, line: 8, type: !12)
+!8 = distinct !DISubprogram(name: "e", scope: !1, file: !1, line: 8, type: !9, scopeLine: 8, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11)
+!9 = !DISubroutineType(types: !10)
+!10 = !{null}
+!11 = !{!7}
+!12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !13, size: 16)
+!13 = !DIBasicType(name: "int", size: 16, encoding: DW_ATE_signed)
+!14 = !DILocation(line: 0, scope: !8)
+!15 = !DILocation(line: 8, column: 28, scope: !8)
+!16 = distinct !DIAssignID()
+!17 = !DIExpression()

From 830b7ebac09ebef91671f0863986aee1a1d60e5e Mon Sep 17 00:00:00 2001
From: Patryk Wychowaniec <pwychowaniec@pm.me>
Date: Fri, 30 Aug 2024 15:25:54 +0200
Subject: [PATCH 264/427] [AVR] Fix parsing & emitting relative jumps (#106722)

Ever since 6859685a87ad093d60c8bed60b116143c0a684c7 (or, precisely,
84428dafc0941e3a31303fa1b286835ab2b8e234) relative jumps emitted by the
AVR codegen are off by two bytes - this pull request fixes it.

## Abstract

As compared to absolute jumps, relative jumps - such as rjmp, rcall or
brsh - have an implied `pc+2` behavior; that is, `jmp 100` is `pc =
100`, but `rjmp 100` gets understood as `pc = pc + 100 + 2`.

This is not reflected in the AVR codegen:

https://github.com/llvm/llvm-project/blob/f95026dbf66e353128a3a3d7b55f3e52d5985535/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp#L89

... which always emits relative jumps that are two bytes too far - or
rather it _would_ emit such jumps if not for this check:

https://github.com/llvm/llvm-project/blob/f95026dbf66e353128a3a3d7b55f3e52d5985535/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp#L517

... which causes most of the relative jumps to be actually resolved
late, by the linker, which applies the offsetting logic on its own,
hiding the issue within LLVM.

[Some time
ago](https://github.com/llvm/llvm-project/commit/697a162fa63df328ec9ca334636c5e85390b2bf0)
we've had a similar "jumps are off" problem that got solved by touching
`shouldForceRelocation()`, but I think that has worked only by accident.
It's exploited the fact that absolute vs relative jumps in the parsed
assembly can be distinguished through a "side channel" check relying on
the existence of labels (i.e. absolute jumps happen to named labels, but
relative jumps are anonymous, so to say). This was an alright idea back
then, but it got broken by 6859685a87ad093d60c8bed60b116143c0a684c7.

I propose a different approach:
- when emitting relative jumps, offset them by `-2` (well, `-1`,
strictly speaking, because those instructions rely on right-shifted
offset),
- when parsing relative jumps, treat `.` as `+2` and read `rjmp .+1234`
as `rjmp (1234 + 2)`.

This approach seems to be sound and now we generate the same assembly as
avr-gcc, which can be confirmed with:

```cpp
// avr-gcc test.c -O3 && avr-objdump -d a.out

int main() {
    asm(
"      foo:\n\t"
"        rjmp  .+2\n\t"
"        rjmp  .-2\n\t"
"        rjmp  foo\n\t"
"        rjmp  .+8\n\t"
"        rjmp  end\n\t"
"        rjmp  .+0\n\t"
"      end:\n\t"
"        rjmp .-4\n\t"
"        rjmp .-6\n\t"
"      x:\n\t"
"        rjmp x\n\t"
"        .short 0xc00f\n\t"
);
}
```

avr-gcc is also how I got the opcodes for all new tests like `inst-brbc.s`, so we should be good.

(cherry picked from commit 86a60e7f1e8f361f84ccb6e656e848dd4fbaa713)
---
 .../lib/Target/AVR/AsmParser/AVRAsmParser.cpp |  15 +-
 .../Target/AVR/MCTargetDesc/AVRAsmBackend.cpp |  12 +-
 llvm/test/CodeGen/AVR/jmp.ll                  |  25 ++
 llvm/test/MC/AVR/inst-brbc.s                  |  23 +-
 llvm/test/MC/AVR/inst-brbs.s                  |  22 +-
 llvm/test/MC/AVR/inst-brcc.s                  |  28 ++
 llvm/test/MC/AVR/inst-brcs.s                  |  28 ++
 llvm/test/MC/AVR/inst-breq.s                  |  28 ++
 llvm/test/MC/AVR/inst-brge.s                  |  24 ++
 llvm/test/MC/AVR/inst-brhc.s                  |  24 ++
 llvm/test/MC/AVR/inst-brhs.s                  |  24 ++
 llvm/test/MC/AVR/inst-brid.s                  |  24 ++
 llvm/test/MC/AVR/inst-brie.s                  |  24 ++
 llvm/test/MC/AVR/inst-brlo.s                  |  24 ++
 llvm/test/MC/AVR/inst-brlt.s                  |  24 ++
 llvm/test/MC/AVR/inst-brmi.s                  |  24 ++
 llvm/test/MC/AVR/inst-brne.s                  |  28 ++
 llvm/test/MC/AVR/inst-brpl.s                  |  24 ++
 llvm/test/MC/AVR/inst-brsh.s                  |  24 ++
 llvm/test/MC/AVR/inst-brtc.s                  |  24 ++
 llvm/test/MC/AVR/inst-brts.s                  |  24 ++
 llvm/test/MC/AVR/inst-brvc.s                  |  24 ++
 llvm/test/MC/AVR/inst-brvs.s                  |  24 ++
 llvm/test/MC/AVR/inst-family-cond-branch.s    | 321 ------------------
 llvm/test/MC/AVR/inst-rcall.s                 |  33 +-
 llvm/test/MC/AVR/inst-rjmp.s                  |  69 ++--
 26 files changed, 567 insertions(+), 401 deletions(-)
 create mode 100644 llvm/test/CodeGen/AVR/jmp.ll
 create mode 100644 llvm/test/MC/AVR/inst-brcc.s
 create mode 100644 llvm/test/MC/AVR/inst-brcs.s
 create mode 100644 llvm/test/MC/AVR/inst-breq.s
 create mode 100644 llvm/test/MC/AVR/inst-brge.s
 create mode 100644 llvm/test/MC/AVR/inst-brhc.s
 create mode 100644 llvm/test/MC/AVR/inst-brhs.s
 create mode 100644 llvm/test/MC/AVR/inst-brid.s
 create mode 100644 llvm/test/MC/AVR/inst-brie.s
 create mode 100644 llvm/test/MC/AVR/inst-brlo.s
 create mode 100644 llvm/test/MC/AVR/inst-brlt.s
 create mode 100644 llvm/test/MC/AVR/inst-brmi.s
 create mode 100644 llvm/test/MC/AVR/inst-brne.s
 create mode 100644 llvm/test/MC/AVR/inst-brpl.s
 create mode 100644 llvm/test/MC/AVR/inst-brsh.s
 create mode 100644 llvm/test/MC/AVR/inst-brtc.s
 create mode 100644 llvm/test/MC/AVR/inst-brts.s
 create mode 100644 llvm/test/MC/AVR/inst-brvc.s
 create mode 100644 llvm/test/MC/AVR/inst-brvs.s
 delete mode 100644 llvm/test/MC/AVR/inst-family-cond-branch.s

diff --git a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index 383dfcc31117c..c016b2dd91dc6 100644
--- a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -72,7 +72,7 @@ class AVRAsmParser : public MCTargetAsmParser {
   int parseRegisterName();
   int parseRegister(bool RestoreOnFailure = false);
   bool tryParseRegisterOperand(OperandVector &Operands);
-  bool tryParseExpression(OperandVector &Operands);
+  bool tryParseExpression(OperandVector &Operands, int64_t offset);
   bool tryParseRelocExpression(OperandVector &Operands);
   void eatComma();
 
@@ -418,7 +418,7 @@ bool AVRAsmParser::tryParseRegisterOperand(OperandVector &Operands) {
   return false;
 }
 
-bool AVRAsmParser::tryParseExpression(OperandVector &Operands) {
+bool AVRAsmParser::tryParseExpression(OperandVector &Operands, int64_t offset) {
   SMLoc S = Parser.getTok().getLoc();
 
   if (!tryParseRelocExpression(Operands))
@@ -437,6 +437,11 @@ bool AVRAsmParser::tryParseExpression(OperandVector &Operands) {
   if (getParser().parseExpression(Expression))
     return true;
 
+  if (offset) {
+    Expression = MCBinaryExpr::createAdd(
+        Expression, MCConstantExpr::create(offset, getContext()), getContext());
+  }
+
   SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
   Operands.push_back(AVROperand::CreateImm(Expression, S, E));
   return false;
@@ -529,8 +534,9 @@ bool AVRAsmParser::parseOperand(OperandVector &Operands, bool maybeReg) {
     [[fallthrough]];
   case AsmToken::LParen:
   case AsmToken::Integer:
+    return tryParseExpression(Operands, 0);
   case AsmToken::Dot:
-    return tryParseExpression(Operands);
+    return tryParseExpression(Operands, 2);
   case AsmToken::Plus:
   case AsmToken::Minus: {
     // If the sign preceeds a number, parse the number,
@@ -540,7 +546,7 @@ bool AVRAsmParser::parseOperand(OperandVector &Operands, bool maybeReg) {
     case AsmToken::BigNum:
     case AsmToken::Identifier:
     case AsmToken::Real:
-      if (!tryParseExpression(Operands))
+      if (!tryParseExpression(Operands, 0))
         return false;
       break;
     default:
@@ -643,6 +649,7 @@ bool AVRAsmParser::ParseInstruction(ParseInstructionInfo &Info,
     // These specific operands should be treated as addresses/symbols/labels,
     // other than registers.
     bool maybeReg = true;
+
     if (OperandNum == 1) {
       std::array<StringRef, 8> Insts = {"lds", "adiw", "sbiw", "ldi"};
       for (auto Inst : Insts) {
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index 0d29912bee264..388d58a82214d 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -94,6 +94,9 @@ static void adjustRelativeBranch(unsigned Size, const MCFixup &Fixup,
 
   // Rightshifts the value by one.
   AVR::fixups::adjustBranchTarget(Value);
+
+  // Jumps are relative to the current instruction.
+  Value -= 1;
 }
 
 /// 22-bit absolute fixup.
@@ -513,15 +516,10 @@ bool AVRAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
   switch ((unsigned)Fixup.getKind()) {
   default:
     return Fixup.getKind() >= FirstLiteralRelocationKind;
-  // Fixups which should always be recorded as relocations.
   case AVR::fixup_7_pcrel:
   case AVR::fixup_13_pcrel:
-    // Do not force relocation for PC relative branch like 'rjmp .',
-    // 'rcall . - off' and 'breq . + off'.
-    if (const auto *SymA = Target.getSymA())
-      if (SymA->getSymbol().getName().size() == 0)
-        return false;
-    [[fallthrough]];
+    // Always resolve relocations for PC-relative branches
+    return false;
   case AVR::fixup_call:
     return true;
   }
diff --git a/llvm/test/CodeGen/AVR/jmp.ll b/llvm/test/CodeGen/AVR/jmp.ll
new file mode 100644
index 0000000000000..95dfff4836b4e
--- /dev/null
+++ b/llvm/test/CodeGen/AVR/jmp.ll
@@ -0,0 +1,25 @@
+; RUN: llc -filetype=obj -mtriple=avr < %s | llvm-objdump -dr --no-show-raw-insn - | FileCheck %s
+
+define i8 @foo(i8 %a) {
+bb0:
+  %0 = tail call i8 @bar(i8 %a)
+  %1 = icmp eq i8 %0, 123
+  br i1 %1, label %bb1, label %bb2
+
+bb1:
+  ret i8 100
+
+bb2:
+  ret i8 200
+}
+
+declare i8 @bar(i8);
+
+; CHECK: rcall   .-2
+; CHECK-NEXT: 00000000: R_AVR_13_PCREL bar
+; CHECK-NEXT: cpi     r24, 0x7b
+; CHECK-NEXT: brne    .+4
+; CHECK-NEXT: ldi     r24, 0x64
+; CHECK-NEXT: ret
+; CHECK-NEXT: ldi     r24, 0xc8
+; CHECK-NEXT: ret
diff --git a/llvm/test/MC/AVR/inst-brbc.s b/llvm/test/MC/AVR/inst-brbc.s
index 4d7d684da4468..3ef3664cf07bf 100644
--- a/llvm/test/MC/AVR/inst-brbc.s
+++ b/llvm/test/MC/AVR/inst-brbc.s
@@ -3,7 +3,6 @@
 ; RUN:     | llvm-objdump -d - | FileCheck --check-prefix=INST %s
 
 foo:
-
   brbc 3, .+8
   brbc 0, .-16
   .short 0xf759
@@ -11,14 +10,16 @@ foo:
   .short 0xf74c
   .short 0xf4c7
 
-; CHECK: brvc .Ltmp0+8              ; encoding: [0bAAAAA011,0b111101AA]
-; CHECK:                            ; fixup A - offset: 0, value: .Ltmp0+8, kind: fixup_7_pcrel
-; CHECK: brcc .Ltmp1-16             ; encoding: [0bAAAAA000,0b111101AA]
-; CHECK:                            ; fixup A - offset: 0, value: .Ltmp1-16, kind: fixup_7_pcrel
+; CHECK: brvc (.Ltmp0+8)+2   ; encoding: [0bAAAAA011,0b111101AA]
+; CHECK-NEXT:                ; fixup A - offset: 0, value: (.Ltmp0+8)+2, kind: fixup_7_pcrel
+;
+; CHECK: brcc (.Ltmp1-16)+2  ; encoding: [0bAAAAA000,0b111101AA]
+; CHECK-NEXT:                ; fixup A - offset: 0, value: (.Ltmp1-16)+2, kind: fixup_7_pcrel
 
-; INST: 23 f4   brvc .+8
-; INST: c0 f7   brsh .-16
-; INST: 59 f7   brne .-42
-; INST: 52 f7   brpl .-44
-; INST: 4c f7   brge .-46
-; INST: c7 f4   brid .+48
+; INST-LABEL: <foo>:
+; INST-NEXT: 23 f4   brvc .+8
+; INST-NEXT: c0 f7   brsh .-16
+; INST-NEXT: 59 f7   brne .-42
+; INST-NEXT: 52 f7   brpl .-44
+; INST-NEXT: 4c f7   brge .-46
+; INST-NEXT: c7 f4   brid .+48
diff --git a/llvm/test/MC/AVR/inst-brbs.s b/llvm/test/MC/AVR/inst-brbs.s
index 7987feeec654a..f15a779a53654 100644
--- a/llvm/test/MC/AVR/inst-brbs.s
+++ b/llvm/test/MC/AVR/inst-brbs.s
@@ -3,7 +3,6 @@
 ; RUN:     | llvm-objdump -d - | FileCheck --check-prefix=INST %s
 
 foo:
-
   brbs 3, .+8
   brbs 0, .-12
   .short 0xf359
@@ -11,14 +10,15 @@ foo:
   .short 0xf34c
   .short 0xf077
 
-; CHECK: brvs .Ltmp0+8              ; encoding: [0bAAAAA011,0b111100AA]
-; CHECK:                            ; fixup A - offset: 0, value: .Ltmp0+8, kind: fixup_7_pcrel
-; CHECK: brcs .Ltmp1-12             ; encoding: [0bAAAAA000,0b111100AA]
-; CHECK:                            ; fixup A - offset: 0, value: .Ltmp1-12, kind: fixup_7_pcrel
+; CHECK: brvs (.Ltmp0+8)+2   ; encoding: [0bAAAAA011,0b111100AA]
+; CHECK-NEXT:                ; fixup A - offset: 0, value: (.Ltmp0+8)+2, kind: fixup_7_pcrel
+; CHECK: brcs (.Ltmp1-12)+2  ; encoding: [0bAAAAA000,0b111100AA]
+; CHECK-NEXT:                ; fixup A - offset: 0, value: (.Ltmp1-12)+2, kind: fixup_7_pcrel
 
-; INST: 23 f0   brvs .+8
-; INST: d0 f3   brlo .-12
-; INST: 59 f3   breq .-42
-; INST: 52 f3   brmi .-44
-; INST: 4c f3   brlt .-46
-; INST: 77 f0   brie .+28
+; INST-LABEL: <foo>:
+; INST-NEXT: 23 f0   brvs .+8
+; INST-NEXT: d0 f3   brlo .-12
+; INST-NEXT: 59 f3   breq .-42
+; INST-NEXT: 52 f3   brmi .-44
+; INST-NEXT: 4c f3   brlt .-46
+; INST-NEXT: 77 f0   brie .+28
diff --git a/llvm/test/MC/AVR/inst-brcc.s b/llvm/test/MC/AVR/inst-brcc.s
new file mode 100644
index 0000000000000..d9218bc61e787
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-brcc.s
@@ -0,0 +1,28 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
+
+foo:
+  brcc .+66
+  brcc .-22
+  brbc 0, .+66
+  brbc 0, bar
+
+bar:
+
+; CHECK: brcc (.Ltmp0+66)+2  ; encoding: [0bAAAAA000,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp0+66)+2, kind: fixup_7_pcrel
+; CHECK: brcc (.Ltmp1-22)+2  ; encoding: [0bAAAAA000,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp1-22)+2, kind: fixup_7_pcrel
+; CHECK: brcc (.Ltmp2+66)+2  ; encoding: [0bAAAAA000,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp2+66)+2, kind: fixup_7_pcrel
+; CHECK: brcc bar            ; encoding: [0bAAAAA000,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
+
+; INST-LABEL: <foo>:
+; INST-NEXT: 08 f5      brsh .+66
+; INST-NEXT: a8 f7      brsh .-22
+; INST-NEXT: 08 f5      brsh .+66
+; INST-NEXT: 00 f4      brsh .+0
diff --git a/llvm/test/MC/AVR/inst-brcs.s b/llvm/test/MC/AVR/inst-brcs.s
new file mode 100644
index 0000000000000..0012cb31f6126
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-brcs.s
@@ -0,0 +1,28 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
+
+foo:
+  brcs .+8
+  brcs .+4
+  brbs 0, .+8
+  brbs 0, bar
+
+bar:
+
+; CHECK: brcs (.Ltmp0+8)+2  ; encoding: [0bAAAAA000,0b111100AA]
+; CHECK-NEXT:               ;   fixup A - offset: 0, value: (.Ltmp0+8)+2, kind: fixup_7_pcrel
+; CHECK: brcs (.Ltmp1+4)+2  ; encoding: [0bAAAAA000,0b111100AA]
+; CHECK-NEXT:               ;   fixup A - offset: 0, value: (.Ltmp1+4)+2, kind: fixup_7_pcrel
+; CHECK: brcs (.Ltmp2+8)+2  ; encoding: [0bAAAAA000,0b111100AA]
+; CHECK-NEXT:               ;   fixup A - offset: 0, value: (.Ltmp2+8)+2, kind: fixup_7_pcrel
+; CHECK: brcs bar           ; encoding: [0bAAAAA000,0b111100AA]
+; CHECK-NEXT:               ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
+
+; INST-LABEL: <foo>:
+; INST-NEXT: 20 f0      brlo .+8
+; INST-NEXT: 10 f0      brlo .+4
+; INST-NEXT: 20 f0      brlo .+8
+; INST-NEXT: 00 f0      brlo .+0
diff --git a/llvm/test/MC/AVR/inst-breq.s b/llvm/test/MC/AVR/inst-breq.s
new file mode 100644
index 0000000000000..f82010f02ba61
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-breq.s
@@ -0,0 +1,28 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
+
+foo:
+  breq .-18
+  breq .-12
+  brbs 1, .-18
+  brbs 1, bar
+
+bar:
+
+; CHECK: breq    (.Ltmp0-18)+2     ; encoding: [0bAAAAA001,0b111100AA]
+; CHECK-NEXT:                      ;   fixup A - offset: 0, value: (.Ltmp0-18)+2, kind: fixup_7_pcrel
+; CHECK: breq    (.Ltmp1-12)+2     ; encoding: [0bAAAAA001,0b111100AA]
+; CHECK-NEXT:                      ;   fixup A - offset: 0, value: (.Ltmp1-12)+2, kind: fixup_7_pcrel
+; CHECK: brbs    1, (.Ltmp2-18)+2  ; encoding: [0bAAAAA001,0b111100AA]
+; CHECK-NEXT:                      ;   fixup A - offset: 0, value: (.Ltmp2-18)+2, kind: fixup_7_pcrel
+; CHECK: brbs    1, bar            ; encoding: [0bAAAAA001,0b111100AA]
+; CHECK-NEXT:                      ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
+
+; INST-LABEL: <foo>:
+; INST-NEXT: b9 f3      breq .-18
+; INST-NEXT: d1 f3      breq .-12
+; INST-NEXT: b9 f3      breq .-18
+; INST-NEXT: 01 f0      breq .+0
diff --git a/llvm/test/MC/AVR/inst-brge.s b/llvm/test/MC/AVR/inst-brge.s
new file mode 100644
index 0000000000000..1121284a11468
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-brge.s
@@ -0,0 +1,24 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
+
+foo:
+  brge .+50
+  brge .+42
+  brge bar
+
+bar:
+
+; CHECK: brge (.Ltmp0+50)+2  ; encoding: [0bAAAAA100,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp0+50)+2, kind: fixup_7_pcrel
+; CHECK: brge (.Ltmp1+42)+2  ; encoding: [0bAAAAA100,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp1+42)+2, kind: fixup_7_pcrel
+; CHECK: brge bar            ; encoding: [0bAAAAA100,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
+
+; INST-LABEL: <foo>:
+; INST-NEXT: cc f4      brge .+50
+; INST-NEXT: ac f4      brge .+42
+; INST-NEXT: 04 f4      brge .+0
diff --git a/llvm/test/MC/AVR/inst-brhc.s b/llvm/test/MC/AVR/inst-brhc.s
new file mode 100644
index 0000000000000..eb16ac2ef7a64
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-brhc.s
@@ -0,0 +1,24 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
+
+foo:
+  brhc .+12
+  brhc .+14
+  brhc bar
+
+bar:
+
+; CHECK: brhc (.Ltmp0+12)+2  ; encoding: [0bAAAAA101,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp0+12)+2, kind: fixup_7_pcrel
+; CHECK: brhc (.Ltmp1+14)+2  ; encoding: [0bAAAAA101,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp1+14)+2, kind: fixup_7_pcrel
+; CHECK: brhc bar            ; encoding: [0bAAAAA101,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
+
+; INST-LABEL: <foo>:
+; INST-NEXT: 35 f4      brhc .+12
+; INST-NEXT: 3d f4      brhc .+14
+; INST-NEXT: 05 f4      brhc .+0
diff --git a/llvm/test/MC/AVR/inst-brhs.s b/llvm/test/MC/AVR/inst-brhs.s
new file mode 100644
index 0000000000000..77c49596b3b0b
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-brhs.s
@@ -0,0 +1,24 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
+
+foo:
+  brhs .-66
+  brhs .+14
+  brhs bar
+
+bar:
+
+; CHECK: brhs (.Ltmp0-66)+2  ; encoding: [0bAAAAA101,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp0-66)+2, kind: fixup_7_pcrel
+; CHECK: brhs (.Ltmp1+14)+2  ; encoding: [0bAAAAA101,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp1+14)+2, kind: fixup_7_pcrel
+; CHECK: brhs bar            ; encoding: [0bAAAAA101,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
+
+; INST-LABEL: <foo>:
+; INST-NEXT: fd f2      brhs .-66
+; INST-NEXT: 3d f0      brhs .+14
+; INST-NEXT: 05 f0      brhs .+0
diff --git a/llvm/test/MC/AVR/inst-brid.s b/llvm/test/MC/AVR/inst-brid.s
new file mode 100644
index 0000000000000..70d0ea83c49b2
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-brid.s
@@ -0,0 +1,24 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
+
+foo:
+  brid .+42
+  brid .+62
+  brid bar
+
+bar:
+
+; CHECK: brid (.Ltmp0+42)+2  ; encoding: [0bAAAAA111,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp0+42)+2, kind: fixup_7_pcrel
+; CHECK: brid (.Ltmp1+62)+2  ; encoding: [0bAAAAA111,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp1+62)+2, kind: fixup_7_pcrel
+; CHECK: brid bar            ; encoding: [0bAAAAA111,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
+
+; INST-LABEL: <foo>:
+; INST-NEXT: af f4      brid .+42
+; INST-NEXT: ff f4      brid .+62
+; INST-NEXT: 07 f4      brid .+0
diff --git a/llvm/test/MC/AVR/inst-brie.s b/llvm/test/MC/AVR/inst-brie.s
new file mode 100644
index 0000000000000..717c686e2ed44
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-brie.s
@@ -0,0 +1,24 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
+
+foo:
+  brie .+20
+  brie .+40
+  brie bar
+
+bar:
+
+; CHECK: brie (.Ltmp0+20)+2  ; encoding: [0bAAAAA111,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp0+20)+2, kind: fixup_7_pcrel
+; CHECK: brie (.Ltmp1+40)+2  ; encoding: [0bAAAAA111,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp1+40)+2, kind: fixup_7_pcrel
+; CHECK: brie bar            ; encoding: [0bAAAAA111,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
+
+; INST-LABEL: <foo>:
+; INST-NEXT: 57 f0      brie .+20
+; INST-NEXT: a7 f0      brie .+40
+; INST-NEXT: 07 f0      brie .+0
diff --git a/llvm/test/MC/AVR/inst-brlo.s b/llvm/test/MC/AVR/inst-brlo.s
new file mode 100644
index 0000000000000..4b56d66ffdfe0
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-brlo.s
@@ -0,0 +1,24 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
+
+foo:
+  brlo .+12
+  brlo .+28
+  brlo bar
+
+bar:
+
+; CHECK: brlo (.Ltmp0+12)+2  ; encoding: [0bAAAAA000,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp0+12)+2, kind: fixup_7_pcrel
+; CHECK: brlo (.Ltmp1+28)+2  ; encoding: [0bAAAAA000,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp1+28)+2, kind: fixup_7_pcrel
+; CHECK: brlo bar            ; encoding: [0bAAAAA000,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
+
+; INST-LABEL: <foo>:
+; INST-NEXT: 30 f0      brlo .+12
+; INST-NEXT: 70 f0      brlo .+28
+; INST-NEXT: 00 f0      brlo .+0
diff --git a/llvm/test/MC/AVR/inst-brlt.s b/llvm/test/MC/AVR/inst-brlt.s
new file mode 100644
index 0000000000000..8a7c543f9444b
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-brlt.s
@@ -0,0 +1,24 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
+
+foo:
+  brlt .+16
+  brlt .+2
+  brlt bar
+
+bar:
+
+; CHECK: brlt (.Ltmp0+16)+2  ; encoding: [0bAAAAA100,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp0+16)+2, kind: fixup_7_pcrel
+; CHECK: brlt (.Ltmp1+2)+2   ; encoding: [0bAAAAA100,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp1+2)+2, kind: fixup_7_pcrel
+; CHECK: brlt bar            ; encoding: [0bAAAAA100,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
+
+; INST-LABEL: <foo>:
+; INST-NEXT: 44 f0    brlt .+16
+; INST-NEXT: 0c f0    brlt .+2
+; INST-NEXT: 04 f0    brlt .+0
diff --git a/llvm/test/MC/AVR/inst-brmi.s b/llvm/test/MC/AVR/inst-brmi.s
new file mode 100644
index 0000000000000..878612d294dd9
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-brmi.s
@@ -0,0 +1,24 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
+
+foo:
+  brmi .+66
+  brmi .+58
+  brmi bar
+
+bar:
+
+; CHECK: brmi (.Ltmp0+66)+2  ; encoding: [0bAAAAA010,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp0+66)+2, kind: fixup_7_pcrel
+; CHECK: brmi (.Ltmp1+58)+2  ; encoding: [0bAAAAA010,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp1+58)+2, kind: fixup_7_pcrel
+; CHECK: brmi bar            ; encoding: [0bAAAAA010,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
+
+; INST-LABEL: <foo>:
+; INST-NEXT: 0a f1      brmi .+66
+; INST-NEXT: ea f0      brmi .+58
+; INST-NEXT: 02 f0      brmi .+0
diff --git a/llvm/test/MC/AVR/inst-brne.s b/llvm/test/MC/AVR/inst-brne.s
new file mode 100644
index 0000000000000..9d6bee4b754d9
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-brne.s
@@ -0,0 +1,28 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
+
+foo:
+  brne .+10
+  brne .+2
+  brbc 1, .+10
+  brbc 1, bar
+
+bar:
+
+; CHECK: brne    (.Ltmp0+10)+2     ; encoding: [0bAAAAA001,0b111101AA]
+; CHECK-NEXT:                      ;   fixup A - offset: 0, value: (.Ltmp0+10)+2, kind: fixup_7_pcrel
+; CHECK: brne    (.Ltmp1+2)+2      ; encoding: [0bAAAAA001,0b111101AA]
+; CHECK-NEXT:                      ;   fixup A - offset: 0, value: (.Ltmp1+2)+2, kind: fixup_7_pcrel
+; CHECK: brbc    1, (.Ltmp2+10)+2  ; encoding: [0bAAAAA001,0b111101AA]
+; CHECK-NEXT:                      ;   fixup A - offset: 0, value: (.Ltmp2+10)+2, kind: fixup_7_pcrel
+; CHECK: brbc    1, bar            ; encoding: [0bAAAAA001,0b111101AA]
+; CHECK-NEXT:                      ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
+
+; INST-LABEL: <foo>:
+; INST-NEXT: 29 f4      brne .+10
+; INST-NEXT: 09 f4      brne .+2
+; INST-NEXT: 29 f4      brne .+10
+; INST-NEXT: 01 f4      brne .+0
diff --git a/llvm/test/MC/AVR/inst-brpl.s b/llvm/test/MC/AVR/inst-brpl.s
new file mode 100644
index 0000000000000..393365ee35339
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-brpl.s
@@ -0,0 +1,24 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
+
+foo:
+  brpl .-12
+  brpl .+18
+  brpl bar
+
+bar:
+
+; CHECK: brpl (.Ltmp0-12)+2  ; encoding: [0bAAAAA010,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp0-12)+2, kind: fixup_7_pcrel
+; CHECK: brpl (.Ltmp1+18)+2  ; encoding: [0bAAAAA010,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp1+18)+2, kind: fixup_7_pcrel
+; CHECK: brpl bar            ; encoding: [0bAAAAA010,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
+
+; INST-LABEL: <foo>:
+; INST-NEXT: d2 f7      brpl .-12
+; INST-NEXT: 4a f4      brpl .+18
+; INST-NEXT: 02 f4      brpl .+0
diff --git a/llvm/test/MC/AVR/inst-brsh.s b/llvm/test/MC/AVR/inst-brsh.s
new file mode 100644
index 0000000000000..0bacd64d3d8d0
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-brsh.s
@@ -0,0 +1,24 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
+
+foo:
+  brsh .+32
+  brsh .+70
+  brsh bar
+
+bar:
+
+; CHECK: brsh (.Ltmp0+32)+2  ; encoding: [0bAAAAA000,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp0+32)+2, kind: fixup_7_pcrel
+; CHECK: brsh (.Ltmp1+70)+2  ; encoding: [0bAAAAA000,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp1+70)+2, kind: fixup_7_pcrel
+; CHECK: brsh bar            ; encoding: [0bAAAAA000,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
+
+; INST-LABEL: <foo>:
+; INST-NEXT: 80 f4      brsh .+32
+; INST-NEXT: 18 f5      brsh .+70
+; INST-NEXT: 00 f4      brsh .+0
diff --git a/llvm/test/MC/AVR/inst-brtc.s b/llvm/test/MC/AVR/inst-brtc.s
new file mode 100644
index 0000000000000..eb4ee21162872
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-brtc.s
@@ -0,0 +1,24 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
+
+foo:
+  brtc .+52
+  brtc .+50
+  brtc bar
+
+bar:
+
+; CHECK: brtc (.Ltmp0+52)+2  ; encoding: [0bAAAAA110,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp0+52)+2, kind: fixup_7_pcrel
+; CHECK: brtc (.Ltmp1+50)+2  ; encoding: [0bAAAAA110,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp1+50)+2, kind: fixup_7_pcrel
+; CHECK: brtc bar            ; encoding: [0bAAAAA110,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
+
+; INST-LABEL: <foo>:
+; INST-NEXT: d6 f4      brtc .+52
+; INST-NEXT: ce f4      brtc .+50
+; INST-NEXT: 06 f4      brtc .+0
diff --git a/llvm/test/MC/AVR/inst-brts.s b/llvm/test/MC/AVR/inst-brts.s
new file mode 100644
index 0000000000000..ccd794a922589
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-brts.s
@@ -0,0 +1,24 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
+
+foo:
+  brts .+18
+  brts .+22
+  brts bar
+
+bar:
+
+; CHECK: brts (.Ltmp0+18)+2  ; encoding: [0bAAAAA110,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp0+18)+2, kind: fixup_7_pcrel
+; CHECK: brts (.Ltmp1+22)+2  ; encoding: [0bAAAAA110,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp1+22)+2, kind: fixup_7_pcrel
+; CHECK: brts bar            ; encoding: [0bAAAAA110,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
+
+; INST-LABEL: <foo>:
+; INST-NEXT: 4e f0      brts .+18
+; INST-NEXT: 5e f0      brts .+22
+; INST-NEXT: 06 f0      brts .+0
diff --git a/llvm/test/MC/AVR/inst-brvc.s b/llvm/test/MC/AVR/inst-brvc.s
new file mode 100644
index 0000000000000..573f779c0dcd6
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-brvc.s
@@ -0,0 +1,24 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
+
+foo:
+  brvc .-28
+  brvc .-62
+  brvc bar
+
+bar:
+
+; CHECK: brvc (.Ltmp0-28)+2  ; encoding: [0bAAAAA011,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp0-28)+2, kind: fixup_7_pcrel
+; CHECK: brvc (.Ltmp1-62)+2  ; encoding: [0bAAAAA011,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp1-62)+2, kind: fixup_7_pcrel
+; CHECK: brvc bar            ; encoding: [0bAAAAA011,0b111101AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
+
+; INST-LABEL: <foo>:
+; INST-NEXT: 93 f7      brvc .-28
+; INST-NEXT: 0b f7      brvc .-62
+; INST-NEXT: 03 f4      brvc .+0
diff --git a/llvm/test/MC/AVR/inst-brvs.s b/llvm/test/MC/AVR/inst-brvs.s
new file mode 100644
index 0000000000000..d50a1a9ec5b62
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-brvs.s
@@ -0,0 +1,24 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
+
+foo:
+  brvs .+18
+  brvs .+32
+  brvs bar
+
+bar:
+
+; CHECK: brvs (.Ltmp0+18)+2  ; encoding: [0bAAAAA011,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp0+18)+2, kind: fixup_7_pcrel
+; CHECK: brvs (.Ltmp1+32)+2  ; encoding: [0bAAAAA011,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: (.Ltmp1+32)+2, kind: fixup_7_pcrel
+; CHECK: brvs bar            ; encoding: [0bAAAAA011,0b111100AA]
+; CHECK-NEXT:                ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
+
+; INST-LABEL: <foo>:
+; INST-NEXT: 4b f0      brvs .+18
+; INST-NEXT: 83 f0      brvs .+32
+; INST-NEXT: 03 f0      brvs .+0
diff --git a/llvm/test/MC/AVR/inst-family-cond-branch.s b/llvm/test/MC/AVR/inst-family-cond-branch.s
deleted file mode 100644
index dc36425a884f3..0000000000000
--- a/llvm/test/MC/AVR/inst-family-cond-branch.s
+++ /dev/null
@@ -1,321 +0,0 @@
-; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
-; RUN: llvm-mc -filetype=obj -triple avr < %s \
-; RUN:     | llvm-objdump -d - | FileCheck --check-prefix=INST %s
-
-
-foo:
-  ; BREQ
-  breq .-18
-  breq .-12
-  brbs 1, .-18
-  brbs 1, baz
-
-; CHECK: breq    .Ltmp0-18               ; encoding: [0bAAAAA001,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp0-18, kind: fixup_7_pcrel
-; CHECK: breq    .Ltmp1-12               ; encoding: [0bAAAAA001,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp1-12, kind: fixup_7_pcrel
-; CHECK: brbs    1, .Ltmp2-18            ; encoding: [0bAAAAA001,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp2-18, kind: fixup_7_pcrel
-; CHECK: brbs    1, baz                  ; encoding: [0bAAAAA001,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: baz, kind: fixup_7_pcrel
-
-; INST-LABEL: <foo>:
-; INST: breq .-18
-; INST: breq .-12
-; INST: breq .-18
-; INST: breq .+0
-
-  ; BRNE
-  brne .+10
-  brne .+2
-  brbc 1, .+10
-  brbc 1, bar
-
-; CHECK: brne    .Ltmp3+10               ; encoding: [0bAAAAA001,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp3+10, kind: fixup_7_pcrel
-; CHECK: brne    .Ltmp4+2                ; encoding: [0bAAAAA001,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp4+2, kind: fixup_7_pcrel
-; CHECK: brbc    1, .Ltmp5+10            ; encoding: [0bAAAAA001,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp5+10, kind: fixup_7_pcrel
-; CHECK: brbc    1, bar                  ; encoding: [0bAAAAA001,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: bar, kind: fixup_7_pcrel
-
-; INST: brne .+10
-; INST: brne .+2
-; INST: brne .+10
-; INST: brne .+0
-
-bar:
-  ; BRCS
-  brcs .+8
-  brcs .+4
-  brbs 0, .+8
-  brbs 0, end
-
-; CHECK: brcs    .Ltmp6+8                ; encoding: [0bAAAAA000,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp6+8, kind: fixup_7_pcrel
-; CHECK: brcs    .Ltmp7+4                ; encoding: [0bAAAAA000,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp7+4, kind: fixup_7_pcrel
-; CHECK: brcs    .Ltmp8+8                ; encoding: [0bAAAAA000,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp8+8, kind: fixup_7_pcrel
-; CHECK: brcs    end                     ; encoding: [0bAAAAA000,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: end, kind: fixup_7_pcrel
-
-; INST-LABEL: <bar>:
-; INST: brlo .+8
-; INST: brlo .+4
-; INST: brlo .+8
-; INST: brlo .+0
-
-  ; BRCC
-  brcc .+66
-  brcc .-22
-  brbc 0, .+66
-  brbc 0, baz
-
-; CHECK: brcc    .Ltmp9+66               ; encoding: [0bAAAAA000,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp9+66, kind: fixup_7_pcrel
-; CHECK: brcc    .Ltmp10-22              ; encoding: [0bAAAAA000,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp10-22, kind: fixup_7_pcrel
-; CHECK: brcc    .Ltmp11+66              ; encoding: [0bAAAAA000,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp11+66, kind: fixup_7_pcrel
-; CHECK: brcc    baz                     ; encoding: [0bAAAAA000,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: baz, kind: fixup_7_pcrel
-
-; INST: brsh .+66
-; INST: brsh .-22
-; INST: brsh .+66
-; INST: brsh .+0
-
-; BRSH
-  brsh .+32
-  brsh .+70
-  brsh car
-
-; CHECK: brsh    .Ltmp12+32              ; encoding: [0bAAAAA000,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp12+32, kind: fixup_7_pcrel
-; CHECK: brsh    .Ltmp13+70              ; encoding: [0bAAAAA000,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp13+70, kind: fixup_7_pcrel
-; CHECK: brsh    car                     ; encoding: [0bAAAAA000,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: car, kind: fixup_7_pcrel
-
-; INST: brsh .+32
-; INST: brsh .+70
-; INST: brsh .+0
-
-baz:
-
-  ; BRLO
-  brlo .+12
-  brlo .+28
-  brlo car
-
-; CHECK: brlo    .Ltmp14+12              ; encoding: [0bAAAAA000,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp14+12, kind: fixup_7_pcrel
-; CHECK: brlo    .Ltmp15+28              ; encoding: [0bAAAAA000,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp15+28, kind: fixup_7_pcrel
-; CHECK: brlo    car                     ; encoding: [0bAAAAA000,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: car, kind: fixup_7_pcrel
-
-; INST-LABEL: <baz>:
-; INST: brlo .+12
-; INST: brlo .+28
-; INST: brlo .+0
-
-  ; BRMI
-  brmi .+66
-  brmi .+58
-  brmi car
-
-; CHECK: brmi    .Ltmp16+66              ; encoding: [0bAAAAA010,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp16+66, kind: fixup_7_pcrel
-; CHECK: brmi    .Ltmp17+58              ; encoding: [0bAAAAA010,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp17+58, kind: fixup_7_pcrel
-; CHECK: brmi    car                     ; encoding: [0bAAAAA010,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: car, kind: fixup_7_pcrel
-
-; INST: brmi .+66
-; INST: brmi .+58
-; INST: brmi .+0
-
-  ; BRPL
-  brpl .-12
-  brpl .+18
-  brpl car
-
-; CHECK: brpl    .Ltmp18-12              ; encoding: [0bAAAAA010,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp18-12, kind: fixup_7_pcrel
-; CHECK: brpl    .Ltmp19+18              ; encoding: [0bAAAAA010,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp19+18, kind: fixup_7_pcrel
-; CHECK: brpl    car                     ; encoding: [0bAAAAA010,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: car, kind: fixup_7_pcrel
-
-; INST: brpl .-12
-; INST: brpl .+18
-; INST: brpl .+0
-
-; BRGE
-  brge .+50
-  brge .+42
-  brge car
-
-; CHECK: brge    .Ltmp20+50              ; encoding: [0bAAAAA100,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp20+50, kind: fixup_7_pcrel
-; CHECK: brge    .Ltmp21+42              ; encoding: [0bAAAAA100,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp21+42, kind: fixup_7_pcrel
-; CHECK: brge    car                     ; encoding: [0bAAAAA100,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: car, kind: fixup_7_pcrel
-
-; INST: brge .+50
-; INST: brge .+42
-; INST: brge .+0
-
-car:
-  ; BRLT
-  brlt .+16
-  brlt .+2
-  brlt end
-
-; CHECK: brlt    .Ltmp22+16              ; encoding: [0bAAAAA100,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp22+16, kind: fixup_7_pcrel
-; CHECK: brlt    .Ltmp23+2               ; encoding: [0bAAAAA100,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp23+2, kind: fixup_7_pcrel
-; CHECK: brlt    end                     ; encoding: [0bAAAAA100,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: end, kind: fixup_7_pcrel
-
-; INST-LABEL: <car>:
-; INST: brlt	.+16
-; INST: brlt	.+2
-; INST: brlt	.+0
-
-  ; BRHS
-  brhs .-66
-  brhs .+14
-  brhs just_another_label
-
-; CHECK: brhs    .Ltmp24-66              ; encoding: [0bAAAAA101,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp24-66, kind: fixup_7_pcrel
-; CHECK: brhs    .Ltmp25+14              ; encoding: [0bAAAAA101,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp25+14, kind: fixup_7_pcrel
-; CHECK: brhs    just_another_label      ; encoding: [0bAAAAA101,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: just_another_label, kind: fixup_7_pcrel
-
-; INST: brhs	.-66
-; INST: brhs	.+14
-; INST: brhs	.+0
-
-  ; BRHC
-  brhc .+12
-  brhc .+14
-  brhc just_another_label
-
-; CHECK: brhc    .Ltmp26+12              ; encoding: [0bAAAAA101,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp26+12, kind: fixup_7_pcrel
-; CHECK: brhc    .Ltmp27+14              ; encoding: [0bAAAAA101,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp27+14, kind: fixup_7_pcrel
-; CHECK: brhc    just_another_label      ; encoding: [0bAAAAA101,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: just_another_label, kind: fixup_7_pcrel
-
-; INST: brhc	.+12
-; INST: brhc	.+14
-; INST: brhc	.+0
-
-  ; BRTS
-  brts .+18
-  brts .+22
-  brts just_another_label
-
-; CHECK: brts    .Ltmp28+18              ; encoding: [0bAAAAA110,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp28+18, kind: fixup_7_pcrel
-; CHECK: brts    .Ltmp29+22              ; encoding: [0bAAAAA110,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp29+22, kind: fixup_7_pcrel
-; CHECK: brts    just_another_label      ; encoding: [0bAAAAA110,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: just_another_label, kind: fixup_7_pcrel
-
-; INST: brts	.+18
-; INST: brts	.+22
-; INST: brts	.+0
-
-just_another_label:
-  ; BRTC
-  brtc .+52
-  brtc .+50
-  brtc end
-
-; CHECK: brtc    .Ltmp30+52              ; encoding: [0bAAAAA110,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp30+52, kind: fixup_7_pcrel
-; CHECK: brtc    .Ltmp31+50              ; encoding: [0bAAAAA110,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp31+50, kind: fixup_7_pcrel
-; CHECK: brtc    end                     ; encoding: [0bAAAAA110,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: end, kind: fixup_7_pcrel
-
-; INST-LABEL: <just_another_label>:
-; INST: brtc	.+52
-; INST: brtc	.+50
-; INST: brtc	.+0
-
-  ; BRVS
-  brvs .+18
-  brvs .+32
-  brvs end
-
-; CHECK: brvs    .Ltmp32+18              ; encoding: [0bAAAAA011,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp32+18, kind: fixup_7_pcrel
-; CHECK: brvs    .Ltmp33+32              ; encoding: [0bAAAAA011,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp33+32, kind: fixup_7_pcrel
-; CHECK: brvs    end                     ; encoding: [0bAAAAA011,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: end, kind: fixup_7_pcrel
-
-; INST: brvs	.+18
-; INST: brvs	.+32
-; INST: brvs	.+0
-
-  ; BRVC
-  brvc .-28
-  brvc .-62
-  brvc end
-
-; CHECK: brvc    .Ltmp34-28              ; encoding: [0bAAAAA011,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp34-28, kind: fixup_7_pcrel
-; CHECK: brvc    .Ltmp35-62              ; encoding: [0bAAAAA011,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp35-62, kind: fixup_7_pcrel
-; CHECK: brvc    end                     ; encoding: [0bAAAAA011,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: end, kind: fixup_7_pcrel
-
-; INST: brvc	.-28
-; INST: brvc	.-62
-; INST: brvc	.+0
-
-  ; BRIE
-  brie .+20
-  brie .+40
-  brie end
-
-; CHECK: brie    .Ltmp36+20              ; encoding: [0bAAAAA111,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp36+20, kind: fixup_7_pcrel
-; CHECK: brie    .Ltmp37+40              ; encoding: [0bAAAAA111,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp37+40, kind: fixup_7_pcrel
-; CHECK: brie    end                     ; encoding: [0bAAAAA111,0b111100AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: end, kind: fixup_7_pcrel
-
-; INST: brie	.+20
-; INST: brie	.+40
-; INST: brie	.+0
-
-  ; BRID
-  brid .+42
-  brid .+62
-  brid end
-
-; CHECK: brid    .Ltmp38+42              ; encoding: [0bAAAAA111,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp38+42, kind: fixup_7_pcrel
-; CHECK: brid    .Ltmp39+62              ; encoding: [0bAAAAA111,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp39+62, kind: fixup_7_pcrel
-; CHECK: brid    end                     ; encoding: [0bAAAAA111,0b111101AA]
-; CHECK:                                 ;   fixup A - offset: 0, value: end, kind: fixup_7_pcrel
-
-; INST: brid	.+42
-; INST: brid	.+62
-; INST: brid	.+0
-
-end:
diff --git a/llvm/test/MC/AVR/inst-rcall.s b/llvm/test/MC/AVR/inst-rcall.s
index 006013aa6ea94..a4ec32d05b1a4 100644
--- a/llvm/test/MC/AVR/inst-rcall.s
+++ b/llvm/test/MC/AVR/inst-rcall.s
@@ -1,27 +1,28 @@
 ; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
 ; RUN: llvm-mc -filetype=obj -triple avr < %s \
-; RUN:     | llvm-objdump -d - | FileCheck --check-prefix=INST %s
-
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
 
 foo:
-
   rcall  .+0
   rcall  .-8
   rcall  .+12
   rcall  .+46
   .short  0xdfea
 
-; CHECK: rcall  .Ltmp0+0             ; encoding: [A,0b1101AAAA]
-; CHECK:                             ;   fixup A - offset: 0, value: .Ltmp0+0, kind: fixup_13_pcrel
-; CHECK: rcall  .Ltmp1-8             ; encoding: [A,0b1101AAAA]
-; CHECK:                             ;   fixup A - offset: 0, value: .Ltmp1-8, kind: fixup_13_pcrel
-; CHECK: rcall  .Ltmp2+12            ; encoding: [A,0b1101AAAA]
-; CHECK:                             ;   fixup A - offset: 0, value: .Ltmp2+12, kind: fixup_13_pcrel
-; CHECK: rcall  .Ltmp3+46            ; encoding: [A,0b1101AAAA]
-; CHECK:                             ;   fixup A - offset: 0, value: .Ltmp3+46, kind: fixup_13_pcrel
+; CHECK: rcall (.Ltmp0+0)+2   ; encoding: [A,0b1101AAAA]
+; CHECK-NEXT:                 ;   fixup A - offset: 0, value: (.Ltmp0+0)+2, kind: fixup_13_pcrel
+; CHECK: rcall (.Ltmp1-8)+2   ; encoding: [A,0b1101AAAA]
+; CHECK-NEXT:                 ;   fixup A - offset: 0, value: (.Ltmp1-8)+2, kind: fixup_13_pcrel
+; CHECK: rcall (.Ltmp2+12)+2  ; encoding: [A,0b1101AAAA]
+; CHECK-NEXT:                 ;   fixup A - offset: 0, value: (.Ltmp2+12)+2, kind: fixup_13_pcrel
+; CHECK: rcall (.Ltmp3+46)+2  ; encoding: [A,0b1101AAAA]
+; CHECK-NEXT:                 ;   fixup A - offset: 0, value: (.Ltmp3+46)+2, kind: fixup_13_pcrel
 
-; INST: 00 d0    rcall .+0
-; INST: fc df    rcall .-8
-; INST: 06 d0    rcall .+12
-; INST: 17 d0    rcall .+46
-; INST: ea df    rcall .-44
+; INST-LABEL: <foo>:
+; INST-NEXT: 00 d0    rcall .+0
+; INST-NEXT: fc df    rcall .-8
+; INST-NEXT: 06 d0    rcall .+12
+; INST-NEXT: 17 d0    rcall .+46
+; INST-NEXT: ea df    rcall .-44
diff --git a/llvm/test/MC/AVR/inst-rjmp.s b/llvm/test/MC/AVR/inst-rjmp.s
index 3dbac39e055dd..cc843a58b55d2 100644
--- a/llvm/test/MC/AVR/inst-rjmp.s
+++ b/llvm/test/MC/AVR/inst-rjmp.s
@@ -1,49 +1,56 @@
 ; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
 ; RUN: llvm-mc -filetype=obj -triple avr < %s \
-; RUN:     | llvm-objdump -d - | FileCheck --check-prefix=INST %s
-
+; RUN:     | llvm-objdump -d - \
+; RUN:     | FileCheck --check-prefix=INST %s
 
 foo:
-
   rjmp  .+2
   rjmp  .-2
   rjmp  foo
   rjmp  .+8
   rjmp  end
   rjmp  .+0
+
 end:
   rjmp .-4
   rjmp .-6
+
 x:
   rjmp x
   .short 0xc00f
 
-; CHECK: rjmp    .Ltmp0+2                ; encoding: [A,0b1100AAAA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp0+2, kind: fixup_13_pcrel
-; CHECK: rjmp    .Ltmp1-2                ; encoding: [A,0b1100AAAA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp1-2, kind: fixup_13_pcrel
-; CHECK: rjmp    foo                     ; encoding: [A,0b1100AAAA]
-; CHECK:                                 ;   fixup A - offset: 0, value: foo, kind: fixup_13_pcrel
-; CHECK: rjmp    .Ltmp2+8                ; encoding: [A,0b1100AAAA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp2+8, kind: fixup_13_pcrel
-; CHECK: rjmp    end                     ; encoding: [A,0b1100AAAA]
-; CHECK:                                 ;   fixup A - offset: 0, value: end, kind: fixup_13_pcrel
-; CHECK: rjmp    .Ltmp3+0                ; encoding: [A,0b1100AAAA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp3+0, kind: fixup_13_pcrel
-; CHECK: rjmp    .Ltmp4-4                ; encoding: [A,0b1100AAAA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp4-4, kind: fixup_13_pcrel
-; CHECK: rjmp    .Ltmp5-6                ; encoding: [A,0b1100AAAA]
-; CHECK:                                 ;   fixup A - offset: 0, value: .Ltmp5-6, kind: fixup_13_pcrel
-; CHECK: rjmp    x                       ; encoding: [A,0b1100AAAA]
-; CHECK:                                 ;   fixup A - offset: 0, value: x, kind: fixup_13_pcrel
+; CHECK: rjmp (.Ltmp0+2)+2  ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:               ;   fixup A - offset: 0, value: (.Ltmp0+2)+2, kind: fixup_13_pcrel
+; CHECK: rjmp (.Ltmp1-2)+2  ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:               ;   fixup A - offset: 0, value: (.Ltmp1-2)+2, kind: fixup_13_pcrel
+; CHECK: rjmp foo           ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:               ;   fixup A - offset: 0, value: foo, kind: fixup_13_pcrel
+; CHECK: rjmp (.Ltmp2+8)+2  ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:               ;   fixup A - offset: 0, value: (.Ltmp2+8)+2, kind: fixup_13_pcrel
+; CHECK: rjmp end           ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:               ;   fixup A - offset: 0, value: end, kind: fixup_13_pcrel
+; CHECK: rjmp (.Ltmp3+0)+2  ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:               ;   fixup A - offset: 0, value: (.Ltmp3+0)+2, kind: fixup_13_pcrel
+; CHECK: rjmp (.Ltmp4-4)+2  ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:               ;   fixup A - offset: 0, value: (.Ltmp4-4)+2, kind: fixup_13_pcrel
+; CHECK: rjmp (.Ltmp5-6)+2  ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:               ;   fixup A - offset: 0, value: (.Ltmp5-6)+2, kind: fixup_13_pcrel
+; CHECK: rjmp x             ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:               ;   fixup A - offset: 0, value: x, kind: fixup_13_pcrel
 
-; INST: 01 c0      rjmp  .+2
-; INST: ff cf      rjmp  .-2
-; INST: 00 c0      rjmp  .+0
-; INST: 04 c0      rjmp  .+8
-; INST: 00 c0      rjmp  .+0
-; INST: 00 c0      rjmp  .+0
-; INST: fe cf      rjmp  .-4
-; INST: fd cf      rjmp  .-6
-; INST: 00 c0      rjmp  .+0
-; INST: 0f c0      rjmp  .+30
+; INST-LABEL: <foo>:
+; INST-NEXT: 01 c0      rjmp  .+2
+; INST-NEXT: ff cf      rjmp  .-2
+; INST-NEXT: fd cf      rjmp  .-6
+; INST-NEXT: 04 c0      rjmp  .+8
+; INST-NEXT: 01 c0      rjmp  .+2
+; INST-NEXT: 00 c0      rjmp  .+0
+; INST-EMPTY:
+; INST-LABEL: <end>:
+; INST-NEXT: fe cf      rjmp  .-4
+; INST-NEXT: fd cf      rjmp  .-6
+; INST-EMPTY:
+; INST-LABEL: <x>:
+; INST-NEXT: ff cf      rjmp  .-2
+; INST-NEXT: 0f c0      rjmp  .+30

From a01d631a1c2c3902b383b6491f27b72d63f6257b Mon Sep 17 00:00:00 2001
From: Patryk Wychowaniec <pwychowaniec@pm.me>
Date: Fri, 30 Aug 2024 16:50:56 +0200
Subject: [PATCH 265/427] [AVR] Fix LLD test (#106739)

Since we don't generate relocations for those, it doesn't make sense to
assert them here; fallout of
https://github.com/llvm/llvm-project/pull/106722.

(cherry picked from commit a3816b5a573dbf57ba3082a919ca2de6b47257e9)
---
 lld/test/ELF/avr-reloc.s | 26 --------------------------
 1 file changed, 26 deletions(-)

diff --git a/lld/test/ELF/avr-reloc.s b/lld/test/ELF/avr-reloc.s
index ec088eaa149d0..41c32580f63a1 100644
--- a/lld/test/ELF/avr-reloc.s
+++ b/lld/test/ELF/avr-reloc.s
@@ -76,32 +76,6 @@ adiw r24, b   ; R_AVR_6_ADIW
 in    r20, b  ; R_AVR_PORT6
 sbic  b, 1    ; R_AVR_PORT5
 
-.section .PCREL,"ax",@progbits
-; CHECK-LABEL: section .PCREL
-; CHECK:       rjmp .+30
-; CHECK-NEXT:  rjmp .-36
-; CHECK-NEXT:  breq .+26
-; CHECK-NEXT:  breq .-40
-; CHECK-NEXT:  rjmp .-4096
-; CHECK-NEXT:  rjmp .+4094
-; CHECK-NEXT:  rjmp .+4094
-; CHECK-NEXT:  rjmp .-4096
-; CHECK-NEXT:  breq .-128
-; CHECK-NEXT:  breq .+126
-; HEX-LABEL:   section .PCREL:
-; HEX-NEXT:    0fc0eecf 69f061f3
-foo:
-rjmp foo + 32  ; R_AVR_13_PCREL
-rjmp foo - 32  ; R_AVR_13_PCREL
-breq foo + 32  ; R_AVR_7_PCREL
-breq foo - 32  ; R_AVR_7_PCREL
-rjmp 1f - 4096  $ 1:  ; R_AVR_13_PCREL
-rjmp 1f + 4094  $ 1:  ; R_AVR_13_PCREL
-rjmp 1f - 4098  $ 1:  ; R_AVR_13_PCREL (overflow)
-rjmp 1f + 4096  $ 1:  ; R_AVR_13_PCREL (overflow)
-breq 1f - 128   $ 1:  ; R_AVR_7_PCREL
-breq 1f + 126   $ 1:  ; R_AVR_7_PCREL
-
 .section .LDSSTS,"ax",@progbits
 ; CHECK-LABEL: section .LDSSTS:
 ; CHECK:       lds r20, 0x1e

From 0c641568515a797473394694f05937e1f1913d87 Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@hieta.se>
Date: Tue, 3 Sep 2024 16:09:11 +0200
Subject: [PATCH 266/427] Bump version to 19.1.0-rc4

---
 cmake/Modules/LLVMVersion.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake
index e6042983c05cb..7e3bb98c3577a 100644
--- a/cmake/Modules/LLVMVersion.cmake
+++ b/cmake/Modules/LLVMVersion.cmake
@@ -10,6 +10,6 @@ if(NOT DEFINED LLVM_VERSION_PATCH)
   set(LLVM_VERSION_PATCH 0)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
-  set(LLVM_VERSION_SUFFIX -rc3)
+  set(LLVM_VERSION_SUFFIX -rc4)
 endif()
 

From 8664666823b3eb8d96fde58f79d71d36bd7f9115 Mon Sep 17 00:00:00 2001
From: Eli Friedman <efriedma@quicinc.com>
Date: Thu, 1 Aug 2024 16:18:20 -0700
Subject: [PATCH 267/427] Fix codegen of consteval functions returning an empty
 class, and related issues (#93115)

Fix codegen of consteval functions returning an empty class, and related
issues

If a class is empty, don't store it to memory: the store might overwrite
useful data. Similarly, if a class has tail padding that might overlap
other fields, don't store the tail padding to memory.

The problem here turned out a bit more general than I initially thought:
basically all uses of EmitAggregateStore were broken. Call lowering had
a method that did mostly the right thing, though: CreateCoercedStore.
Adapt CreateCoercedStore so it always does the conservatively right
thing, and use it for both calls and ConstantExpr.

Also, along the way, fix the "overlap" bit in AggValueSlot: the bit was
set incorrectly for empty classes in some cases.

Fixes #93040.

(cherry picked from commit 1762e01cca0186f1862db561cfd9019164b8c654)
---
 clang/lib/CodeGen/CGCall.cpp                  | 146 ++++++++----------
 clang/lib/CodeGen/CGExprAgg.cpp               |  23 +--
 clang/lib/CodeGen/CodeGenFunction.h           |   7 +-
 clang/test/CodeGen/arm-mve-intrinsics/vld24.c |  43 ++++--
 clang/test/CodeGen/arm-vfp16-arguments2.cpp   |  10 +-
 .../amdgpu-kernel-arg-pointer-type.cu         |  11 +-
 clang/test/CodeGenCUDA/builtins-amdgcn.cu     | 125 +++++++--------
 .../test/CodeGenCUDA/builtins-spirv-amdgcn.cu | 123 +++++++--------
 .../CodeGenCXX/address-space-cast-coerce.cpp  |   6 +-
 clang/test/CodeGenCXX/cxx2a-consteval.cpp     |  24 ++-
 clang/test/CodeGenCXX/trivial_abi.cpp         |  20 +++
 clang/test/CodeGenHIP/dpp-const-fold.hip      |   8 +-
 .../spirv-amdgcn-dpp-const-fold.hip           |   8 +-
 .../CodeGenOpenCL/addr-space-struct-arg.cl    |  11 +-
 .../amdgpu-abi-struct-arg-byref.cl            |  42 +++--
 15 files changed, 320 insertions(+), 287 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 6e69e84a2344c..d7ebffa8c5e4e 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1336,75 +1336,50 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
   return CGF.Builder.CreateLoad(Tmp);
 }
 
-// Function to store a first-class aggregate into memory.  We prefer to
-// store the elements rather than the aggregate to be more friendly to
-// fast-isel.
-// FIXME: Do we need to recurse here?
-void CodeGenFunction::EmitAggregateStore(llvm::Value *Val, Address Dest,
-                                         bool DestIsVolatile) {
-  // Prefer scalar stores to first-class aggregate stores.
-  if (llvm::StructType *STy = dyn_cast<llvm::StructType>(Val->getType())) {
-    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-      Address EltPtr = Builder.CreateStructGEP(Dest, i);
-      llvm::Value *Elt = Builder.CreateExtractValue(Val, i);
-      Builder.CreateStore(Elt, EltPtr, DestIsVolatile);
-    }
-  } else {
-    Builder.CreateStore(Val, Dest, DestIsVolatile);
-  }
-}
-
-/// CreateCoercedStore - Create a store to \arg DstPtr from \arg Src,
-/// where the source and destination may have different types.  The
-/// destination is known to be aligned to \arg DstAlign bytes.
-///
-/// This safely handles the case when the src type is larger than the
-/// destination type; the upper bits of the src will be lost.
-static void CreateCoercedStore(llvm::Value *Src,
-                               Address Dst,
-                               bool DstIsVolatile,
-                               CodeGenFunction &CGF) {
-  llvm::Type *SrcTy = Src->getType();
-  llvm::Type *DstTy = Dst.getElementType();
-  if (SrcTy == DstTy) {
-    CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
-    return;
-  }
-
-  llvm::TypeSize SrcSize = CGF.CGM.getDataLayout().getTypeAllocSize(SrcTy);
-
-  if (llvm::StructType *DstSTy = dyn_cast<llvm::StructType>(DstTy)) {
-    Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
-                                             SrcSize.getFixedValue(), CGF);
-    DstTy = Dst.getElementType();
-  }
-
-  llvm::PointerType *SrcPtrTy = llvm::dyn_cast<llvm::PointerType>(SrcTy);
-  llvm::PointerType *DstPtrTy = llvm::dyn_cast<llvm::PointerType>(DstTy);
-  if (SrcPtrTy && DstPtrTy &&
-      SrcPtrTy->getAddressSpace() != DstPtrTy->getAddressSpace()) {
-    Src = CGF.Builder.CreateAddrSpaceCast(Src, DstTy);
-    CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
+void CodeGenFunction::CreateCoercedStore(llvm::Value *Src, Address Dst,
+                                         llvm::TypeSize DstSize,
+                                         bool DstIsVolatile) {
+  if (!DstSize)
     return;
-  }
 
-  // If the source and destination are integer or pointer types, just do an
-  // extension or truncation to the desired type.
-  if ((isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy)) &&
-      (isa<llvm::IntegerType>(DstTy) || isa<llvm::PointerType>(DstTy))) {
-    Src = CoerceIntOrPtrToIntOrPtr(Src, DstTy, CGF);
-    CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
-    return;
+  llvm::Type *SrcTy = Src->getType();
+  llvm::TypeSize SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
+
+  // GEP into structs to try to make types match.
+  // FIXME: This isn't really that useful with opaque types, but it impacts a
+  // lot of regression tests.
+  if (SrcTy != Dst.getElementType()) {
+    if (llvm::StructType *DstSTy =
+            dyn_cast<llvm::StructType>(Dst.getElementType())) {
+      assert(!SrcSize.isScalable());
+      Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
+                                               SrcSize.getFixedValue(), *this);
+    }
   }
 
-  llvm::TypeSize DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(DstTy);
-
-  // If store is legal, just bitcast the src pointer.
-  if (isa<llvm::ScalableVectorType>(SrcTy) ||
-      isa<llvm::ScalableVectorType>(DstTy) ||
-      SrcSize.getFixedValue() <= DstSize.getFixedValue()) {
-    Dst = Dst.withElementType(SrcTy);
-    CGF.EmitAggregateStore(Src, Dst, DstIsVolatile);
+  if (SrcSize.isScalable() || SrcSize <= DstSize) {
+    if (SrcTy->isIntegerTy() && Dst.getElementType()->isPointerTy() &&
+        SrcSize == CGM.getDataLayout().getTypeAllocSize(Dst.getElementType())) {
+      // If the value is supposed to be a pointer, convert it before storing it.
+      Src = CoerceIntOrPtrToIntOrPtr(Src, Dst.getElementType(), *this);
+      Builder.CreateStore(Src, Dst, DstIsVolatile);
+    } else if (llvm::StructType *STy =
+                   dyn_cast<llvm::StructType>(Src->getType())) {
+      // Prefer scalar stores to first-class aggregate stores.
+      Dst = Dst.withElementType(SrcTy);
+      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+        Address EltPtr = Builder.CreateStructGEP(Dst, i);
+        llvm::Value *Elt = Builder.CreateExtractValue(Src, i);
+        Builder.CreateStore(Elt, EltPtr, DstIsVolatile);
+      }
+    } else {
+      Builder.CreateStore(Src, Dst.withElementType(SrcTy), DstIsVolatile);
+    }
+  } else if (SrcTy->isIntegerTy()) {
+    // If the source is a simple integer, coerce it directly.
+    llvm::Type *DstIntTy = Builder.getIntNTy(DstSize.getFixedValue() * 8);
+    Src = CoerceIntOrPtrToIntOrPtr(Src, DstIntTy, *this);
+    Builder.CreateStore(Src, Dst.withElementType(DstIntTy), DstIsVolatile);
   } else {
     // Otherwise do coercion through memory. This is stupid, but
     // simple.
@@ -1416,12 +1391,12 @@ static void CreateCoercedStore(llvm::Value *Src,
     // FIXME: Assert that we aren't truncating non-padding bits when have access
     // to that information.
     RawAddress Tmp =
-        CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
-    CGF.Builder.CreateStore(Src, Tmp);
-    CGF.Builder.CreateMemCpy(
-        Dst.emitRawPointer(CGF), Dst.getAlignment().getAsAlign(),
-        Tmp.getPointer(), Tmp.getAlignment().getAsAlign(),
-        llvm::ConstantInt::get(CGF.IntPtrTy, DstSize.getFixedValue()));
+        CreateTempAllocaForCoercion(*this, SrcTy, Dst.getAlignment());
+    Builder.CreateStore(Src, Tmp);
+    Builder.CreateMemCpy(Dst.emitRawPointer(*this),
+                         Dst.getAlignment().getAsAlign(), Tmp.getPointer(),
+                         Tmp.getAlignment().getAsAlign(),
+                         Builder.CreateTypeSize(IntPtrTy, DstSize));
   }
 }
 
@@ -3309,7 +3284,12 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
         assert(NumIRArgs == 1);
         auto AI = Fn->getArg(FirstIRArg);
         AI->setName(Arg->getName() + ".coerce");
-        CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this);
+        CreateCoercedStore(
+            AI, Ptr,
+            llvm::TypeSize::getFixed(
+                getContext().getTypeSizeInChars(Ty).getQuantity() -
+                ArgI.getDirectOffset()),
+            /*DstIsVolatile=*/false);
       }
 
       // Match to what EmitParmDecl is expecting for this type.
@@ -5939,17 +5919,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
             llvm::Value *Imag = Builder.CreateExtractValue(CI, 1);
             return RValue::getComplex(std::make_pair(Real, Imag));
           }
-          case TEK_Aggregate: {
-            Address DestPtr = ReturnValue.getAddress();
-            bool DestIsVolatile = ReturnValue.isVolatile();
-
-            if (!DestPtr.isValid()) {
-              DestPtr = CreateMemTemp(RetTy, "agg.tmp");
-              DestIsVolatile = false;
-            }
-            EmitAggregateStore(CI, DestPtr, DestIsVolatile);
-            return RValue::getAggregate(DestPtr);
-          }
+          case TEK_Aggregate:
+            break;
           case TEK_Scalar: {
             // If the argument doesn't match, perform a bitcast to coerce it.
             // This can happen due to trivial type mismatches.
@@ -5959,7 +5930,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
             return RValue::get(V);
           }
           }
-          llvm_unreachable("bad evaluation kind");
         }
 
         // If coercing a fixed vector from a scalable vector for ABI
@@ -5981,10 +5951,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
 
         Address DestPtr = ReturnValue.getValue();
         bool DestIsVolatile = ReturnValue.isVolatile();
+        uint64_t DestSize =
+            getContext().getTypeInfoDataSizeInChars(RetTy).Width.getQuantity();
 
         if (!DestPtr.isValid()) {
           DestPtr = CreateMemTemp(RetTy, "coerce");
           DestIsVolatile = false;
+          DestSize = getContext().getTypeSizeInChars(RetTy).getQuantity();
         }
 
         // An empty record can overlap other data (if declared with
@@ -5993,7 +5966,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
         if (!isEmptyRecord(getContext(), RetTy, true)) {
           // If the value is offset in memory, apply the offset now.
           Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI);
-          CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this);
+          CreateCoercedStore(
+              CI, StorePtr,
+              llvm::TypeSize::getFixed(DestSize - RetAI.getDirectOffset()),
+              DestIsVolatile);
         }
 
         return convertTempToRValue(DestPtr, RetTy, SourceLocation());
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index c3c10e73ff05e..d9f44f4be617e 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -131,15 +131,12 @@ class AggExprEmitter : public StmtVisitor<AggExprEmitter> {
     EnsureDest(E->getType());
 
     if (llvm::Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) {
-      Address StoreDest = Dest.getAddress();
-      // The emitted value is guaranteed to have the same size as the
-      // destination but can have a different type. Just do a bitcast in this
-      // case to avoid incorrect GEPs.
-      if (Result->getType() != StoreDest.getType())
-        StoreDest = StoreDest.withElementType(Result->getType());
-
-      CGF.EmitAggregateStore(Result, StoreDest,
-                             E->getType().isVolatileQualified());
+      CGF.CreateCoercedStore(
+          Result, Dest.getAddress(),
+          llvm::TypeSize::getFixed(
+              Dest.getPreferredSize(CGF.getContext(), E->getType())
+                  .getQuantity()),
+          E->getType().isVolatileQualified());
       return;
     }
     return Visit(E->getSubExpr());
@@ -2050,6 +2047,10 @@ CodeGenFunction::getOverlapForFieldInit(const FieldDecl *FD) {
   if (!FD->hasAttr<NoUniqueAddressAttr>() || !FD->getType()->isRecordType())
     return AggValueSlot::DoesNotOverlap;
 
+  // Empty fields can overlap earlier fields.
+  if (FD->getType()->getAsCXXRecordDecl()->isEmpty())
+    return AggValueSlot::MayOverlap;
+
   // If the field lies entirely within the enclosing class's nvsize, its tail
   // padding cannot overlap any already-initialized object. (The only subobjects
   // with greater addresses that might already be initialized are vbases.)
@@ -2072,6 +2073,10 @@ AggValueSlot::Overlap_t CodeGenFunction::getOverlapForBaseInit(
   if (IsVirtual)
     return AggValueSlot::MayOverlap;
 
+  // Empty bases can overlap earlier bases.
+  if (BaseRD->isEmpty())
+    return AggValueSlot::MayOverlap;
+
   // If the base class is laid out entirely within the nvsize of the derived
   // class, its tail padding cannot yet be initialized, so we can issue
   // stores at the full width of the base class.
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index ba7b565d97559..60e6841e1b3d6 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4838,9 +4838,10 @@ class CodeGenFunction : public CodeGenTypeCache {
   void EmitAggFinalDestCopy(QualType Type, AggValueSlot Dest, const LValue &Src,
                             ExprValueKind SrcKind);
 
-  /// Build all the stores needed to initialize an aggregate at Dest with the
-  /// value Val.
-  void EmitAggregateStore(llvm::Value *Val, Address Dest, bool DestIsVolatile);
+  /// Create a store to \arg DstPtr from \arg Src, truncating the stored value
+  /// to at most \arg DstSize bytes.
+  void CreateCoercedStore(llvm::Value *Src, Address Dst, llvm::TypeSize DstSize,
+                          bool DstIsVolatile);
 
   /// EmitExtendGCLifetime - Given a pointer to an Objective-C object,
   /// make sure it survives garbage collection until this point.
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vld24.c b/clang/test/CodeGen/arm-mve-intrinsics/vld24.c
index 03c870e281549..15619bef5373d 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vld24.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vld24.c
@@ -48,10 +48,13 @@ uint8x16x4_t test_vld4q_u8(const uint8_t *addr)
 
 // CHECK-LABEL: @test_vst2q_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VALUE_COERCE_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_UINT32X4X2_T:%.*]] [[VALUE_COERCE:%.*]], 0, 0
-// CHECK-NEXT:    [[VALUE_COERCE_FCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_UINT32X4X2_T]] [[VALUE_COERCE]], 0, 1
-// CHECK-NEXT:    call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[ADDR:%.*]], <4 x i32> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <4 x i32> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 0)
-// CHECK-NEXT:    call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[ADDR]], <4 x i32> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <4 x i32> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 1)
+// CHECK-NEXT:    [[TMP0:%.*]] = extractvalue [[STRUCT_UINT32X4X2_T:%.*]] [[VALUE_COERCE:%.*]], 0
+// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP0]], 0
+// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP0]], 1
+// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] poison, <4 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT:    call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[ADDR:%.*]], <4 x i32> [[DOTFCA_0_EXTRACT]], <4 x i32> [[DOTFCA_1_EXTRACT]], i32 0)
+// CHECK-NEXT:    call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[ADDR]], <4 x i32> [[DOTFCA_0_EXTRACT]], <4 x i32> [[DOTFCA_1_EXTRACT]], i32 1)
 // CHECK-NEXT:    ret void
 //
 void test_vst2q_u32(uint32_t *addr, uint32x4x2_t value)
@@ -65,14 +68,19 @@ void test_vst2q_u32(uint32_t *addr, uint32x4x2_t value)
 
 // CHECK-LABEL: @test_vst4q_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VALUE_COERCE_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T:%.*]] [[VALUE_COERCE:%.*]], 0, 0
-// CHECK-NEXT:    [[VALUE_COERCE_FCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T]] [[VALUE_COERCE]], 0, 1
-// CHECK-NEXT:    [[VALUE_COERCE_FCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T]] [[VALUE_COERCE]], 0, 2
-// CHECK-NEXT:    [[VALUE_COERCE_FCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T]] [[VALUE_COERCE]], 0, 3
-// CHECK-NEXT:    call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR:%.*]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 0)
-// CHECK-NEXT:    call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 1)
-// CHECK-NEXT:    call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 2)
-// CHECK-NEXT:    call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 3)
+// CHECK-NEXT:    [[TMP0:%.*]] = extractvalue [[STRUCT_INT8X16X4_T:%.*]] [[VALUE_COERCE:%.*]], 0
+// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[TMP0]], 0
+// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[TMP0]], 1
+// CHECK-NEXT:    [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[TMP0]], 2
+// CHECK-NEXT:    [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[TMP0]], 3
+// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT:    [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[DOTFCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT:    [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[DOTFCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT:    call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR:%.*]], <16 x i8> [[DOTFCA_0_EXTRACT]], <16 x i8> [[DOTFCA_1_EXTRACT]], <16 x i8> [[DOTFCA_2_EXTRACT]], <16 x i8> [[DOTFCA_3_EXTRACT]], i32 0)
+// CHECK-NEXT:    call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR]], <16 x i8> [[DOTFCA_0_EXTRACT]], <16 x i8> [[DOTFCA_1_EXTRACT]], <16 x i8> [[DOTFCA_2_EXTRACT]], <16 x i8> [[DOTFCA_3_EXTRACT]], i32 1)
+// CHECK-NEXT:    call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR]], <16 x i8> [[DOTFCA_0_EXTRACT]], <16 x i8> [[DOTFCA_1_EXTRACT]], <16 x i8> [[DOTFCA_2_EXTRACT]], <16 x i8> [[DOTFCA_3_EXTRACT]], i32 2)
+// CHECK-NEXT:    call void @llvm.arm.mve.vst4q.p0.v16i8(ptr [[ADDR]], <16 x i8> [[DOTFCA_0_EXTRACT]], <16 x i8> [[DOTFCA_1_EXTRACT]], <16 x i8> [[DOTFCA_2_EXTRACT]], <16 x i8> [[DOTFCA_3_EXTRACT]], i32 3)
 // CHECK-NEXT:    ret void
 //
 void test_vst4q_s8(int8_t *addr, int8x16x4_t value)
@@ -86,10 +94,13 @@ void test_vst4q_s8(int8_t *addr, int8x16x4_t value)
 
 // CHECK-LABEL: @test_vst2q_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VALUE_COERCE_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T:%.*]] [[VALUE_COERCE:%.*]], 0, 0
-// CHECK-NEXT:    [[VALUE_COERCE_FCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[VALUE_COERCE]], 0, 1
-// CHECK-NEXT:    call void @llvm.arm.mve.vst2q.p0.v8f16(ptr [[ADDR:%.*]], <8 x half> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <8 x half> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 0)
-// CHECK-NEXT:    call void @llvm.arm.mve.vst2q.p0.v8f16(ptr [[ADDR]], <8 x half> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <8 x half> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 1)
+// CHECK-NEXT:    [[TMP0:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T:%.*]] [[VALUE_COERCE:%.*]], 0
+// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[TMP0]], 0
+// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[TMP0]], 1
+// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] poison, <8 x half> [[DOTFCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x half> [[DOTFCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT:    call void @llvm.arm.mve.vst2q.p0.v8f16(ptr [[ADDR:%.*]], <8 x half> [[DOTFCA_0_EXTRACT]], <8 x half> [[DOTFCA_1_EXTRACT]], i32 0)
+// CHECK-NEXT:    call void @llvm.arm.mve.vst2q.p0.v8f16(ptr [[ADDR]], <8 x half> [[DOTFCA_0_EXTRACT]], <8 x half> [[DOTFCA_1_EXTRACT]], i32 1)
 // CHECK-NEXT:    ret void
 //
 void test_vst2q_f16(float16_t *addr, float16x8x2_t value)
diff --git a/clang/test/CodeGen/arm-vfp16-arguments2.cpp b/clang/test/CodeGen/arm-vfp16-arguments2.cpp
index 6221e85e856b4..b7c6852c47b7f 100644
--- a/clang/test/CodeGen/arm-vfp16-arguments2.cpp
+++ b/clang/test/CodeGen/arm-vfp16-arguments2.cpp
@@ -44,20 +44,20 @@ struct S1 f1(struct S1 s1) { return s1; }
 
 // CHECK-SOFT: define{{.*}} void @_Z2f22S2(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.S2) align 8 %agg.result, [4 x i32] %s2.coerce)
 // CHECK-HARD: define{{.*}} arm_aapcs_vfpcc [2 x <2 x i32>] @_Z2f22S2([2 x <2 x i32>] returned %s2.coerce)
-// CHECK-FULL: define{{.*}} arm_aapcs_vfpcc %struct.S2 @_Z2f22S2(%struct.S2 returned %s2.coerce)
+// CHECK-FULL: define{{.*}} arm_aapcs_vfpcc %struct.S2 @_Z2f22S2(%struct.S2 %s2.coerce)
 struct S2 f2(struct S2 s2) { return s2; }
 
 // CHECK-SOFT: define{{.*}} void @_Z2f32S3(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.S3) align 8 %agg.result, [2 x i64] %s3.coerce)
 // CHECK-HARD: define{{.*}} arm_aapcs_vfpcc [2 x <2 x i32>] @_Z2f32S3([2 x <2 x i32>] returned %s3.coerce)
-// CHECK-FULL: define{{.*}} arm_aapcs_vfpcc %struct.S3 @_Z2f32S3(%struct.S3 returned %s3.coerce)
+// CHECK-FULL: define{{.*}} arm_aapcs_vfpcc %struct.S3 @_Z2f32S3(%struct.S3 %s3.coerce)
 struct S3 f3(struct S3 s3) { return s3; }
 
 // CHECK-SOFT: define{{.*}} void @_Z2f42S4(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.S4) align 8 %agg.result, [2 x i64] %s4.coerce)
 // CHECK-HARD: define{{.*}} arm_aapcs_vfpcc [2 x <2 x i32>] @_Z2f42S4([2 x <2 x i32>] returned %s4.coerce)
-// CHECK-FULL: define{{.*}} arm_aapcs_vfpcc %struct.S4 @_Z2f42S4(%struct.S4 returned %s4.coerce)
+// CHECK-FULL: define{{.*}} arm_aapcs_vfpcc %struct.S4 @_Z2f42S4(%struct.S4 %s4.coerce)
 struct S4 f4(struct S4 s4) { return s4; }
 
 // CHECK-SOFT: define{{.*}} void @_Z2f52S5(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.S5) align 8 %agg.result, [2 x i64] %s5.coerce)
-// CHECK-HARD: define{{.*}} arm_aapcs_vfpcc %struct.S5 @_Z2f52S5(%struct.S5 returned %s5.coerce)
-// CHECK-FULL: define{{.*}} arm_aapcs_vfpcc %struct.S5 @_Z2f52S5(%struct.S5 returned %s5.coerce)
+// CHECK-HARD: define{{.*}} arm_aapcs_vfpcc %struct.S5 @_Z2f52S5(%struct.S5 %s5.coerce)
+// CHECK-FULL: define{{.*}} arm_aapcs_vfpcc %struct.S5 @_Z2f52S5(%struct.S5 %s5.coerce)
 struct S5 f5(struct S5 s5) { return s5; }
diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
index a5135ab01f0f3..70c86cbb8c3d4 100644
--- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -16,9 +16,8 @@
 
 // HOST: define{{.*}} void @_Z22__device_stub__kernel1Pi(ptr noundef %x)
 // COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel1Pi(ptr addrspace(1){{.*}} %x.coerce)
-// CHECK:     ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr
 // CHECK-NOT: ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr
-// OPT: [[VAL:%.*]] = load i32, ptr addrspace(1) %x.coerce, align 4, !amdgpu.noclobber ![[MD:[0-9]+]]
+// OPT: [[VAL:%.*]] = load i32, ptr addrspace(1) %x.coerce, align 4{{$}}
 // OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
 // OPT: store i32 [[INC]], ptr addrspace(1) %x.coerce, align 4
 // OPT: ret void
@@ -28,9 +27,8 @@ __global__ void kernel1(int *x) {
 
 // HOST: define{{.*}} void @_Z22__device_stub__kernel2Ri(ptr noundef nonnull align 4 dereferenceable(4) %x)
 // COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel2Ri(ptr addrspace(1){{.*}} nonnull align 4 dereferenceable(4) %x.coerce)
-// CHECK:     ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr
 // CHECK-NOT: ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr
-// OPT: [[VAL:%.*]] = load i32, ptr addrspace(1) %x.coerce, align 4, !amdgpu.noclobber ![[MD]]
+// OPT: [[VAL:%.*]] = load i32, ptr addrspace(1) %x.coerce, align 4{{$}}
 // OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
 // OPT: store i32 [[INC]], ptr addrspace(1) %x.coerce, align 4
 // OPT: ret void
@@ -67,7 +65,7 @@ struct S {
 // OPT: [[R1:%.*]] = getelementptr inbounds i8, ptr addrspace(4) %0, i64 8
 // OPT: [[P1:%.*]] = load ptr, ptr addrspace(4) [[R1]], align 8
 // OPT: [[G1:%.*]] ={{.*}} addrspacecast ptr [[P1]] to ptr addrspace(1)
-// OPT: [[V0:%.*]] = load i32, ptr addrspace(1) [[G0]], align 4, !amdgpu.noclobber ![[MD]]
+// OPT: [[V0:%.*]] = load i32, ptr addrspace(1) [[G0]], align 4, !amdgpu.noclobber ![[MD:[0-9]+]]
 // OPT: [[INC:%.*]] = add nsw i32 [[V0]], 1
 // OPT: store i32 [[INC]], ptr addrspace(1) [[G0]], align 4
 // OPT: [[V1:%.*]] = load float, ptr addrspace(1) [[G1]], align 4
@@ -126,9 +124,8 @@ struct SS {
 };
 // HOST: define{{.*}} void @_Z22__device_stub__kernel82SS(ptr %a.coerce)
 // COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel82SS(ptr addrspace(1){{.*}} %a.coerce)
-// CHECK:     ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr
 // CHECK-NOT: ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr
-// OPT: [[VAL:%.*]] = load float, ptr addrspace(1) %a.coerce, align 4, !amdgpu.noclobber ![[MD]]
+// OPT: [[VAL:%.*]] = load float, ptr addrspace(1) %a.coerce, align 4{{$}}
 // OPT: [[INC:%.*]] = fadd contract float [[VAL]], 3.000000e+00
 // OPT: store float [[INC]], ptr addrspace(1) %a.coerce, align 4
 // OPT: ret void
diff --git a/clang/test/CodeGenCUDA/builtins-amdgcn.cu b/clang/test/CodeGenCUDA/builtins-amdgcn.cu
index 2e88afac813f4..4bf23e529c7a5 100644
--- a/clang/test/CodeGenCUDA/builtins-amdgcn.cu
+++ b/clang/test/CodeGenCUDA/builtins-amdgcn.cu
@@ -17,17 +17,16 @@
 // CHECK-NEXT:    [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr
 // CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
 // CHECK-NEXT:    [[DISPATCH_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DISPATCH_PTR]] to ptr
-// CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[OUT_COERCE:%.*]] to ptr
-// CHECK-NEXT:    store ptr [[TMP0]], ptr [[OUT_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(4) [[TMP1]] to ptr
-// CHECK-NEXT:    store ptr [[TMP2]], ptr [[DISPATCH_PTR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DISPATCH_PTR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store i32 [[TMP4]], ptr [[TMP5]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
+// CHECK-NEXT:    store ptr [[TMP1]], ptr [[DISPATCH_PTR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DISPATCH_PTR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[TMP4]], align 4
 // CHECK-NEXT:    ret void
 //
 __global__ void use_dispatch_ptr(int* out) {
@@ -43,17 +42,16 @@ __global__ void use_dispatch_ptr(int* out) {
 // CHECK-NEXT:    [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr
 // CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
 // CHECK-NEXT:    [[QUEUE_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[QUEUE_PTR]] to ptr
-// CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[OUT_COERCE:%.*]] to ptr
-// CHECK-NEXT:    store ptr [[TMP0]], ptr [[OUT_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
-// CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(4) [[TMP1]] to ptr
-// CHECK-NEXT:    store ptr [[TMP2]], ptr [[QUEUE_PTR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[QUEUE_PTR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store i32 [[TMP4]], ptr [[TMP5]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
+// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
+// CHECK-NEXT:    store ptr [[TMP1]], ptr [[QUEUE_PTR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[QUEUE_PTR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[TMP4]], align 4
 // CHECK-NEXT:    ret void
 //
 __global__ void use_queue_ptr(int* out) {
@@ -69,17 +67,16 @@ __global__ void use_queue_ptr(int* out) {
 // CHECK-NEXT:    [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr
 // CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
 // CHECK-NEXT:    [[IMPLICITARG_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IMPLICITARG_PTR]] to ptr
-// CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[OUT_COERCE:%.*]] to ptr
-// CHECK-NEXT:    store ptr [[TMP0]], ptr [[OUT_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(4) [[TMP1]] to ptr
-// CHECK-NEXT:    store ptr [[TMP2]], ptr [[IMPLICITARG_PTR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[IMPLICITARG_PTR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store i32 [[TMP4]], ptr [[TMP5]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
+// CHECK-NEXT:    store ptr [[TMP1]], ptr [[IMPLICITARG_PTR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[IMPLICITARG_PTR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[TMP4]], align 4
 // CHECK-NEXT:    ret void
 //
 __global__ void use_implicitarg_ptr(int* out) {
@@ -134,16 +131,15 @@ __global__ void test_ds_fadd(float src) {
 // CHECK-NEXT:    [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr
 // CHECK-NEXT:    [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SHARED_ADDR]] to ptr
 // CHECK-NEXT:    [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
-// CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[SHARED_COERCE:%.*]] to ptr
-// CHECK-NEXT:    store ptr [[TMP0]], ptr [[SHARED_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[SHARED_COERCE:%.*]], ptr [[SHARED_ASCAST]], align 8
 // CHECK-NEXT:    [[SHARED1:%.*]] = load ptr, ptr [[SHARED_ASCAST]], align 8
 // CHECK-NEXT:    store float [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store ptr [[SHARED1]], ptr [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr [[TMP1]] to ptr addrspace(3)
-// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[SRC_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP2]], float [[TMP3]] monotonic, align 4
-// CHECK-NEXT:    store volatile float [[TMP4]], ptr [[X_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[SHARED_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
+// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[SRC_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4
+// CHECK-NEXT:    store volatile float [[TMP3]], ptr [[X_ASCAST]], align 4
 // CHECK-NEXT:    ret void
 //
 __global__ void test_ds_fmin(float src, float *shared) {
@@ -184,17 +180,16 @@ __global__ void endpgm() {
 // CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
 // CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
 // CHECK-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
-// CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[OUT_COERCE:%.*]] to ptr
-// CHECK-NEXT:    store ptr [[TMP0]], ptr [[OUT_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i64 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i64 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[TMP1]], i64 [[TMP2]], i32 35)
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store i64 [[TMP3]], ptr [[TMP4]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[B_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[TMP0]], i64 [[TMP1]], i32 35)
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i64 [[TMP2]], ptr [[TMP3]], align 8
 // CHECK-NEXT:    ret void
 //
 __global__ void test_uicmp_i64(unsigned long long *out, unsigned long long a, unsigned long long b)
@@ -210,13 +205,12 @@ __global__ void test_uicmp_i64(unsigned long long *out, unsigned long long a, un
 // CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 // CHECK-NEXT:    [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr
 // CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
-// CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[OUT_COERCE:%.*]] to ptr
-// CHECK-NEXT:    store ptr [[TMP0]], ptr [[OUT_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.s.memtime()
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store i64 [[TMP1]], ptr [[TMP2]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i64 [[TMP0]], ptr [[TMP1]], align 8
 // CHECK-NEXT:    ret void
 //
 __global__ void test_s_memtime(unsigned long long* out)
@@ -237,18 +231,17 @@ __device__ void func(float *x);
 // CHECK-NEXT:    [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_ADDR]] to ptr
 // CHECK-NEXT:    [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SHARED_ADDR]] to ptr
 // CHECK-NEXT:    [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
-// CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[SHARED_COERCE:%.*]] to ptr
-// CHECK-NEXT:    store ptr [[TMP0]], ptr [[SHARED_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[SHARED_COERCE:%.*]], ptr [[SHARED_ASCAST]], align 8
 // CHECK-NEXT:    [[SHARED1:%.*]] = load ptr, ptr [[SHARED_ASCAST]], align 8
 // CHECK-NEXT:    store float [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store ptr [[SHARED1]], ptr [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr [[TMP1]] to ptr addrspace(3)
-// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[SRC_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP2]], float [[TMP3]] monotonic, align 4
-// CHECK-NEXT:    store volatile float [[TMP4]], ptr [[X_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    call void @_Z4funcPf(ptr noundef [[TMP5]]) #[[ATTR7:[0-9]+]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[SHARED_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
+// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[SRC_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4
+// CHECK-NEXT:    store volatile float [[TMP3]], ptr [[X_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[SHARED_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    call void @_Z4funcPf(ptr noundef [[TMP4]]) #[[ATTR7:[0-9]+]]
 // CHECK-NEXT:    ret void
 //
 __global__ void test_ds_fmin_func(float src, float *__restrict shared) {
@@ -264,14 +257,13 @@ __global__ void test_ds_fmin_func(float src, float *__restrict shared) {
 // CHECK-NEXT:    [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
 // CHECK-NEXT:    [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
 // CHECK-NEXT:    [[RET_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr
-// CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[X_COERCE:%.*]] to ptr
-// CHECK-NEXT:    store ptr [[TMP0]], ptr [[X_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[X_COERCE:%.*]], ptr [[X_ASCAST]], align 8
 // CHECK-NEXT:    [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8
 // CHECK-NEXT:    store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[TMP1]])
-// CHECK-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TMP2]] to i8
-// CHECK-NEXT:    store i8 [[FROMBOOL]], ptr [[RET_ASCAST]], align 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[TMP0]])
+// CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[TMP1]] to i8
+// CHECK-NEXT:    store i8 [[STOREDV]], ptr [[RET_ASCAST]], align 1
 // CHECK-NEXT:    ret void
 //
 __global__ void test_is_shared(float *x){
@@ -286,14 +278,13 @@ __global__ void test_is_shared(float *x){
 // CHECK-NEXT:    [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
 // CHECK-NEXT:    [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
 // CHECK-NEXT:    [[RET_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr
-// CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[X_COERCE:%.*]] to ptr
-// CHECK-NEXT:    store ptr [[TMP0]], ptr [[X_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[X_COERCE:%.*]], ptr [[X_ASCAST]], align 8
 // CHECK-NEXT:    [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8
 // CHECK-NEXT:    store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[TMP1]])
-// CHECK-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TMP2]] to i8
-// CHECK-NEXT:    store i8 [[FROMBOOL]], ptr [[RET_ASCAST]], align 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[TMP0]])
+// CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[TMP1]] to i8
+// CHECK-NEXT:    store i8 [[STOREDV]], ptr [[RET_ASCAST]], align 1
 // CHECK-NEXT:    ret void
 //
 __global__ void test_is_private(int *x){
diff --git a/clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu b/clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu
index 32851805298f1..1cbe358910b85 100644
--- a/clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu
+++ b/clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu
@@ -17,16 +17,15 @@
 // CHECK-NEXT:    [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
 // CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
 // CHECK-NEXT:    [[DISPATCH_PTR_ASCAST:%.*]] = addrspacecast ptr [[DISPATCH_PTR]] to ptr addrspace(4)
-// CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[OUT_COERCE:%.*]] to ptr addrspace(4)
-// CHECK-NEXT:    store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[OUT_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = call align 4 dereferenceable(64) addrspace(4) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// CHECK-NEXT:    store ptr addrspace(4) [[TMP1]], ptr addrspace(4) [[DISPATCH_PTR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[DISPATCH_PTR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store i32 [[TMP3]], ptr addrspace(4) [[TMP4]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = call align 4 dereferenceable(64) addrspace(4) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// CHECK-NEXT:    store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[DISPATCH_PTR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[DISPATCH_PTR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[TMP2]], ptr addrspace(4) [[TMP3]], align 4
 // CHECK-NEXT:    ret void
 //
 __global__ void use_dispatch_ptr(int* out) {
@@ -42,16 +41,15 @@ __global__ void use_dispatch_ptr(int* out) {
 // CHECK-NEXT:    [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
 // CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
 // CHECK-NEXT:    [[QUEUE_PTR_ASCAST:%.*]] = addrspacecast ptr [[QUEUE_PTR]] to ptr addrspace(4)
-// CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[OUT_COERCE:%.*]] to ptr addrspace(4)
-// CHECK-NEXT:    store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[OUT_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = call addrspace(4) ptr addrspace(4) @llvm.amdgcn.queue.ptr()
-// CHECK-NEXT:    store ptr addrspace(4) [[TMP1]], ptr addrspace(4) [[QUEUE_PTR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[QUEUE_PTR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store i32 [[TMP3]], ptr addrspace(4) [[TMP4]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = call addrspace(4) ptr addrspace(4) @llvm.amdgcn.queue.ptr()
+// CHECK-NEXT:    store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[QUEUE_PTR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[QUEUE_PTR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[TMP2]], ptr addrspace(4) [[TMP3]], align 4
 // CHECK-NEXT:    ret void
 //
 __global__ void use_queue_ptr(int* out) {
@@ -67,16 +65,15 @@ __global__ void use_queue_ptr(int* out) {
 // CHECK-NEXT:    [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
 // CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
 // CHECK-NEXT:    [[IMPLICITARG_PTR_ASCAST:%.*]] = addrspacecast ptr [[IMPLICITARG_PTR]] to ptr addrspace(4)
-// CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[OUT_COERCE:%.*]] to ptr addrspace(4)
-// CHECK-NEXT:    store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[OUT_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = call addrspace(4) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// CHECK-NEXT:    store ptr addrspace(4) [[TMP1]], ptr addrspace(4) [[IMPLICITARG_PTR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[IMPLICITARG_PTR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store i32 [[TMP3]], ptr addrspace(4) [[TMP4]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = call addrspace(4) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// CHECK-NEXT:    store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[IMPLICITARG_PTR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[IMPLICITARG_PTR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[TMP2]], ptr addrspace(4) [[TMP3]], align 4
 // CHECK-NEXT:    ret void
 //
 __global__ void use_implicitarg_ptr(int* out) {
@@ -131,16 +128,15 @@ __global__ void test_ds_fadd(float src) {
 // CHECK-NEXT:    [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SRC_ADDR]] to ptr addrspace(4)
 // CHECK-NEXT:    [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SHARED_ADDR]] to ptr addrspace(4)
 // CHECK-NEXT:    [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
-// CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[SHARED_COERCE:%.*]] to ptr addrspace(4)
-// CHECK-NEXT:    store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[SHARED_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[SHARED_COERCE:%.*]], ptr addrspace(4) [[SHARED_ASCAST]], align 8
 // CHECK-NEXT:    [[SHARED1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ASCAST]], align 8
 // CHECK-NEXT:    store float [[SRC:%.*]], ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store ptr addrspace(4) [[SHARED1]], ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(4) [[TMP1]] to ptr addrspace(3)
-// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP2]], float [[TMP3]] monotonic, align 4
-// CHECK-NEXT:    store volatile float [[TMP4]], ptr addrspace(4) [[X_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr addrspace(3)
+// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4
+// CHECK-NEXT:    store volatile float [[TMP3]], ptr addrspace(4) [[X_ASCAST]], align 4
 // CHECK-NEXT:    ret void
 //
 __global__ void test_ds_fmin(float src, float *shared) {
@@ -175,17 +171,16 @@ __global__ void endpgm() {
 // CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
 // CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr [[A_ADDR]] to ptr addrspace(4)
 // CHECK-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr [[B_ADDR]] to ptr addrspace(4)
-// CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[OUT_COERCE:%.*]] to ptr addrspace(4)
-// CHECK-NEXT:    store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[OUT_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i64 [[A:%.*]], ptr addrspace(4) [[A_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i64 [[B:%.*]], ptr addrspace(4) [[B_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr addrspace(4) [[B_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP3:%.*]] = call addrspace(4) i64 @llvm.amdgcn.icmp.i64.i64(i64 [[TMP1]], i64 [[TMP2]], i32 35)
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store i64 [[TMP3]], ptr addrspace(4) [[TMP4]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr addrspace(4) [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(4) [[B_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = call addrspace(4) i64 @llvm.amdgcn.icmp.i64.i64(i64 [[TMP0]], i64 [[TMP1]], i32 35)
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i64 [[TMP2]], ptr addrspace(4) [[TMP3]], align 8
 // CHECK-NEXT:    ret void
 //
 __global__ void test_uicmp_i64(unsigned long long *out, unsigned long long a, unsigned long long b)
@@ -201,13 +196,12 @@ __global__ void test_uicmp_i64(unsigned long long *out, unsigned long long a, un
 // CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8
 // CHECK-NEXT:    [[OUT_ASCAST:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(4)
 // CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
-// CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[OUT_COERCE:%.*]] to ptr addrspace(4)
-// CHECK-NEXT:    store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[OUT_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT_COERCE:%.*]], ptr addrspace(4) [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    [[OUT1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ASCAST]], align 8
 // CHECK-NEXT:    store ptr addrspace(4) [[OUT1]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = call addrspace(4) i64 @llvm.amdgcn.s.memtime()
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store i64 [[TMP1]], ptr addrspace(4) [[TMP2]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = call addrspace(4) i64 @llvm.amdgcn.s.memtime()
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i64 [[TMP0]], ptr addrspace(4) [[TMP1]], align 8
 // CHECK-NEXT:    ret void
 //
 __global__ void test_s_memtime(unsigned long long* out)
@@ -228,18 +222,17 @@ __device__ void func(float *x);
 // CHECK-NEXT:    [[SRC_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SRC_ADDR]] to ptr addrspace(4)
 // CHECK-NEXT:    [[SHARED_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SHARED_ADDR]] to ptr addrspace(4)
 // CHECK-NEXT:    [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
-// CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[SHARED_COERCE:%.*]] to ptr addrspace(4)
-// CHECK-NEXT:    store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[SHARED_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[SHARED_COERCE:%.*]], ptr addrspace(4) [[SHARED_ASCAST]], align 8
 // CHECK-NEXT:    [[SHARED1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ASCAST]], align 8
 // CHECK-NEXT:    store float [[SRC:%.*]], ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
 // CHECK-NEXT:    store ptr addrspace(4) [[SHARED1]], ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(4) [[TMP1]] to ptr addrspace(3)
-// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP2]], float [[TMP3]] monotonic, align 4
-// CHECK-NEXT:    store volatile float [[TMP4]], ptr addrspace(4) [[X_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    call spir_func addrspace(4) void @_Z4funcPf(ptr addrspace(4) noundef [[TMP5]]) #[[ATTR6:[0-9]+]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr addrspace(3)
+// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4
+// CHECK-NEXT:    store volatile float [[TMP3]], ptr addrspace(4) [[X_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    call spir_func addrspace(4) void @_Z4funcPf(ptr addrspace(4) noundef [[TMP4]]) #[[ATTR6:[0-9]+]]
 // CHECK-NEXT:    ret void
 //
 __global__ void test_ds_fmin_func(float src, float *__restrict shared) {
@@ -255,15 +248,14 @@ __global__ void test_ds_fmin_func(float src, float *__restrict shared) {
 // CHECK-NEXT:    [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
 // CHECK-NEXT:    [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
 // CHECK-NEXT:    [[RET_ASCAST:%.*]] = addrspacecast ptr [[RET]] to ptr addrspace(4)
-// CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[X_COERCE:%.*]] to ptr addrspace(4)
-// CHECK-NEXT:    store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[X_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[X_COERCE:%.*]], ptr addrspace(4) [[X_ASCAST]], align 8
 // CHECK-NEXT:    [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8
 // CHECK-NEXT:    store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(4) [[TMP1]] to ptr
-// CHECK-NEXT:    [[TMP3:%.*]] = call addrspace(4) i1 @llvm.amdgcn.is.shared(ptr [[TMP2]])
-// CHECK-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TMP3]] to i8
-// CHECK-NEXT:    store i8 [[FROMBOOL]], ptr addrspace(4) [[RET_ASCAST]], align 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
+// CHECK-NEXT:    [[TMP2:%.*]] = call addrspace(4) i1 @llvm.amdgcn.is.shared(ptr [[TMP1]])
+// CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[TMP2]] to i8
+// CHECK-NEXT:    store i8 [[STOREDV]], ptr addrspace(4) [[RET_ASCAST]], align 1
 // CHECK-NEXT:    ret void
 //
 __global__ void test_is_shared(float *x){
@@ -278,15 +270,14 @@ __global__ void test_is_shared(float *x){
 // CHECK-NEXT:    [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
 // CHECK-NEXT:    [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
 // CHECK-NEXT:    [[RET_ASCAST:%.*]] = addrspacecast ptr [[RET]] to ptr addrspace(4)
-// CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[X_COERCE:%.*]] to ptr addrspace(4)
-// CHECK-NEXT:    store ptr addrspace(4) [[TMP0]], ptr addrspace(4) [[X_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[X_COERCE:%.*]], ptr addrspace(4) [[X_ASCAST]], align 8
 // CHECK-NEXT:    [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8
 // CHECK-NEXT:    store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(4) [[TMP1]] to ptr
-// CHECK-NEXT:    [[TMP3:%.*]] = call addrspace(4) i1 @llvm.amdgcn.is.private(ptr [[TMP2]])
-// CHECK-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TMP3]] to i8
-// CHECK-NEXT:    store i8 [[FROMBOOL]], ptr addrspace(4) [[RET_ASCAST]], align 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
+// CHECK-NEXT:    [[TMP2:%.*]] = call addrspace(4) i1 @llvm.amdgcn.is.private(ptr [[TMP1]])
+// CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[TMP2]] to i8
+// CHECK-NEXT:    store i8 [[STOREDV]], ptr addrspace(4) [[RET_ASCAST]], align 1
 // CHECK-NEXT:    ret void
 //
 __global__ void test_is_private(int *x){
diff --git a/clang/test/CodeGenCXX/address-space-cast-coerce.cpp b/clang/test/CodeGenCXX/address-space-cast-coerce.cpp
index 7279b6c7f23a0..1ad46042b6efd 100644
--- a/clang/test/CodeGenCXX/address-space-cast-coerce.cpp
+++ b/clang/test/CodeGenCXX/address-space-cast-coerce.cpp
@@ -46,9 +46,9 @@ int mane() {
     char1 f1{1};
     char1 f2{1};
 
-// CHECK: [[TMP:%.+]] = alloca i16
-// CHECK: [[COERCE:%.+]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
-// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %{{.+}}, ptr align 2 [[COERCE]], i64 1, i1 false)
+// CHECK: [[CALL:%.*]] = call i16
+// CHECK: [[TRUNC:%.*]] = trunc i16 [[CALL]] to i8
+// CHECK: store i8 [[TRUNC]]
 
     char1 f3 = f1 + f2;
 }
diff --git a/clang/test/CodeGenCXX/cxx2a-consteval.cpp b/clang/test/CodeGenCXX/cxx2a-consteval.cpp
index 075cab58358ab..6c09053a74d24 100644
--- a/clang/test/CodeGenCXX/cxx2a-consteval.cpp
+++ b/clang/test/CodeGenCXX/cxx2a-consteval.cpp
@@ -1,4 +1,3 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // RUN: %clang_cc1 -emit-llvm %s -std=c++2a -triple x86_64-unknown-linux-gnu -o %t.ll
 // RUN: FileCheck -check-prefix=EVAL -input-file=%t.ll %s
 // RUN: FileCheck -check-prefix=EVAL-STATIC -input-file=%t.ll %s
@@ -275,3 +274,26 @@ void f() {
     // EVAL-FN:     call void @_ZN7GH821542S3C2Ei
 }
 }
+
+namespace GH93040 {
+struct C { char c = 1; };
+struct Empty { consteval Empty() {} };
+struct Empty2 { consteval Empty2() {} };
+struct Test : C, Empty {
+  [[no_unique_address]] Empty2 e;
+};
+static_assert(sizeof(Test) == 1);
+void f() {
+  Test test;
+
+// Make sure we don't overwrite the initialization of c.
+
+// EVAL-FN-LABEL: define {{.*}} void @_ZN7GH930404TestC2Ev
+// EVAL-FN: entry:
+// EVAL-FN-NEXT:  [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// EVAL-FN-NEXT:  store ptr {{.*}}, ptr [[THIS_ADDR]], align 8
+// EVAL-FN-NEXT:  [[THIS:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// EVAL-FN-NEXT:  call void @_ZN7GH930401CC2Ev(ptr noundef nonnull align 1 dereferenceable(1) [[THIS]])
+// EVAL-FN-NEXT:  ret void
+}
+}
diff --git a/clang/test/CodeGenCXX/trivial_abi.cpp b/clang/test/CodeGenCXX/trivial_abi.cpp
index 3012b0f2bc33d..54912a617c287 100644
--- a/clang/test/CodeGenCXX/trivial_abi.cpp
+++ b/clang/test/CodeGenCXX/trivial_abi.cpp
@@ -262,6 +262,26 @@ void testExceptionLarge() {
   calleeExceptionLarge(Large(), Large());
 }
 
+// CHECK: define void @_ZN7GH930401gEPNS_1SE
+// CHECK: [[CALL:%.*]] = call i64 @_ZN7GH930401fEv
+// CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[CALL]] to i56
+// CHECK-NEXT: store i56 [[TRUNC]]
+// CHECK-NEXT: ret void
+void* operator new(unsigned long, void*);
+namespace GH93040 {
+struct [[clang::trivial_abi]] S {
+  char a;
+  int x;
+  __attribute((aligned(2))) char y;
+  S();
+} __attribute((packed));
+S f();
+void g(S* s) { new(s) S(f()); }
+struct S2 { [[no_unique_address]] S s; char c;};
+static_assert(sizeof(S) == 8 && sizeof(S2) == 8, "");
+}
+
+
 // PR42961
 
 // CHECK: define{{.*}} @"_ZN3$_08__invokeEv"()
diff --git a/clang/test/CodeGenHIP/dpp-const-fold.hip b/clang/test/CodeGenHIP/dpp-const-fold.hip
index f5a97c6b0e77f..c5450ec4b8413 100644
--- a/clang/test/CodeGenHIP/dpp-const-fold.hip
+++ b/clang/test/CodeGenHIP/dpp-const-fold.hip
@@ -22,25 +22,25 @@ constexpr static bool BountCtrl()
     return true & false;
 }
 
-// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 16, i32 0, i32 0, i1 false)
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %1, i32 16, i32 0, i32 0, i1 false)
 __attribute__((global)) void test_update_dpp_const_fold_imm_operand_2(int* out, int a, int b)
 {
   *out = __builtin_amdgcn_update_dpp(a, b, OpCtrl(), 0, 0, false);
 }
 
-// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 4, i32 0, i1 false)
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %1, i32 0, i32 4, i32 0, i1 false)
 __attribute__((global)) void test_update_dpp_const_fold_imm_operand_3(int* out, int a, int b)
 {
   *out = __builtin_amdgcn_update_dpp(a, b, 0, RowMask(), 0, false);
 }
 
-// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 3, i1 false)
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %1, i32 0, i32 0, i32 3, i1 false)
 __attribute__((global)) void test_update_dpp_const_fold_imm_operand_4(int* out, int a, int b)
 {
   *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, BankMask(), false);
 }
 
-// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 0, i1 false)
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %1, i32 0, i32 0, i32 0, i1 false)
 __attribute__((global)) void test_update_dpp_const_fold_imm_operand_5(int* out, int a, int b)
 {
   *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, BountCtrl());
diff --git a/clang/test/CodeGenHIP/spirv-amdgcn-dpp-const-fold.hip b/clang/test/CodeGenHIP/spirv-amdgcn-dpp-const-fold.hip
index 2b785200e8eea..71270bc1c68d8 100644
--- a/clang/test/CodeGenHIP/spirv-amdgcn-dpp-const-fold.hip
+++ b/clang/test/CodeGenHIP/spirv-amdgcn-dpp-const-fold.hip
@@ -21,25 +21,25 @@ constexpr static bool BountCtrl()
     return true & false;
 }
 
-// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 16, i32 0, i32 0, i1 false)
+// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %1, i32 16, i32 0, i32 0, i1 false)
 __attribute__((global)) void test_update_dpp_const_fold_imm_operand_2(int* out, int a, int b)
 {
   *out = __builtin_amdgcn_update_dpp(a, b, OpCtrl(), 0, 0, false);
 }
 
-// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 4, i32 0, i1 false)
+// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %1, i32 0, i32 4, i32 0, i1 false)
 __attribute__((global)) void test_update_dpp_const_fold_imm_operand_3(int* out, int a, int b)
 {
   *out = __builtin_amdgcn_update_dpp(a, b, 0, RowMask(), 0, false);
 }
 
-// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 3, i1 false)
+// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %1, i32 0, i32 0, i32 3, i1 false)
 __attribute__((global)) void test_update_dpp_const_fold_imm_operand_4(int* out, int a, int b)
 {
   *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, BankMask(), false);
 }
 
-// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 0, i1 false)
+// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %1, i32 0, i32 0, i32 0, i1 false)
 __attribute__((global)) void test_update_dpp_const_fold_imm_operand_5(int* out, int a, int b)
 {
   *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, BountCtrl());
diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
index 385f8a753cd8e..1651cb379a20f 100644
--- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -145,7 +145,9 @@ kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
 
 // AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelLargeOneMember(
 // AMDGCN:  %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
-// AMDGCN:  store %struct.LargeStructOneMember %u.coerce, ptr addrspace(5) %[[U]], align 8
+// AMDGCN:  %[[U_ELEM:.*]] = getelementptr inbounds %struct.LargeStructOneMember, ptr addrspace(5) %[[U]], i32 0, i32 0
+// AMDGCN:  %[[EXTRACT:.*]] = extractvalue %struct.LargeStructOneMember %u.coerce, 0
+// AMDGCN:  store [100 x <2 x i32>] %[[EXTRACT]], ptr addrspace(5) %[[U_ELEM]], align 8
 // AMDGCN:  call void @FuncOneLargeMember(ptr addrspace(5) noundef byref(%struct.LargeStructOneMember) align 8 %[[U]])
 kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
   FuncOneLargeMember(u);
@@ -177,7 +179,12 @@ kernel void KernelTwoMember(struct StructTwoMember u) {
 // AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelLargeTwoMember
 // AMDGCN-SAME:  (%struct.LargeStructTwoMember %[[u_coerce:.*]])
 // AMDGCN:  %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5)
-// AMDGCN:  store %struct.LargeStructTwoMember %[[u_coerce]], ptr addrspace(5) %[[u]]
+// AMDGCN:  %[[U_PTR0:.*]] = getelementptr inbounds %struct.LargeStructTwoMember, ptr addrspace(5) %[[u]], i32 0, i32 0
+// AMDGCN:  %[[EXTRACT0:.*]] = extractvalue %struct.LargeStructTwoMember %u.coerce, 0
+// AMDGCN:  store [40 x <2 x i32>] %[[EXTRACT0]], ptr addrspace(5) %[[U_PTR0]]
+// AMDGCN:  %[[U_PTR1:.*]] = getelementptr inbounds %struct.LargeStructTwoMember, ptr addrspace(5) %[[u]], i32 0, i32 1
+// AMDGCN:  %[[EXTRACT1:.*]] = extractvalue %struct.LargeStructTwoMember %u.coerce, 1
+// AMDGCN:  store [20 x <2 x i32>] %[[EXTRACT1]], ptr addrspace(5) %[[U_PTR1]]
 // AMDGCN:  call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref(%struct.LargeStructTwoMember) align 8 %[[u]])
 kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
   FuncLargeTwoMember(u);
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
index fa83a38a01b0a..fe0a2f9578db0 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
@@ -61,7 +61,7 @@ Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
 // the return value.
 
 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker
-// AMDGCN-SAME: (ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
+// AMDGCN-SAME: (ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
 // AMDGCN-NEXT:  entry:
 // AMDGCN-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
 // AMDGCN-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
@@ -74,7 +74,7 @@ Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
 // AMDGCN-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
 // AMDGCN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
 // AMDGCN-NEXT:    [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
-// AMDGCN-NEXT:    [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @foo([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
+// AMDGCN-NEXT:    [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
 // AMDGCN-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0
 // AMDGCN-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
 // AMDGCN-NEXT:    store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4
@@ -98,7 +98,7 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
 }
 
 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker_large
-// AMDGCN-SAME: (ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !8 !kernel_arg_base_type !8 !kernel_arg_type_qual !7 {
+// AMDGCN-SAME: (ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
 // AMDGCN-NEXT:  entry:
 // AMDGCN-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
 // AMDGCN-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
@@ -168,7 +168,7 @@ void test_indirect_arg_globl(void) {
 #endif
 
 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local
-// AMDGCN-SAME: () #[[ATTR1]] !kernel_arg_addr_space !9 !kernel_arg_access_qual !9 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !9 {
+// AMDGCN-SAME: () #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
 // AMDGCN-NEXT:  entry:
 // AMDGCN-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
 // AMDGCN-NEXT:    call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
@@ -193,7 +193,7 @@ void test_indirect_arg_private(void) {
 }
 
 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember
-// AMDGCN-SAME: (<2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space !10 !kernel_arg_access_qual !11 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !13 {
+// AMDGCN-SAME: (<2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
 // AMDGCN-NEXT:  entry:
 // AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
 // AMDGCN-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
@@ -208,7 +208,7 @@ kernel void KernelOneMember(struct StructOneMember u) {
 }
 
 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir
-// AMDGCN-SAME: (ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space !14 !kernel_arg_access_qual !11 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !13 {
+// AMDGCN-SAME: (ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
 // AMDGCN-NEXT:  entry:
 // AMDGCN-NEXT:    [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
 // AMDGCN-NEXT:    store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8
@@ -223,10 +223,12 @@ kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
 }
 
 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember
-// AMDGCN-SAME: ([[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space !10 !kernel_arg_access_qual !11 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !13 {
+// AMDGCN-SAME: ([[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
 // AMDGCN-NEXT:  entry:
 // AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
-// AMDGCN-NEXT:    store [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], ptr addrspace(5) [[U]], align 8
+// AMDGCN-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
+// AMDGCN-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
+// AMDGCN-NEXT:    store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
 // AMDGCN-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]]
 // AMDGCN-NEXT:    ret void
 //
@@ -271,15 +273,20 @@ void FuncLargeTwoMember(struct LargeStructTwoMember u) {
 }
 
 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember
-// AMDGCN-SAME: ([[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space !10 !kernel_arg_access_qual !11 !kernel_arg_type !17 !kernel_arg_base_type !17 !kernel_arg_type_qual !13 {
+// AMDGCN-SAME: ([[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
 // AMDGCN-NEXT:  entry:
 // AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
-// AMDGCN-NEXT:    store [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], ptr addrspace(5) [[U]], align 8
 // AMDGCN-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
-// AMDGCN-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP0]], align 8
+// AMDGCN-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
+// AMDGCN-NEXT:    store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
 // AMDGCN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
-// AMDGCN-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP2]], align 8
-// AMDGCN-NEXT:    call void @FuncTwoMember(<2 x i32> [[TMP1]], <2 x i32> [[TMP3]]) #[[ATTR3]]
+// AMDGCN-NEXT:    [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
+// AMDGCN-NEXT:    store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
+// AMDGCN-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
+// AMDGCN-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8
+// AMDGCN-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
+// AMDGCN-NEXT:    [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8
+// AMDGCN-NEXT:    call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]]
 // AMDGCN-NEXT:    ret void
 //
 kernel void KernelTwoMember(struct StructTwoMember u) {
@@ -287,10 +294,15 @@ kernel void KernelTwoMember(struct StructTwoMember u) {
 }
 
 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember
-// AMDGCN-SAME: ([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space !10 !kernel_arg_access_qual !11 !kernel_arg_type !18 !kernel_arg_base_type !18 !kernel_arg_type_qual !13 {
+// AMDGCN-SAME: ([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
 // AMDGCN-NEXT:  entry:
 // AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
-// AMDGCN-NEXT:    store [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], ptr addrspace(5) [[U]], align 8
+// AMDGCN-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
+// AMDGCN-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
+// AMDGCN-NEXT:    store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
+// AMDGCN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
+// AMDGCN-NEXT:    [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
+// AMDGCN-NEXT:    store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
 // AMDGCN-NEXT:    call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]]
 // AMDGCN-NEXT:    ret void
 //

From e657e0256509f6f665917904078a5389684fc716 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Fri, 26 Jul 2024 07:38:53 -0700
Subject: [PATCH 268/427] workflows: Fix tag name for release sources job
 (#100752)

(cherry picked from commit 3c2ce7088886a22ab8dc0e9488600c43644b5102)
---
 .github/workflows/release-sources.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release-sources.yml b/.github/workflows/release-sources.yml
index edb0449ef7e2c..a6c86823f99df 100644
--- a/.github/workflows/release-sources.yml
+++ b/.github/workflows/release-sources.yml
@@ -51,7 +51,7 @@ jobs:
     steps:
       - id: inputs
         run: |
-          ref=${{ inputs.release-version || github.sha }}
+          ref=${{ (inputs.release-version && format('llvmorg-{0}', inputs.release-version)) || github.sha }}
           if [ -n "${{ inputs.release-version }}" ]; then
             export_args="-release ${{ inputs.release-version }} -final"
           else

From 82a11e46ce87ea570358e4c25ee445929402a490 Mon Sep 17 00:00:00 2001
From: cor3ntin <corentinjabot@gmail.com>
Date: Wed, 4 Sep 2024 10:02:55 +0200
Subject: [PATCH 269/427] [Clang] Workaround dependent source location issues
 (#106925)

In #78436 we made some SourceLocExpr dependent to
deal with the fact that their value should reflect the name of
specialized function - rather than the rtemplate in which they are first
used.

However SourceLocExpr are unusual in two ways
 - They don't depend on template arguments
- They morally depend on the context in which they are used (rather than
called from).

It's fair to say that this is quite novels and confuses clang. In
particular, in some cases, we used to create dependent SourceLocExpr and
never subsequently transform them, leaving dependent objects in
instantiated functions types. To work around that we avoid replacing
SourceLocExpr when we think they could remain dependent.
It's certainly not perfect but it fixes a number of reported bugs, and
seem to only affect scenarios in which the value of the SourceLocExpr
does not matter (overload resolution).

Fixes #106428
Fixes #81155
Fixes #80210
Fixes #85373

---------

Co-authored-by: Aaron Ballman <aaron@aaronballman.com>
---
 clang/docs/ReleaseNotes.rst            |  2 +
 clang/lib/Sema/SemaExpr.cpp            | 21 +++++++--
 clang/test/SemaCXX/source_location.cpp | 60 ++++++++++++++++++++++++++
 3 files changed, 79 insertions(+), 4 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5eed9827343dd..53d819c6c4457 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1121,6 +1121,8 @@ Bug Fixes to C++ Support
   Fixes (#GH85992).
 - Fixed a crash-on-invalid bug involving extraneous template parameter with concept substitution. (#GH73885)
 - Fixed assertion failure by skipping the analysis of an invalid field declaration. (#GH99868)
+- Fix an issue with dependent source location expressions (#GH106428), (#GH81155), (#GH80210), (#GH85373)
+
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index edb8b79a2220b..f56ca398cda81 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -5430,11 +5430,24 @@ struct EnsureImmediateInvocationInDefaultArgs
 
   // Rewrite to source location to refer to the context in which they are used.
   ExprResult TransformSourceLocExpr(SourceLocExpr *E) {
-    if (E->getParentContext() == SemaRef.CurContext)
+    DeclContext *DC = E->getParentContext();
+    if (DC == SemaRef.CurContext)
       return E;
-    return getDerived().RebuildSourceLocExpr(E->getIdentKind(), E->getType(),
-                                             E->getBeginLoc(), E->getEndLoc(),
-                                             SemaRef.CurContext);
+
+    // FIXME: During instantiation, because the rebuild of defaults arguments
+    // is not always done in the context of the template instantiator,
+    // we run the risk of producing a dependent source location
+    // that would never be rebuilt.
+    // This usually happens during overload resolution, or in contexts
+    // where the value of the source location does not matter.
+    // However, we should find a better way to deal with source location
+    // of function templates.
+    if (!SemaRef.CurrentInstantiationScope ||
+        !SemaRef.CurContext->isDependentContext() || DC->isDependentContext())
+      DC = SemaRef.CurContext;
+
+    return getDerived().RebuildSourceLocExpr(
+        E->getIdentKind(), E->getType(), E->getBeginLoc(), E->getEndLoc(), DC);
   }
 };
 
diff --git a/clang/test/SemaCXX/source_location.cpp b/clang/test/SemaCXX/source_location.cpp
index 6b3610d703e71..34177bfe287fc 100644
--- a/clang/test/SemaCXX/source_location.cpp
+++ b/clang/test/SemaCXX/source_location.cpp
@@ -929,3 +929,63 @@ void test() {
 }
 
 }
+
+namespace GH106428 {
+
+struct add_fn {
+    template <typename T>
+    constexpr auto operator()(T lhs, T rhs,
+                              const std::source_location loc = std::source_location::current())
+        const -> T
+    {
+        return lhs + rhs;
+    }
+};
+
+
+template <class _Fp, class... _Args>
+decltype(_Fp{}(0, 0))
+__invoke(_Fp&& __f);
+
+template<typename T>
+struct type_identity { using type = T; };
+
+template<class Fn>
+struct invoke_result : type_identity<decltype(__invoke(Fn{}))> {};
+
+using i = invoke_result<add_fn>::type;
+static_assert(__is_same(i, int));
+
+}
+
+#if __cplusplus >= 202002L
+
+namespace GH81155 {
+struct buff {
+  buff(buff &, const char * = __builtin_FUNCTION());
+};
+
+template <class Ty>
+Ty declval();
+
+template <class Fx>
+auto Call(buff arg) -> decltype(Fx{}(arg));
+
+template <typename>
+struct F {};
+
+template <class Fx>
+struct InvocableR : F<decltype(Call<Fx>(declval<buff>()))> {
+  static constexpr bool value = false;
+};
+
+template <class Fx, bool = InvocableR<Fx>::value>
+void Help(Fx) {}
+
+void Test() {
+  Help([](buff) {});
+}
+
+}
+
+#endif

From 5cf78453b3de39247364ddf97b1c18c011283948 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333@gmail.com>
Date: Wed, 4 Sep 2024 13:36:32 +0800
Subject: [PATCH 270/427] [Clang][CodeGen] Don't emit assumptions if current
 block is unreachable. (#106936)

Fixes https://github.com/llvm/llvm-project/issues/106898.

When emitting an infinite loop, clang codegen will delete the whole
block and leave builder's current block as nullptr:

https://github.com/llvm/llvm-project/blob/837ee5b46a5f7f898f0de7e46a19600b896a0a1f/clang/lib/CodeGen/CGStmt.cpp#L597-L600

Then clang will create `zext (icmp slt %a, %b)` without parent block for
`a < b`. It will crash here:

https://github.com/llvm/llvm-project/blob/837ee5b46a5f7f898f0de7e46a19600b896a0a1f/clang/lib/CodeGen/CGExprScalar.cpp#L416-L420

Even if we disabled this optimization, it still crashes in
`Builder.CreateAssumption`:

https://github.com/llvm/llvm-project/blob/837ee5b46a5f7f898f0de7e46a19600b896a0a1f/llvm/lib/IR/IRBuilder.cpp#L551-L561

This patch disables assumptions emission if current block is null.

(cherry picked from commit c94bd96c277e0b48e198fdc831bb576d9a04aced)
---
 clang/lib/CodeGen/CGStmt.cpp        | 2 +-
 clang/test/SemaCXX/cxx23-assume.cpp | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index aa97f685ac7a9..2f466602d2f68 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -745,7 +745,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
     } break;
     case attr::CXXAssume: {
       const Expr *Assumption = cast<CXXAssumeAttr>(A)->getAssumption();
-      if (getLangOpts().CXXAssumptions &&
+      if (getLangOpts().CXXAssumptions && Builder.GetInsertBlock() &&
           !Assumption->HasSideEffects(getContext())) {
         llvm::Value *AssumptionVal = EvaluateExprAsBool(Assumption);
         Builder.CreateAssumption(AssumptionVal);
diff --git a/clang/test/SemaCXX/cxx23-assume.cpp b/clang/test/SemaCXX/cxx23-assume.cpp
index 9138501d726dd..eeae59daea3f7 100644
--- a/clang/test/SemaCXX/cxx23-assume.cpp
+++ b/clang/test/SemaCXX/cxx23-assume.cpp
@@ -158,3 +158,12 @@ foo (int x, int y)
   return x + y;
 }
 }
+
+// Do not crash when assumptions are unreachable.
+namespace gh106898 {
+int foo () { 
+    while(1);
+    int a = 0, b = 1;
+    __attribute__((assume (a < b)));
+}
+}

From 52e5a72e9200667e8a62436268fdaff4411f7216 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Thu, 5 Sep 2024 17:54:57 +0100
Subject: [PATCH 271/427] [AArch64] Remove redundant COPY from
 loadRegFromStackSlot (#107396)

This removes a redundant 'COPY' instruction that #81716 probably forgot
to remove.

This redundant COPY led to an issue because because code in
LiveRangeSplitting expects that the instruction emitted by
`loadRegFromStackSlot` is an instruction that accesses memory, which
isn't the case for the COPY instruction.

(cherry picked from commit 91a3c6f3d66b866bcda8a0f7d4815bc8f2dbd86c)
---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp |  4 ---
 llvm/test/CodeGen/AArch64/spillfill-sve.mir  | 37 +++++++++++++++++++-
 2 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 377bcd5868fb6..805684ef69a59 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5144,10 +5144,6 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
   if (PNRReg.isValid() && !PNRReg.isVirtual())
     MI.addDef(PNRReg, RegState::Implicit);
   MI.addMemOperand(MMO);
-
-  if (PNRReg.isValid() && PNRReg.isVirtual())
-    BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), PNRReg)
-        .addReg(DestReg);
 }
 
 bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.mir b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
index 11cf388e38531..83c9b73c57570 100644
--- a/llvm/test/CodeGen/AArch64/spillfill-sve.mir
+++ b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
@@ -11,6 +11,7 @@
   define aarch64_sve_vector_pcs void @spills_fills_stack_id_ppr2mul2() #0 { entry: unreachable }
   define aarch64_sve_vector_pcs void @spills_fills_stack_id_pnr() #1 { entry: unreachable }
   define aarch64_sve_vector_pcs void @spills_fills_stack_id_virtreg_pnr() #1 { entry: unreachable }
+  define aarch64_sve_vector_pcs void @spills_fills_stack_id_virtreg_ppr_to_pnr() #1 { entry: unreachable }
   define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr() #0 { entry: unreachable }
   define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2() #0 { entry: unreachable }
   define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2strided() #0 { entry: unreachable }
@@ -216,7 +217,7 @@ body:             |
     ; EXPAND: STR_PXI killed renamable $pn8, $sp, 7
     ;
     ; EXPAND: renamable $pn8 = LDR_PXI $sp, 7
-    ; EXPAND: $p0 = PEXT_PCI_B killed renamable $pn8, 0
+    ; EXPAND-NEXT: $p0 = PEXT_PCI_B killed renamable $pn8, 0
 
 
     %0:pnr_p8to15 = WHILEGE_CXX_B undef $x0, undef $x0, 0, implicit-def dead $nzcv
@@ -242,6 +243,40 @@ body:             |
     RET_ReallyLR
 ...
 ---
+name: spills_fills_stack_id_virtreg_ppr_to_pnr
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: ppr }
+  - { id: 1, class: pnr_p8to15 }
+stack:
+body:             |
+  bb.0.entry:
+    liveins: $p0
+
+    %0:ppr = COPY $p0
+
+    $pn0 = IMPLICIT_DEF
+    $pn1 = IMPLICIT_DEF
+    $pn2 = IMPLICIT_DEF
+    $pn3 = IMPLICIT_DEF
+    $pn4 = IMPLICIT_DEF
+    $pn5 = IMPLICIT_DEF
+    $pn6 = IMPLICIT_DEF
+    $pn7 = IMPLICIT_DEF
+    $pn8 = IMPLICIT_DEF
+    $pn9 = IMPLICIT_DEF
+    $pn10 = IMPLICIT_DEF
+    $pn11 = IMPLICIT_DEF
+    $pn12 = IMPLICIT_DEF
+    $pn13 = IMPLICIT_DEF
+    $pn14 = IMPLICIT_DEF
+    $pn15 = IMPLICIT_DEF
+
+    %1:pnr_p8to15 = COPY %0
+    $p0 = PEXT_PCI_B %1, 0
+    RET_ReallyLR
+...
+---
 name: spills_fills_stack_id_zpr
 tracksRegLiveness: true
 registers:

From 64015eee93062b34df290338c45e87868fa750a9 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@chromium.org>
Date: Mon, 9 Sep 2024 10:56:37 +0200
Subject: [PATCH 272/427] Release note about targets built in the Windows
 packages

LLVM_TARGETS_TO_BUILD was set in #106059
---
 llvm/docs/ReleaseNotes.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index ac7bdf723a168..7b9e15da9b2b0 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -42,6 +42,10 @@ Non-comprehensive list of changes in this release
    functionality, or simply have a lot to talk about), see the `NOTE` below
    for adding a new subsection.
 
+* Starting with LLVM 19, the Windows installers only include support for the
+  X86, ARM, and AArch64 targets in order to keep the build size within the
+  limits of the NSIS installer framework.
+
 * ...
 
 Update on required toolchains to build LLVM

From 11e2a1552f92ccb080d08083ceb71f7e6ed4db78 Mon Sep 17 00:00:00 2001
From: Orlando Cazalet-Hyams <orlando.hyams@sony.com>
Date: Thu, 29 Aug 2024 14:12:02 +0100
Subject: [PATCH 273/427] [RemoveDIs] Fix spliceDebugInfo splice-to-end edge
 case (#105671)

Fix #105571 which demonstrates an end() iterator dereference when
performing a non-empty splice to end() from a region that ends at
Src::end().

Rather than calling Instruction::adoptDbgRecords from Dest, create a marker
(which takes an iterator) and absorbDebugValues onto that. The "absorb" variant
doesn't clean up the source marker, which in this case we know is a trailing
marker, so we have to do that manually.

(cherry picked from commit 43661a1214353ea1773a711f403f8d1118e9ca0f)
---
 llvm/lib/IR/BasicBlock.cpp                  | 12 ++++-
 llvm/unittests/IR/BasicBlockDbgInfoTest.cpp | 52 +++++++++++++++++++++
 2 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 0a9498f051cb5..46896d3cdf7d5 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -975,8 +975,16 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
   if (ReadFromTail && Src->getMarker(Last)) {
     DbgMarker *FromLast = Src->getMarker(Last);
     if (LastIsEnd) {
-      Dest->adoptDbgRecords(Src, Last, true);
-      // adoptDbgRecords will release any trailers.
+      if (Dest == end()) {
+        // Abosrb the trailing markers from Src.
+        assert(FromLast == Src->getTrailingDbgRecords());
+        createMarker(Dest)->absorbDebugValues(*FromLast, true);
+        FromLast->eraseFromParent();
+        Src->deleteTrailingDbgRecords();
+      } else {
+        // adoptDbgRecords will release any trailers.
+        Dest->adoptDbgRecords(Src, Last, true);
+      }
       assert(!Src->getTrailingDbgRecords());
     } else {
       // FIXME: can we use adoptDbgRecords here to reduce allocations?
diff --git a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
index 835780e63aaf4..5ce14d3f6b9ce 100644
--- a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
+++ b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
@@ -1525,4 +1525,56 @@ TEST(BasicBlockDbgInfoTest, DbgMoveToEnd) {
   EXPECT_FALSE(Ret->hasDbgRecords());
 }
 
+TEST(BasicBlockDbgInfoTest, CloneTrailingRecordsToEmptyBlock) {
+  LLVMContext C;
+  std::unique_ptr<Module> M = parseIR(C, R"(
+    define i16 @foo(i16 %a) !dbg !6 {
+    entry:
+      %b = add i16 %a, 0
+        #dbg_value(i16 %b, !9, !DIExpression(), !11)
+      ret i16 0, !dbg !11
+    }
+
+    !llvm.dbg.cu = !{!0}
+    !llvm.module.flags = !{!5}
+
+    !0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+    !1 = !DIFile(filename: "t.ll", directory: "/")
+    !2 = !{}
+    !5 = !{i32 2, !"Debug Info Version", i32 3}
+    !6 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: null, file: !1, line: 1, type: !7, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !8)
+    !7 = !DISubroutineType(types: !2)
+    !8 = !{!9}
+    !9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10)
+    !10 = !DIBasicType(name: "ty16", size: 16, encoding: DW_ATE_unsigned)
+    !11 = !DILocation(line: 1, column: 1, scope: !6)
+)");
+  ASSERT_TRUE(M);
+
+  Function *F = M->getFunction("foo");
+  BasicBlock &BB = F->getEntryBlock();
+  // Start with no trailing records.
+  ASSERT_FALSE(BB.getTrailingDbgRecords());
+
+  BasicBlock::iterator Ret = std::prev(BB.end());
+  BasicBlock::iterator B = std::prev(Ret);
+
+  // Delete terminator which has debug records: we now get trailing records.
+  Ret->eraseFromParent();
+  EXPECT_TRUE(BB.getTrailingDbgRecords());
+
+  BasicBlock *NewBB = BasicBlock::Create(C, "NewBB", F);
+  NewBB->splice(NewBB->end(), &BB, B, BB.end());
+
+  // The trailing records should've been absorbed into NewBB.
+  EXPECT_FALSE(BB.getTrailingDbgRecords());
+  EXPECT_TRUE(NewBB->getTrailingDbgRecords());
+  if (DbgMarker *Trailing = NewBB->getTrailingDbgRecords()) {
+    EXPECT_EQ(llvm::range_size(Trailing->getDbgRecordRange()), 1u);
+    // Drop the trailing records now, to prevent a cleanup assertion.
+    Trailing->eraseFromParent();
+    NewBB->deleteTrailingDbgRecords();
+  }
+}
+
 } // End anonymous namespace.

From 42f18eedc2cf2d1f64fd5d78fda376adf39a9b3d Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@outlook.com>
Date: Tue, 3 Sep 2024 04:52:47 -0700
Subject: [PATCH 274/427] [SLP]Fix PR107036: Check if the type of the user is
 sizable before requesting its size.

Only some instructions should be considered as potentially reducing the
size of the operands types, not all instructions should be considered.

Fixes https://github.com/llvm/llvm-project/issues/107036

(cherry picked from commit f381cd069965dabfeb277f30a4e532d7fd498f6e)
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    |  5 +++
 .../X86/minbw-user-non-sizable.ll             | 31 +++++++++++++++++++
 2 files changed, 36 insertions(+)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/minbw-user-non-sizable.ll

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cca9eeebaa53f..2f3d6b27378ae 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15539,6 +15539,11 @@ void BoUpSLP::computeMinimumValueSizes() {
                     const TreeEntry *UserTE = E.UserTreeIndices.back().UserTE;
                     if (TE == UserTE || !TE)
                       return false;
+                    if (!isa<CastInst, BinaryOperator, FreezeInst, PHINode,
+                             SelectInst>(U) ||
+                        !isa<CastInst, BinaryOperator, FreezeInst, PHINode,
+                             SelectInst>(UserTE->getMainOp()))
+                      return true;
                     unsigned UserTESz = DL->getTypeSizeInBits(
                         UserTE->Scalars.front()->getType());
                     auto It = MinBWs.find(TE);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbw-user-non-sizable.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbw-user-non-sizable.ll
new file mode 100644
index 0000000000000..7e7d4352e2773
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minbw-user-non-sizable.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s -slp-threshold=-100 | FileCheck %s
+
+define void @test(ptr %i) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[I:%.*]]) {
+; CHECK-NEXT:  [[BB:.*]]:
+; CHECK-NEXT:    br label %[[BB2:.*]]
+; CHECK:       [[BB2]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], %[[BB2]] ], [ zeroinitializer, %[[BB]] ]
+; CHECK-NEXT:    store <2 x i32> [[TMP0]], ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i1>
+; CHECK-NEXT:    [[TMP3]] = select <2 x i1> [[TMP2]], <2 x i32> zeroinitializer, <2 x i32> zeroinitializer
+; CHECK-NEXT:    br label %[[BB2]]
+;
+bb:
+  %i1 = getelementptr i8, ptr %i, i64 4
+  br label %bb2
+
+bb2:
+  %i3 = phi i32 [ %i6, %bb2 ], [ 0, %bb ]
+  %i4 = phi i32 [ %i8, %bb2 ], [ 0, %bb ]
+  store i32 %i3, ptr %i
+  store i32 %i4, ptr %i1
+  %i5 = trunc i32 0 to i1
+  %i6 = select i1 %i5, i32 0, i32 0
+  %i7 = trunc i32 %i4 to i1
+  %i8 = select i1 %i7, i32 0, i32 0
+  br label %bb2
+}

From 5e1a55eaa0bb592dd04f1b8474b8f064aded7b2e Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Thu, 5 Sep 2024 15:06:19 +0100
Subject: [PATCH 275/427] [AArch64] Disable SVE paired ld1/st1 for
 callee-saves.

The functionality to make use of SVE's load/store pair instructions for
the callee-saves is broken because the offsets used in the instructions
are incorrect.

This is addressed by #105518 but given the complexity of this code
and the subtleties around calculating the right offsets, we favour
disabling the behaviour altogether for LLVM 19.

This fix is critical for any programs being compiled with `+sme2`.
---
 .../Target/AArch64/AArch64FrameLowering.cpp   |   33 -
 llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll  |  104 +-
 .../CodeGen/AArch64/sme2-intrinsics-ld1.ll    | 1488 +++++++++++------
 .../CodeGen/AArch64/sme2-intrinsics-ldnt1.ll  | 1488 +++++++++++------
 .../AArch64/sve-callee-save-restore-pairs.ll  |  140 +-
 5 files changed, 2036 insertions(+), 1217 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index ba46ededc63a8..87e057a468afd 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2931,16 +2931,6 @@ struct RegPairInfo {
 
 } // end anonymous namespace
 
-unsigned findFreePredicateReg(BitVector &SavedRegs) {
-  for (unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {
-    if (SavedRegs.test(PReg)) {
-      unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;
-      return PNReg;
-    }
-  }
-  return AArch64::NoRegister;
-}
-
 static void computeCalleeSaveRegisterPairs(
     MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
     const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
@@ -3645,7 +3635,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
 
   unsigned ExtraCSSpill = 0;
   bool HasUnpairedGPR64 = false;
-  bool HasPairZReg = false;
   // Figure out which callee-saved registers to save/restore.
   for (unsigned i = 0; CSRegs[i]; ++i) {
     const unsigned Reg = CSRegs[i];
@@ -3699,28 +3688,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
           !RegInfo->isReservedReg(MF, PairedReg))
         ExtraCSSpill = PairedReg;
     }
-    // Check if there is a pair of ZRegs, so it can select PReg for spill/fill
-    HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) &&
-                    SavedRegs.test(CSRegs[i ^ 1]));
-  }
-
-  if (HasPairZReg && (Subtarget.hasSVE2p1() || Subtarget.hasSME2())) {
-    AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-    // Find a suitable predicate register for the multi-vector spill/fill
-    // instructions.
-    unsigned PnReg = findFreePredicateReg(SavedRegs);
-    if (PnReg != AArch64::NoRegister)
-      AFI->setPredicateRegForFillSpill(PnReg);
-    // If no free callee-save has been found assign one.
-    if (!AFI->getPredicateRegForFillSpill() &&
-        MF.getFunction().getCallingConv() ==
-            CallingConv::AArch64_SVE_VectorCall) {
-      SavedRegs.set(AArch64::P8);
-      AFI->setPredicateRegForFillSpill(AArch64::PN8);
-    }
-
-    assert(!RegInfo->isReservedReg(MF, AFI->getPredicateRegForFillSpill()) &&
-           "Predicate cannot be a reserved register");
   }
 
   if (MF.getFunction().getCallingConv() == CallingConv::Win64 &&
diff --git a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
index 6264ce0cf4ae6..fa8f92cb0a2c9 100644
--- a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
+++ b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
@@ -329,27 +329,34 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
 ; CHECK-NEXT:    .cfi_offset w29, -32
 ; CHECK-NEXT:    addvl sp, sp, #-18
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
-; CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    ptrue pn8.b
 ; CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
-; CHECK-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
 ; CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
-; CHECK-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #16, mul vl] // 32-byte Folded Spill
 ; CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #20, mul vl] // 32-byte Folded Spill
-; CHECK-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
 ; CHECK-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #28, mul vl] // 32-byte Folded Spill
 ; CHECK-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
+; CHECK-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG
@@ -371,16 +378,23 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
 ; CHECK-NEXT:    .cfi_restore vg
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
-; CHECK-NEXT:    ptrue pn8.b
+; CHECK-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
-; CHECK-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
-; CHECK-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
-; CHECK-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
-; CHECK-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
-; CHECK-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
-; CHECK-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
-; CHECK-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
 ; CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -424,27 +438,34 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
 ; FP-CHECK-NEXT:    .cfi_offset w30, -40
 ; FP-CHECK-NEXT:    .cfi_offset w29, -48
 ; FP-CHECK-NEXT:    addvl sp, sp, #-18
-; FP-CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
-; FP-CHECK-NEXT:    ptrue pn8.b
 ; FP-CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
-; FP-CHECK-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
-; FP-CHECK-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
 ; FP-CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
-; FP-CHECK-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
-; FP-CHECK-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #16, mul vl] // 32-byte Folded Spill
 ; FP-CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
-; FP-CHECK-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #20, mul vl] // 32-byte Folded Spill
-; FP-CHECK-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
 ; FP-CHECK-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
-; FP-CHECK-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #28, mul vl] // 32-byte Folded Spill
 ; FP-CHECK-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
 ; FP-CHECK-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
 ; FP-CHECK-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; FP-CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
 ; FP-CHECK-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
 ; FP-CHECK-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
 ; FP-CHECK-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
 ; FP-CHECK-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
-; FP-CHECK-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
+; FP-CHECK-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; FP-CHECK-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; FP-CHECK-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; FP-CHECK-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; FP-CHECK-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; FP-CHECK-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; FP-CHECK-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; FP-CHECK-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; FP-CHECK-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; FP-CHECK-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; FP-CHECK-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; FP-CHECK-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; FP-CHECK-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; FP-CHECK-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; FP-CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; FP-CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
 ; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG
 ; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG
 ; FP-CHECK-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG
@@ -464,16 +485,23 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
 ; FP-CHECK-NEXT:    smstart sm
 ; FP-CHECK-NEXT:    .cfi_restore vg
 ; FP-CHECK-NEXT:    addvl sp, sp, #1
-; FP-CHECK-NEXT:    ptrue pn8.b
+; FP-CHECK-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; FP-CHECK-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; FP-CHECK-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; FP-CHECK-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; FP-CHECK-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; FP-CHECK-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; FP-CHECK-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; FP-CHECK-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; FP-CHECK-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; FP-CHECK-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; FP-CHECK-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; FP-CHECK-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; FP-CHECK-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; FP-CHECK-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; FP-CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; FP-CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
 ; FP-CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
-; FP-CHECK-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
-; FP-CHECK-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
-; FP-CHECK-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
-; FP-CHECK-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
-; FP-CHECK-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
-; FP-CHECK-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
-; FP-CHECK-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
-; FP-CHECK-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
 ; FP-CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
 ; FP-CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
 ; FP-CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll
index 29d3d68fc4c3d..013d8a0512b15 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll
@@ -55,31 +55,45 @@ define <vscale x 32 x i8> @ld1_x2_i8_z0_z8(<vscale x 16 x i8> %unused, <vscale x
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ld1b { z0.b, z8.b }, pn8/z, [x0]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    mov z1.d, z8.d
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
 ; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    mov z1.d, z8.d
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; STRIDED-NEXT:    ret
@@ -89,14 +103,20 @@ define <vscale x 32 x i8> @ld1_x2_i8_z0_z8(<vscale x 16 x i8> %unused, <vscale x
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-16
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-2
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -109,15 +129,21 @@ define <vscale x 32 x i8> @ld1_x2_i8_z0_z8(<vscale x 16 x i8> %unused, <vscale x
 ; CONTIGUOUS-NEXT:    ldr z0, [sp]
 ; CONTIGUOUS-NEXT:    ldr z1, [sp, #1, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #2
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #16
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -184,31 +210,45 @@ define <vscale x 32 x i8> @ld1_x2_i8_z0_z8_scalar(<vscale x 16 x i8> %unused, <v
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ld1b { z0.b, z8.b }, pn8/z, [x0, x1]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    mov z1.d, z8.d
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
 ; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    mov z1.d, z8.d
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; STRIDED-NEXT:    ret
@@ -218,14 +258,20 @@ define <vscale x 32 x i8> @ld1_x2_i8_z0_z8_scalar(<vscale x 16 x i8> %unused, <v
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-16
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-2
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -238,15 +284,21 @@ define <vscale x 32 x i8> @ld1_x2_i8_z0_z8_scalar(<vscale x 16 x i8> %unused, <v
 ; CONTIGUOUS-NEXT:    ldr z0, [sp]
 ; CONTIGUOUS-NEXT:    ldr z1, [sp, #1, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #2
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #16
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -314,31 +366,45 @@ define <vscale x 16 x i16> @ld1_x2_i16_z0_z8(<vscale x 8 x i16> %unused, <vscale
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ld1h { z0.h, z8.h }, pn8/z, [x0]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    mov z1.d, z8.d
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
 ; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    mov z1.d, z8.d
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; STRIDED-NEXT:    ret
@@ -348,14 +414,20 @@ define <vscale x 16 x i16> @ld1_x2_i16_z0_z8(<vscale x 8 x i16> %unused, <vscale
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-16
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-2
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -368,15 +440,21 @@ define <vscale x 16 x i16> @ld1_x2_i16_z0_z8(<vscale x 8 x i16> %unused, <vscale
 ; CONTIGUOUS-NEXT:    ldr z0, [sp]
 ; CONTIGUOUS-NEXT:    ldr z1, [sp, #1, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #2
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #16
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -443,31 +521,45 @@ define <vscale x 16 x i16> @ld1_x2_i16_z0_z8_scalar(<vscale x 8 x i16> %unused,
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ld1h { z0.h, z8.h }, pn8/z, [x0, x1, lsl #1]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    mov z1.d, z8.d
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
 ; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    mov z1.d, z8.d
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; STRIDED-NEXT:    ret
@@ -477,14 +569,20 @@ define <vscale x 16 x i16> @ld1_x2_i16_z0_z8_scalar(<vscale x 8 x i16> %unused,
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-16
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-2
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -497,15 +595,21 @@ define <vscale x 16 x i16> @ld1_x2_i16_z0_z8_scalar(<vscale x 8 x i16> %unused,
 ; CONTIGUOUS-NEXT:    ldr z0, [sp]
 ; CONTIGUOUS-NEXT:    ldr z1, [sp, #1, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #2
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #16
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -573,31 +677,45 @@ define <vscale x 8 x i32> @ld1_x2_i32_z0_z8(<vscale x 4 x i32> %unused, <vscale
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ld1w { z0.s, z8.s }, pn8/z, [x0]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    mov z1.d, z8.d
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
 ; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    mov z1.d, z8.d
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; STRIDED-NEXT:    ret
@@ -607,14 +725,20 @@ define <vscale x 8 x i32> @ld1_x2_i32_z0_z8(<vscale x 4 x i32> %unused, <vscale
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-16
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-2
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -627,15 +751,21 @@ define <vscale x 8 x i32> @ld1_x2_i32_z0_z8(<vscale x 4 x i32> %unused, <vscale
 ; CONTIGUOUS-NEXT:    ldr z0, [sp]
 ; CONTIGUOUS-NEXT:    ldr z1, [sp, #1, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #2
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #16
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -702,31 +832,45 @@ define <vscale x 8 x i32> @ld1_x2_i32_z0_z8_scalar(<vscale x 4 x i32> %unused, <
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ld1w { z0.s, z8.s }, pn8/z, [x0, x1, lsl #2]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    mov z1.d, z8.d
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
 ; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    mov z1.d, z8.d
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; STRIDED-NEXT:    ret
@@ -736,14 +880,20 @@ define <vscale x 8 x i32> @ld1_x2_i32_z0_z8_scalar(<vscale x 4 x i32> %unused, <
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-16
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-2
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -756,15 +906,21 @@ define <vscale x 8 x i32> @ld1_x2_i32_z0_z8_scalar(<vscale x 4 x i32> %unused, <
 ; CONTIGUOUS-NEXT:    ldr z0, [sp]
 ; CONTIGUOUS-NEXT:    ldr z1, [sp, #1, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #2
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #16
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -832,31 +988,45 @@ define <vscale x 4 x i64> @ld1_x2_i64_z0_z8(<vscale x 2 x i64> %unused, <vscale
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ld1d { z0.d, z8.d }, pn8/z, [x0]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    mov z1.d, z8.d
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
 ; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    mov z1.d, z8.d
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; STRIDED-NEXT:    ret
@@ -866,14 +1036,20 @@ define <vscale x 4 x i64> @ld1_x2_i64_z0_z8(<vscale x 2 x i64> %unused, <vscale
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-16
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-2
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -886,15 +1062,21 @@ define <vscale x 4 x i64> @ld1_x2_i64_z0_z8(<vscale x 2 x i64> %unused, <vscale
 ; CONTIGUOUS-NEXT:    ldr z0, [sp]
 ; CONTIGUOUS-NEXT:    ldr z1, [sp, #1, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #2
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #16
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -961,31 +1143,45 @@ define <vscale x 4 x i64> @ld1_x2_i64_z0_z8_scalar(<vscale x 2 x i64> %unused, <
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ld1d { z0.d, z8.d }, pn8/z, [x0, x1, lsl #3]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    mov z1.d, z8.d
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
 ; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    mov z1.d, z8.d
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; STRIDED-NEXT:    ret
@@ -995,14 +1191,20 @@ define <vscale x 4 x i64> @ld1_x2_i64_z0_z8_scalar(<vscale x 2 x i64> %unused, <
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-16
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-2
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -1015,15 +1217,21 @@ define <vscale x 4 x i64> @ld1_x2_i64_z0_z8_scalar(<vscale x 2 x i64> %unused, <
 ; CONTIGUOUS-NEXT:    ldr z0, [sp]
 ; CONTIGUOUS-NEXT:    ldr z1, [sp, #1, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #2
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #16
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1093,32 +1301,46 @@ define <vscale x 64 x i8> @ld1_x4_i8_z0_z4_z8_z12(<vscale x 16 x i8> %unused, <v
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ld1b { z0.b, z4.b, z8.b, z12.b }, pn8/z, [x0]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
+; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z2.d, z8.d
 ; STRIDED-NEXT:    mov z3.d, z12.d
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z1.d, z4.d
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1129,14 +1351,19 @@ define <vscale x 64 x i8> @ld1_x4_i8_z0_z4_z8_z12(<vscale x 16 x i8> %unused, <v
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-15
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #13, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-4
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -1153,15 +1380,20 @@ define <vscale x 64 x i8> @ld1_x4_i8_z0_z4_z8_z12(<vscale x 16 x i8> %unused, <v
 ; CONTIGUOUS-NEXT:    ldr z2, [sp, #2, mul vl]
 ; CONTIGUOUS-NEXT:    ldr z3, [sp, #3, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #4
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #15
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1234,32 +1466,46 @@ define <vscale x 64 x i8> @ld1_x4_i8_z0_z4_z8_z12_scalar(<vscale x 16 x i8> %unu
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ld1b { z0.b, z4.b, z8.b, z12.b }, pn8/z, [x0, x1]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
+; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z2.d, z8.d
 ; STRIDED-NEXT:    mov z3.d, z12.d
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z1.d, z4.d
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1270,14 +1516,19 @@ define <vscale x 64 x i8> @ld1_x4_i8_z0_z4_z8_z12_scalar(<vscale x 16 x i8> %unu
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-15
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #13, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-4
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -1294,15 +1545,20 @@ define <vscale x 64 x i8> @ld1_x4_i8_z0_z4_z8_z12_scalar(<vscale x 16 x i8> %unu
 ; CONTIGUOUS-NEXT:    ldr z2, [sp, #2, mul vl]
 ; CONTIGUOUS-NEXT:    ldr z3, [sp, #3, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #4
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #15
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1376,32 +1632,46 @@ define <vscale x 32 x i16> @ld1_x4_i16_z0_z4_z8_z12(<vscale x 8 x i16> %unused,
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ld1h { z0.h, z4.h, z8.h, z12.h }, pn8/z, [x0]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
+; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z2.d, z8.d
 ; STRIDED-NEXT:    mov z3.d, z12.d
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z1.d, z4.d
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1412,14 +1682,19 @@ define <vscale x 32 x i16> @ld1_x4_i16_z0_z4_z8_z12(<vscale x 8 x i16> %unused,
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-15
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #13, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-4
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -1436,15 +1711,20 @@ define <vscale x 32 x i16> @ld1_x4_i16_z0_z4_z8_z12(<vscale x 8 x i16> %unused,
 ; CONTIGUOUS-NEXT:    ldr z2, [sp, #2, mul vl]
 ; CONTIGUOUS-NEXT:    ldr z3, [sp, #3, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #4
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #15
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1518,32 +1798,46 @@ define <vscale x 32 x i16> @ld1_x4_i16_z0_z4_z8_z12_scalar(<vscale x 8 x i16> %u
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ld1h { z0.h, z4.h, z8.h, z12.h }, pn8/z, [x0, x1, lsl #1]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
+; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z2.d, z8.d
 ; STRIDED-NEXT:    mov z3.d, z12.d
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z1.d, z4.d
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1554,14 +1848,19 @@ define <vscale x 32 x i16> @ld1_x4_i16_z0_z4_z8_z12_scalar(<vscale x 8 x i16> %u
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-15
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #13, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-4
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -1578,15 +1877,20 @@ define <vscale x 32 x i16> @ld1_x4_i16_z0_z4_z8_z12_scalar(<vscale x 8 x i16> %u
 ; CONTIGUOUS-NEXT:    ldr z2, [sp, #2, mul vl]
 ; CONTIGUOUS-NEXT:    ldr z3, [sp, #3, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #4
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #15
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1660,32 +1964,46 @@ define <vscale x 16 x i32> @ld1_x4_i32_z0_z4_z8_z12(<vscale x 4 x i32> %unused,
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ld1w { z0.s, z4.s, z8.s, z12.s }, pn8/z, [x0]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
+; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z2.d, z8.d
 ; STRIDED-NEXT:    mov z3.d, z12.d
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z1.d, z4.d
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1696,14 +2014,19 @@ define <vscale x 16 x i32> @ld1_x4_i32_z0_z4_z8_z12(<vscale x 4 x i32> %unused,
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-15
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #13, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-4
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -1720,15 +2043,20 @@ define <vscale x 16 x i32> @ld1_x4_i32_z0_z4_z8_z12(<vscale x 4 x i32> %unused,
 ; CONTIGUOUS-NEXT:    ldr z2, [sp, #2, mul vl]
 ; CONTIGUOUS-NEXT:    ldr z3, [sp, #3, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #4
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #15
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1802,32 +2130,46 @@ define <vscale x 16 x i32> @ld1_x4_i32_z0_z4_z8_z12_scalar(<vscale x 4 x i32> %u
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ld1w { z0.s, z4.s, z8.s, z12.s }, pn8/z, [x0, x1, lsl #2]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
+; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z2.d, z8.d
 ; STRIDED-NEXT:    mov z3.d, z12.d
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z1.d, z4.d
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1838,14 +2180,19 @@ define <vscale x 16 x i32> @ld1_x4_i32_z0_z4_z8_z12_scalar(<vscale x 4 x i32> %u
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-15
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #13, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-4
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -1862,15 +2209,20 @@ define <vscale x 16 x i32> @ld1_x4_i32_z0_z4_z8_z12_scalar(<vscale x 4 x i32> %u
 ; CONTIGUOUS-NEXT:    ldr z2, [sp, #2, mul vl]
 ; CONTIGUOUS-NEXT:    ldr z3, [sp, #3, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #4
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #15
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1944,32 +2296,46 @@ define <vscale x 8 x i64> @ld1_x4_i64_z0_z4_z8_z12(<vscale x 2 x i64> %unused, <
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ld1d { z0.d, z4.d, z8.d, z12.d }, pn8/z, [x0]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
+; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z2.d, z8.d
 ; STRIDED-NEXT:    mov z3.d, z12.d
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z1.d, z4.d
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1980,14 +2346,19 @@ define <vscale x 8 x i64> @ld1_x4_i64_z0_z4_z8_z12(<vscale x 2 x i64> %unused, <
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-15
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #13, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-4
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -2004,15 +2375,20 @@ define <vscale x 8 x i64> @ld1_x4_i64_z0_z4_z8_z12(<vscale x 2 x i64> %unused, <
 ; CONTIGUOUS-NEXT:    ldr z2, [sp, #2, mul vl]
 ; CONTIGUOUS-NEXT:    ldr z3, [sp, #3, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #4
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #15
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -2086,32 +2462,46 @@ define <vscale x 8 x i64> @ld1_x4_i64_z0_z4_z8_z12_scalar(<vscale x 2 x i64> %un
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ld1d { z0.d, z4.d, z8.d, z12.d }, pn8/z, [x0, x1, lsl #3]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
+; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z2.d, z8.d
 ; STRIDED-NEXT:    mov z3.d, z12.d
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z1.d, z4.d
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -2122,14 +2512,19 @@ define <vscale x 8 x i64> @ld1_x4_i64_z0_z4_z8_z12_scalar(<vscale x 2 x i64> %un
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-15
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #13, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-4
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -2146,15 +2541,20 @@ define <vscale x 8 x i64> @ld1_x4_i64_z0_z4_z8_z12_scalar(<vscale x 2 x i64> %un
 ; CONTIGUOUS-NEXT:    ldr z2, [sp, #2, mul vl]
 ; CONTIGUOUS-NEXT:    ldr z3, [sp, #3, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #4
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #15
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll
index 3d3748e101122..eff1260c947dd 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll
@@ -8,31 +8,45 @@ define <vscale x 32 x i8> @ldnt1_x2_i8_z0_z8(<vscale x 16 x i8> %unused, <vscale
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ldnt1b { z0.b, z8.b }, pn8/z, [x0]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    mov z1.d, z8.d
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
 ; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    mov z1.d, z8.d
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; STRIDED-NEXT:    ret
@@ -42,14 +56,20 @@ define <vscale x 32 x i8> @ldnt1_x2_i8_z0_z8(<vscale x 16 x i8> %unused, <vscale
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-16
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-2
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -62,15 +82,21 @@ define <vscale x 32 x i8> @ldnt1_x2_i8_z0_z8(<vscale x 16 x i8> %unused, <vscale
 ; CONTIGUOUS-NEXT:    ldr z0, [sp]
 ; CONTIGUOUS-NEXT:    ldr z1, [sp, #1, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #2
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #16
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -90,31 +116,45 @@ define <vscale x 32 x i8> @ldnt1_x2_i8_z0_z8_scalar(<vscale x 16 x i8> %unused,
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ldnt1b { z0.b, z8.b }, pn8/z, [x0, x1]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    mov z1.d, z8.d
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
 ; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    mov z1.d, z8.d
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; STRIDED-NEXT:    ret
@@ -124,14 +164,20 @@ define <vscale x 32 x i8> @ldnt1_x2_i8_z0_z8_scalar(<vscale x 16 x i8> %unused,
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-16
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-2
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -144,15 +190,21 @@ define <vscale x 32 x i8> @ldnt1_x2_i8_z0_z8_scalar(<vscale x 16 x i8> %unused,
 ; CONTIGUOUS-NEXT:    ldr z0, [sp]
 ; CONTIGUOUS-NEXT:    ldr z1, [sp, #1, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #2
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #16
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -173,31 +225,45 @@ define <vscale x 16 x i16> @ldnt1_x2_i16_z0_z8(<vscale x 8 x i16> %unused, <vsca
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ldnt1h { z0.h, z8.h }, pn8/z, [x0]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    mov z1.d, z8.d
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
 ; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    mov z1.d, z8.d
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; STRIDED-NEXT:    ret
@@ -207,14 +273,20 @@ define <vscale x 16 x i16> @ldnt1_x2_i16_z0_z8(<vscale x 8 x i16> %unused, <vsca
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-16
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-2
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -227,15 +299,21 @@ define <vscale x 16 x i16> @ldnt1_x2_i16_z0_z8(<vscale x 8 x i16> %unused, <vsca
 ; CONTIGUOUS-NEXT:    ldr z0, [sp]
 ; CONTIGUOUS-NEXT:    ldr z1, [sp, #1, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #2
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #16
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -255,31 +333,45 @@ define <vscale x 16 x i16> @ldnt1_x2_i16_z0_z8_scalar(<vscale x 8 x i16> %unused
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ldnt1h { z0.h, z8.h }, pn8/z, [x0, x1, lsl #1]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    mov z1.d, z8.d
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
 ; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    mov z1.d, z8.d
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; STRIDED-NEXT:    ret
@@ -289,14 +381,20 @@ define <vscale x 16 x i16> @ldnt1_x2_i16_z0_z8_scalar(<vscale x 8 x i16> %unused
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-16
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-2
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -309,15 +407,21 @@ define <vscale x 16 x i16> @ldnt1_x2_i16_z0_z8_scalar(<vscale x 8 x i16> %unused
 ; CONTIGUOUS-NEXT:    ldr z0, [sp]
 ; CONTIGUOUS-NEXT:    ldr z1, [sp, #1, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #2
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #16
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -338,31 +442,45 @@ define <vscale x 8 x i32> @ldnt1_x2_i32_z0_z8(<vscale x 4 x i32> %unused, <vscal
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ldnt1w { z0.s, z8.s }, pn8/z, [x0]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    mov z1.d, z8.d
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
 ; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    mov z1.d, z8.d
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; STRIDED-NEXT:    ret
@@ -372,14 +490,20 @@ define <vscale x 8 x i32> @ldnt1_x2_i32_z0_z8(<vscale x 4 x i32> %unused, <vscal
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-16
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-2
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -392,15 +516,21 @@ define <vscale x 8 x i32> @ldnt1_x2_i32_z0_z8(<vscale x 4 x i32> %unused, <vscal
 ; CONTIGUOUS-NEXT:    ldr z0, [sp]
 ; CONTIGUOUS-NEXT:    ldr z1, [sp, #1, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #2
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #16
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -420,31 +550,45 @@ define <vscale x 8 x i32> @ldnt1_x2_i32_z0_z8_scalar(<vscale x 4 x i32> %unused,
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ldnt1w { z0.s, z8.s }, pn8/z, [x0, x1, lsl #2]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    mov z1.d, z8.d
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
 ; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    mov z1.d, z8.d
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; STRIDED-NEXT:    ret
@@ -454,14 +598,20 @@ define <vscale x 8 x i32> @ldnt1_x2_i32_z0_z8_scalar(<vscale x 4 x i32> %unused,
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-16
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-2
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -474,15 +624,21 @@ define <vscale x 8 x i32> @ldnt1_x2_i32_z0_z8_scalar(<vscale x 4 x i32> %unused,
 ; CONTIGUOUS-NEXT:    ldr z0, [sp]
 ; CONTIGUOUS-NEXT:    ldr z1, [sp, #1, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #2
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #16
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -503,31 +659,45 @@ define <vscale x 4 x i64> @ldnt1_x2_i64_z0_z8(<vscale x 2 x i64> %unused, <vscal
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ldnt1d { z0.d, z8.d }, pn8/z, [x0]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    mov z1.d, z8.d
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
 ; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    mov z1.d, z8.d
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; STRIDED-NEXT:    ret
@@ -537,14 +707,20 @@ define <vscale x 4 x i64> @ldnt1_x2_i64_z0_z8(<vscale x 2 x i64> %unused, <vscal
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-16
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-2
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -557,15 +733,21 @@ define <vscale x 4 x i64> @ldnt1_x2_i64_z0_z8(<vscale x 2 x i64> %unused, <vscal
 ; CONTIGUOUS-NEXT:    ldr z0, [sp]
 ; CONTIGUOUS-NEXT:    ldr z1, [sp, #1, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #2
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #16
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -585,31 +767,45 @@ define <vscale x 4 x i64> @ldnt1_x2_i64_z0_z8_scalar(<vscale x 2 x i64> %unused,
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ldnt1d { z0.d, z8.d }, pn8/z, [x0, x1, lsl #3]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    mov z1.d, z8.d
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
 ; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    mov z1.d, z8.d
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; STRIDED-NEXT:    ret
@@ -619,14 +815,20 @@ define <vscale x 4 x i64> @ldnt1_x2_i64_z0_z8_scalar(<vscale x 2 x i64> %unused,
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-16
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-2
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -639,15 +841,21 @@ define <vscale x 4 x i64> @ldnt1_x2_i64_z0_z8_scalar(<vscale x 2 x i64> %unused,
 ; CONTIGUOUS-NEXT:    ldr z0, [sp]
 ; CONTIGUOUS-NEXT:    ldr z1, [sp, #1, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #2
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #16
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -668,32 +876,46 @@ define <vscale x 64 x i8> @ldnt1_x4_i8_z0_z4_z8_z12(<vscale x 16 x i8> %unused,
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ldnt1b { z0.b, z4.b, z8.b, z12.b }, pn8/z, [x0]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
+; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z2.d, z8.d
 ; STRIDED-NEXT:    mov z3.d, z12.d
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z1.d, z4.d
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -704,14 +926,19 @@ define <vscale x 64 x i8> @ldnt1_x4_i8_z0_z4_z8_z12(<vscale x 16 x i8> %unused,
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-15
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #13, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-4
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -728,15 +955,20 @@ define <vscale x 64 x i8> @ldnt1_x4_i8_z0_z4_z8_z12(<vscale x 16 x i8> %unused,
 ; CONTIGUOUS-NEXT:    ldr z2, [sp, #2, mul vl]
 ; CONTIGUOUS-NEXT:    ldr z3, [sp, #3, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #4
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #15
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -760,32 +992,46 @@ define <vscale x 64 x i8> @ldnt1_x4_i8_z0_z4_z8_z12_scalar(<vscale x 16 x i8> %u
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ldnt1b { z0.b, z4.b, z8.b, z12.b }, pn8/z, [x0, x1]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
+; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z2.d, z8.d
 ; STRIDED-NEXT:    mov z3.d, z12.d
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z1.d, z4.d
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -796,14 +1042,19 @@ define <vscale x 64 x i8> @ldnt1_x4_i8_z0_z4_z8_z12_scalar(<vscale x 16 x i8> %u
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-15
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #13, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-4
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -820,15 +1071,20 @@ define <vscale x 64 x i8> @ldnt1_x4_i8_z0_z4_z8_z12_scalar(<vscale x 16 x i8> %u
 ; CONTIGUOUS-NEXT:    ldr z2, [sp, #2, mul vl]
 ; CONTIGUOUS-NEXT:    ldr z3, [sp, #3, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #4
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #15
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -853,32 +1109,46 @@ define <vscale x 32 x i16> @ldnt1_x4_i16_z0_z4_z8_z12(<vscale x 8 x i16> %unused
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ldnt1h { z0.h, z4.h, z8.h, z12.h }, pn8/z, [x0]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
+; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z2.d, z8.d
 ; STRIDED-NEXT:    mov z3.d, z12.d
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z1.d, z4.d
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -889,14 +1159,19 @@ define <vscale x 32 x i16> @ldnt1_x4_i16_z0_z4_z8_z12(<vscale x 8 x i16> %unused
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-15
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #13, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-4
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -913,15 +1188,20 @@ define <vscale x 32 x i16> @ldnt1_x4_i16_z0_z4_z8_z12(<vscale x 8 x i16> %unused
 ; CONTIGUOUS-NEXT:    ldr z2, [sp, #2, mul vl]
 ; CONTIGUOUS-NEXT:    ldr z3, [sp, #3, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #4
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #15
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -945,32 +1225,46 @@ define <vscale x 32 x i16> @ldnt1_x4_i16_z0_z4_z8_z12_scalar(<vscale x 8 x i16>
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ldnt1h { z0.h, z4.h, z8.h, z12.h }, pn8/z, [x0, x1, lsl #1]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
+; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z2.d, z8.d
 ; STRIDED-NEXT:    mov z3.d, z12.d
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z1.d, z4.d
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -981,14 +1275,19 @@ define <vscale x 32 x i16> @ldnt1_x4_i16_z0_z4_z8_z12_scalar(<vscale x 8 x i16>
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-15
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #13, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-4
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -1005,15 +1304,20 @@ define <vscale x 32 x i16> @ldnt1_x4_i16_z0_z4_z8_z12_scalar(<vscale x 8 x i16>
 ; CONTIGUOUS-NEXT:    ldr z2, [sp, #2, mul vl]
 ; CONTIGUOUS-NEXT:    ldr z3, [sp, #3, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #4
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #15
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1038,32 +1342,46 @@ define <vscale x 16 x i32> @ldnt1_x4_i32_z0_z4_z8_z12(<vscale x 4 x i32> %unused
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ldnt1w { z0.s, z4.s, z8.s, z12.s }, pn8/z, [x0]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
+; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z2.d, z8.d
 ; STRIDED-NEXT:    mov z3.d, z12.d
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z1.d, z4.d
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1074,14 +1392,19 @@ define <vscale x 16 x i32> @ldnt1_x4_i32_z0_z4_z8_z12(<vscale x 4 x i32> %unused
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-15
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #13, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-4
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -1098,15 +1421,20 @@ define <vscale x 16 x i32> @ldnt1_x4_i32_z0_z4_z8_z12(<vscale x 4 x i32> %unused
 ; CONTIGUOUS-NEXT:    ldr z2, [sp, #2, mul vl]
 ; CONTIGUOUS-NEXT:    ldr z3, [sp, #3, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #4
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #15
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1130,32 +1458,46 @@ define <vscale x 16 x i32> @ldnt1_x4_i32_z0_z4_z8_z12_scalar(<vscale x 4 x i32>
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ldnt1w { z0.s, z4.s, z8.s, z12.s }, pn8/z, [x0, x1, lsl #2]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
+; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z2.d, z8.d
 ; STRIDED-NEXT:    mov z3.d, z12.d
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z1.d, z4.d
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1166,14 +1508,19 @@ define <vscale x 16 x i32> @ldnt1_x4_i32_z0_z4_z8_z12_scalar(<vscale x 4 x i32>
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-15
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #13, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-4
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -1190,15 +1537,20 @@ define <vscale x 16 x i32> @ldnt1_x4_i32_z0_z4_z8_z12_scalar(<vscale x 4 x i32>
 ; CONTIGUOUS-NEXT:    ldr z2, [sp, #2, mul vl]
 ; CONTIGUOUS-NEXT:    ldr z3, [sp, #3, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #4
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #15
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1223,32 +1575,46 @@ define <vscale x 8 x i64> @ldnt1_x4_i64_z0_z4_z8_z12(<vscale x 2 x i64> %unused,
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ldnt1d { z0.d, z4.d, z8.d, z12.d }, pn8/z, [x0]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
+; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z2.d, z8.d
 ; STRIDED-NEXT:    mov z3.d, z12.d
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z1.d, z4.d
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1259,14 +1625,19 @@ define <vscale x 8 x i64> @ldnt1_x4_i64_z0_z4_z8_z12(<vscale x 2 x i64> %unused,
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-15
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #13, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-4
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -1283,15 +1654,20 @@ define <vscale x 8 x i64> @ldnt1_x4_i64_z0_z4_z8_z12(<vscale x 2 x i64> %unused,
 ; CONTIGUOUS-NEXT:    ldr z2, [sp, #2, mul vl]
 ; CONTIGUOUS-NEXT:    ldr z3, [sp, #3, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #4
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #15
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1315,32 +1691,46 @@ define <vscale x 8 x i64> @ldnt1_x4_i64_z0_z4_z8_z12_scalar(<vscale x 2 x i64> %
 ; STRIDED-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; STRIDED-NEXT:    addvl sp, sp, #-17
 ; STRIDED-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #22, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #26, mul vl] // 32-byte Folded Spill
-; STRIDED-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #30, mul vl] // 32-byte Folded Spill
 ; STRIDED-NEXT:    mov p8.b, p0.b
+; STRIDED-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z12, [sp, #12, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z11, [sp, #13, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z10, [sp, #14, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z9, [sp, #15, mul vl] // 16-byte Folded Spill
+; STRIDED-NEXT:    str z8, [sp, #16, mul vl] // 16-byte Folded Spill
 ; STRIDED-NEXT:    ldnt1d { z0.d, z4.d, z8.d, z12.d }, pn8/z, [x0, x1, lsl #3]
 ; STRIDED-NEXT:    //APP
 ; STRIDED-NEXT:    nop
 ; STRIDED-NEXT:    //NO_APP
-; STRIDED-NEXT:    ptrue pn8.b
-; STRIDED-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #26, mul vl] // 32-byte Folded Reload
+; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z2.d, z8.d
 ; STRIDED-NEXT:    mov z3.d, z12.d
-; STRIDED-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #22, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #30, mul vl] // 32-byte Folded Reload
-; STRIDED-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
+; STRIDED-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; STRIDED-NEXT:    ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload
 ; STRIDED-NEXT:    mov z1.d, z4.d
 ; STRIDED-NEXT:    addvl sp, sp, #17
 ; STRIDED-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -1351,14 +1741,19 @@ define <vscale x 8 x i64> @ldnt1_x4_i64_z0_z4_z8_z12_scalar(<vscale x 2 x i64> %
 ; CONTIGUOUS-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-15
 ; CONTIGUOUS-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; CONTIGUOUS-NEXT:    ptrue pn8.b
-; CONTIGUOUS-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #6, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #10, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #14, mul vl] // 32-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #18, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z23, [sp, #1, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z22, [sp, #2, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z21, [sp, #3, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z20, [sp, #4, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z19, [sp, #5, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z18, [sp, #6, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z17, [sp, #7, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z16, [sp, #8, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z15, [sp, #9, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z14, [sp, #10, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z13, [sp, #11, mul vl] // 16-byte Folded Spill
-; CONTIGUOUS-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z11, [sp, #12, mul vl] // 16-byte Folded Spill
+; CONTIGUOUS-NEXT:    str z10, [sp, #13, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    str z9, [sp, #14, mul vl] // 16-byte Folded Spill
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #-4
 ; CONTIGUOUS-NEXT:    mov p8.b, p0.b
@@ -1375,15 +1770,20 @@ define <vscale x 8 x i64> @ldnt1_x4_i64_z0_z4_z8_z12_scalar(<vscale x 2 x i64> %
 ; CONTIGUOUS-NEXT:    ldr z2, [sp, #2, mul vl]
 ; CONTIGUOUS-NEXT:    ldr z3, [sp, #3, mul vl]
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #4
-; CONTIGUOUS-NEXT:    ptrue pn8.b
+; CONTIGUOUS-NEXT:    ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT:    ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #18, mul vl] // 32-byte Folded Reload
-; CONTIGUOUS-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
 ; CONTIGUOUS-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CONTIGUOUS-NEXT:    addvl sp, sp, #15
 ; CONTIGUOUS-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sve-callee-save-restore-pairs.ll b/llvm/test/CodeGen/AArch64/sve-callee-save-restore-pairs.ll
index 470c0dd45782c..c0d5d9dfdbb0d 100644
--- a/llvm/test/CodeGen/AArch64/sve-callee-save-restore-pairs.ll
+++ b/llvm/test/CodeGen/AArch64/sve-callee-save-restore-pairs.ll
@@ -88,27 +88,34 @@ define void @fbyte(<vscale x 16 x i8> %v) {
 ; PAIR:       // %bb.0:
 ; PAIR-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
 ; PAIR-NEXT:    addvl sp, sp, #-18
-; PAIR-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
-; PAIR-NEXT:    ptrue pn8.b
 ; PAIR-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
-; PAIR-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
-; PAIR-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
 ; PAIR-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
-; PAIR-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
-; PAIR-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #16, mul vl] // 32-byte Folded Spill
 ; PAIR-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
-; PAIR-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #20, mul vl] // 32-byte Folded Spill
-; PAIR-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
 ; PAIR-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
-; PAIR-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #28, mul vl] // 32-byte Folded Spill
 ; PAIR-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
 ; PAIR-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
 ; PAIR-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; PAIR-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
 ; PAIR-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
 ; PAIR-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
 ; PAIR-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
 ; PAIR-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
-; PAIR-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
+; PAIR-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
 ; PAIR-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG
 ; PAIR-NEXT:    .cfi_offset w30, -8
 ; PAIR-NEXT:    .cfi_offset w29, -16
@@ -121,16 +128,23 @@ define void @fbyte(<vscale x 16 x i8> %v) {
 ; PAIR-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG
 ; PAIR-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG
 ; PAIR-NEXT:    bl my_func
-; PAIR-NEXT:    ptrue pn8.b
+; PAIR-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
 ; PAIR-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
-; PAIR-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
-; PAIR-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
-; PAIR-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
-; PAIR-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
-; PAIR-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
-; PAIR-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
-; PAIR-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
-; PAIR-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
 ; PAIR-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
 ; PAIR-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
 ; PAIR-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -230,27 +244,34 @@ define void @fhalf(<vscale x 8 x half> %v) {
 ; PAIR:       // %bb.0:
 ; PAIR-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
 ; PAIR-NEXT:    addvl sp, sp, #-18
-; PAIR-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
-; PAIR-NEXT:    ptrue pn8.b
 ; PAIR-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
-; PAIR-NEXT:    st1b { z22.b, z23.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
-; PAIR-NEXT:    st1b { z20.b, z21.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
 ; PAIR-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
-; PAIR-NEXT:    st1b { z18.b, z19.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
-; PAIR-NEXT:    st1b { z16.b, z17.b }, pn8, [sp, #16, mul vl] // 32-byte Folded Spill
 ; PAIR-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
-; PAIR-NEXT:    st1b { z14.b, z15.b }, pn8, [sp, #20, mul vl] // 32-byte Folded Spill
-; PAIR-NEXT:    st1b { z12.b, z13.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
 ; PAIR-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
-; PAIR-NEXT:    st1b { z10.b, z11.b }, pn8, [sp, #28, mul vl] // 32-byte Folded Spill
 ; PAIR-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
 ; PAIR-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
 ; PAIR-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; PAIR-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
 ; PAIR-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
 ; PAIR-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
 ; PAIR-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
 ; PAIR-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
-; PAIR-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
+; PAIR-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
 ; PAIR-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG
 ; PAIR-NEXT:    .cfi_offset w30, -8
 ; PAIR-NEXT:    .cfi_offset w29, -16
@@ -263,16 +284,23 @@ define void @fhalf(<vscale x 8 x half> %v) {
 ; PAIR-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG
 ; PAIR-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG
 ; PAIR-NEXT:    bl my_func
-; PAIR-NEXT:    ptrue pn8.b
+; PAIR-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
 ; PAIR-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
-; PAIR-NEXT:    ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
-; PAIR-NEXT:    ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
-; PAIR-NEXT:    ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
-; PAIR-NEXT:    ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
-; PAIR-NEXT:    ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
-; PAIR-NEXT:    ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
-; PAIR-NEXT:    ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
-; PAIR-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
 ; PAIR-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
 ; PAIR-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
 ; PAIR-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -323,12 +351,11 @@ define aarch64_sve_vector_pcs void @test_clobbers_z_p_regs() {
 ; PAIR:       // %bb.0:
 ; PAIR-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; PAIR-NEXT:    addvl sp, sp, #-4
-; PAIR-NEXT:    str p8, [sp, #5, mul vl] // 2-byte Folded Spill
-; PAIR-NEXT:    ptrue pn8.b
 ; PAIR-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
 ; PAIR-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; PAIR-NEXT:    str z10, [sp, #1, mul vl] // 16-byte Folded Spill
-; PAIR-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
+; PAIR-NEXT:    str z9, [sp, #2, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z8, [sp, #3, mul vl] // 16-byte Folded Spill
 ; PAIR-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
 ; PAIR-NEXT:    .cfi_offset w29, -16
 ; PAIR-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
@@ -336,10 +363,9 @@ define aarch64_sve_vector_pcs void @test_clobbers_z_p_regs() {
 ; PAIR-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG
 ; PAIR-NEXT:    //APP
 ; PAIR-NEXT:    //NO_APP
-; PAIR-NEXT:    ptrue pn8.b
 ; PAIR-NEXT:    ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload
-; PAIR-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
-; PAIR-NEXT:    ldr p8, [sp, #5, mul vl] // 2-byte Folded Reload
+; PAIR-NEXT:    ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload
 ; PAIR-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
 ; PAIR-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; PAIR-NEXT:    addvl sp, sp, #4
@@ -381,11 +407,11 @@ define aarch64_sve_vector_pcs  void @test_clobbers_z_p_regs2() {
 ; PAIR:       // %bb.0:
 ; PAIR-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; PAIR-NEXT:    addvl sp, sp, #-4
-; PAIR-NEXT:    str p9, [sp, #7, mul vl] // 2-byte Folded Spill
-; PAIR-NEXT:    ptrue pn9.b
 ; PAIR-NEXT:    str p10, [sp, #6, mul vl] // 2-byte Folded Spill
+; PAIR-NEXT:    str p9, [sp, #7, mul vl] // 2-byte Folded Spill
 ; PAIR-NEXT:    str z10, [sp, #1, mul vl] // 16-byte Folded Spill
-; PAIR-NEXT:    st1b { z8.b, z9.b }, pn9, [sp, #4, mul vl] // 32-byte Folded Spill
+; PAIR-NEXT:    str z9, [sp, #2, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    str z8, [sp, #3, mul vl] // 16-byte Folded Spill
 ; PAIR-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
 ; PAIR-NEXT:    .cfi_offset w29, -16
 ; PAIR-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
@@ -393,10 +419,10 @@ define aarch64_sve_vector_pcs  void @test_clobbers_z_p_regs2() {
 ; PAIR-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG
 ; PAIR-NEXT:    //APP
 ; PAIR-NEXT:    //NO_APP
-; PAIR-NEXT:    ptrue pn9.b
 ; PAIR-NEXT:    ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload
 ; PAIR-NEXT:    ldr p10, [sp, #6, mul vl] // 2-byte Folded Reload
-; PAIR-NEXT:    ld1b { z8.b, z9.b }, pn9/z, [sp, #4, mul vl] // 32-byte Folded Reload
 ; PAIR-NEXT:    ldr p9, [sp, #7, mul vl] // 2-byte Folded Reload
 ; PAIR-NEXT:    addvl sp, sp, #4
 ; PAIR-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -429,20 +455,18 @@ define aarch64_sve_vector_pcs  void @test_clobbers_z_regs() {
 ; PAIR-LABEL: test_clobbers_z_regs:
 ; PAIR:       // %bb.0:
 ; PAIR-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; PAIR-NEXT:    addvl sp, sp, #-3
-; PAIR-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
-; PAIR-NEXT:    ptrue pn8.b
-; PAIR-NEXT:    st1b { z8.b, z9.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
-; PAIR-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; PAIR-NEXT:    addvl sp, sp, #-2
+; PAIR-NEXT:    str z9, [sp] // 16-byte Folded Spill
+; PAIR-NEXT:    str z8, [sp, #1, mul vl] // 16-byte Folded Spill
+; PAIR-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
 ; PAIR-NEXT:    .cfi_offset w29, -16
 ; PAIR-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
 ; PAIR-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
 ; PAIR-NEXT:    //APP
 ; PAIR-NEXT:    //NO_APP
-; PAIR-NEXT:    ptrue pn8.b
-; PAIR-NEXT:    ld1b { z8.b, z9.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
-; PAIR-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
-; PAIR-NEXT:    addvl sp, sp, #3
+; PAIR-NEXT:    ldr z9, [sp] // 16-byte Folded Reload
+; PAIR-NEXT:    ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
+; PAIR-NEXT:    addvl sp, sp, #2
 ; PAIR-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; PAIR-NEXT:    ret
   call void asm sideeffect "", "~{z8},~{z9}"()

From bb79e7f668456473e13985a8f135cc3a45340fb5 Mon Sep 17 00:00:00 2001
From: Nicolas van Kempen <nvankemp@gmail.com>
Date: Mon, 9 Sep 2024 07:12:46 -0400
Subject: [PATCH 276/427] [clang][analyzer] Fix #embed crash (#107764)

Fix #107724.

(cherry picked from commit d84d9559bdc7aeb4ce14c251f6a3490c66db8d3a)
---
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp |  5 +----
 clang/test/Analysis/embed.c                  | 12 ++++++++++++
 2 files changed, 13 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/Analysis/embed.c

diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 62a240ecbc600..c11468a08ae5c 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1928,6 +1928,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
     case Stmt::CXXRewrittenBinaryOperatorClass:
     case Stmt::RequiresExprClass:
     case Expr::CXXParenListInitExprClass:
+    case Stmt::EmbedExprClass:
       // Fall through.
 
     // Cases we intentionally don't evaluate, since they don't need
@@ -2430,10 +2431,6 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
       Bldr.addNodes(Dst);
       break;
     }
-
-    case Stmt::EmbedExprClass:
-      llvm::report_fatal_error("Support for EmbedExpr is not implemented.");
-      break;
   }
 }
 
diff --git a/clang/test/Analysis/embed.c b/clang/test/Analysis/embed.c
new file mode 100644
index 0000000000000..32f6c13032574
--- /dev/null
+++ b/clang/test/Analysis/embed.c
@@ -0,0 +1,12 @@
+// RUN: %clang_analyze_cc1 -std=c23 -analyzer-checker=core,debug.ExprInspection -verify %s
+
+void clang_analyzer_dump_ptr(const unsigned char *ptr);
+void clang_analyzer_dump(unsigned char val);
+
+int main() {
+    const unsigned char SelfBytes[] = {
+        #embed "embed.c"
+    };
+    clang_analyzer_dump_ptr(SelfBytes); // expected-warning {{&Element{SelfBytes,0 S64b,unsigned char}}}
+    clang_analyzer_dump(SelfBytes[0]); // expected-warning {{Unknown}} FIXME: This should be the `/` character.
+}

From f64404e32187a6f45771e72e1b65e99be82acaba Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Sat, 3 Aug 2024 22:18:11 +0200
Subject: [PATCH 277/427] [builtins] Fix divtc3.c etc. compilation on
 Solaris/SPARC with gcc (#101662)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`compiler-rt/lib/builtins/divtc3.c` and `multc3.c` don't compile on
Solaris/sparcv9 with `gcc -m32`:
```
FAILED: projects/compiler-rt/lib/builtins/CMakeFiles/clang_rt.builtins-sparc.dir/divtc3.c.o
[...]
compiler-rt/lib/builtins/divtc3.c: In function ‘__divtc3’:
compiler-rt/lib/builtins/divtc3.c:22:18: error: implicit declaration of function ‘__compiler_rt_logbtf’ [-Wimplicit-function-declaration]
   22 |   fp_t __logbw = __compiler_rt_logbtf(
      |                  ^~~~~~~~~~~~~~~~~~~~
```
and many more. It turns out that while the definition of `__divtc3` is
guarded with `CRT_HAS_F128`, the `__compiler_rt_logbtf` and other
declarations use `CRT_HAS_128BIT && CRT_HAS_F128` as guard. This only
shows up with `gcc` since, as documented in Issue #41838, `clang`
violates the SPARC psABI in not using 128-bit `long double`, so this
code path isn't used.

Fixed by changing the guards to match.

Tested on `sparcv9-sun-solaris2.11`.

(cherry picked from commit 63a7786111c501920afc4cc27a4633f76cdaf803)
---
 compiler-rt/lib/builtins/divtc3.c | 2 +-
 compiler-rt/lib/builtins/multc3.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/lib/builtins/divtc3.c b/compiler-rt/lib/builtins/divtc3.c
index 099de5802daf0..c393de815337e 100644
--- a/compiler-rt/lib/builtins/divtc3.c
+++ b/compiler-rt/lib/builtins/divtc3.c
@@ -13,7 +13,7 @@
 #define QUAD_PRECISION
 #include "fp_lib.h"
 
-#if defined(CRT_HAS_F128)
+#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)
 
 // Returns: the quotient of (a + ib) / (c + id)
 
diff --git a/compiler-rt/lib/builtins/multc3.c b/compiler-rt/lib/builtins/multc3.c
index 61a3f45e47279..a89832f0e883e 100644
--- a/compiler-rt/lib/builtins/multc3.c
+++ b/compiler-rt/lib/builtins/multc3.c
@@ -15,7 +15,7 @@
 #include "int_lib.h"
 #include "int_math.h"
 
-#if defined(CRT_HAS_F128)
+#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)
 
 // Returns: the product of a + ib and c + id
 

From 2651d09ec9c4d87d09ae72d8bf42fab566fb02d0 Mon Sep 17 00:00:00 2001
From: Hua Tian <akiratian@tencent.com>
Date: Thu, 15 Aug 2024 19:03:27 +0800
Subject: [PATCH 278/427] [llvm][CodeGen] Resolve issues when updating live
 intervals in window scheduler (#101945)

Corrupted live interval information can cause window scheduling to crash
in some cases. By adding the missing MBB's live interval information in the
ModuloScheduleExpander, the information can be correctly analyzed in
the window scheduler.

(cherry picked from commit 43ba1097ee747b4ec5e757762ed0c9df6255a292)
---
 llvm/lib/CodeGen/ModuloSchedule.cpp           |   3 +
 .../CodeGen/Hexagon/swp-ws-live-intervals.mir | 217 ++++++++++++++++++
 2 files changed, 220 insertions(+)
 create mode 100644 llvm/test/CodeGen/Hexagon/swp-ws-live-intervals.mir

diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index 0f29ebe3ee798..b1a2bfaf78957 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -130,6 +130,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
   // Generate the prolog instructions that set up the pipeline.
   generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
   MF.insert(BB->getIterator(), KernelBB);
+  LIS.insertMBBInMaps(KernelBB);
 
   // Rearrange the instructions to generate the new, pipelined loop,
   // and update register names as needed.
@@ -210,6 +211,7 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
     NewBB->transferSuccessors(PredBB);
     PredBB->addSuccessor(NewBB);
     PredBB = NewBB;
+    LIS.insertMBBInMaps(NewBB);
 
     // Generate instructions for each appropriate stage. Process instructions
     // in original program order.
@@ -283,6 +285,7 @@ void ModuloScheduleExpander::generateEpilog(
 
     PredBB->replaceSuccessor(LoopExitBB, NewBB);
     NewBB->addSuccessor(LoopExitBB);
+    LIS.insertMBBInMaps(NewBB);
 
     if (EpilogStart == LoopExitBB)
       EpilogStart = NewBB;
diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-live-intervals.mir b/llvm/test/CodeGen/Hexagon/swp-ws-live-intervals.mir
new file mode 100644
index 0000000000000..7fa3cdf62d090
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-ws-live-intervals.mir
@@ -0,0 +1,217 @@
+# REQUIRES: asserts
+# 
+# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \
+# RUN: -window-sched=force -filetype=null -window-search-num=100 \
+# RUN: -window-search-ratio=100 -window-diff-limit=0 -verify-machineinstrs \
+# RUN: 2>&1 | FileCheck %s
+
+# The bug was reported at https://github.com/llvm/llvm-project/pull/99454. 
+# It is caused by the corruption of live intervals in certain scenarios.
+#
+# We check the newly generated MBBs after successful scheduling here.
+# CHECK: Best window offset is {{[0-9]+}} and Best II is {{[0-9]+}}.
+# CHECK: prolog:
+# CHECK: bb.5:
+# CHECK: New block
+# CHECK: bb.6:
+# CHECK: epilog:
+# CHECK: bb.7:
+# CHECK: Best window offset is {{[0-9]+}} and Best II is {{[0-9]+}}.
+# CHECK: prolog:
+# CHECK: bb.8:
+# CHECK: New block
+# CHECK: bb.9:
+# CHECK: epilog:
+# CHECK: bb.10:
+
+--- |
+  target triple = "hexagon"
+  
+  @_dp_ctrl_calc_tu_temp2_fp = global i64 0
+  @_dp_ctrl_calc_tu_temp1_fp = global i32 0
+  @dp_panel_update_tu_timings___trans_tmp_5 = global i64 0
+  @_dp_ctrl_calc_tu___trans_tmp_8 = global i64 0
+  
+  declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+  declare i8 @div64_u64_rem(i32, ptr)
+  declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+  
+  define void @dp_ctrl_calc_tu_parameters() {
+  if.end.i:
+    %rem.i11.i = alloca i64, align 8
+    %rem.i.i = alloca i64, align 8
+    call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %rem.i11.i)
+    %call.i.i = call i8 @div64_u64_rem(i32 0, ptr nonnull %rem.i11.i)
+    %conv1.i.i = zext i8 %call.i.i to i64
+    %rem.promoted.i.i = load i64, ptr %rem.i11.i, align 8
+    br label %do.body.i.i
+  
+  do.body.i.i:
+    %lsr.iv1 = phi i32 [ %lsr.iv.next2, %do.body.i.i ], [ -32, %if.end.i ]
+    %sub9.i.i = phi i64 [ %rem.promoted.i.i, %if.end.i ], [ %sub8.i.i.7, %do.body.i.i ]
+    %res_abs.0.i.i = phi i64 [ %conv1.i.i, %if.end.i ], [ %res_abs.1.i.i.7, %do.body.i.i ]
+    %cmp.not.i.i = icmp ne i64 %sub9.i.i, 0
+    %sub.i.neg.i = sext i1 %cmp.not.i.i to i64
+    %sub8.i.i = add i64 %sub9.i.i, %sub.i.neg.i
+    %0 = shl i64 %res_abs.0.i.i, 2
+    %1 = select i1 %cmp.not.i.i, i64 2, i64 0
+    %shl.i.i.5 = or disjoint i64 %0, %1
+    %cmp.not.i.i.5 = icmp ne i64 %sub8.i.i, 0
+    %sub.i.neg.i.5 = sext i1 %cmp.not.i.i.5 to i64
+    %sub8.i.i.5 = add i64 %sub8.i.i, %sub.i.neg.i.5
+    %or.i.i.5 = zext i1 %cmp.not.i.i.5 to i64
+    %res_abs.1.i.i.5 = or disjoint i64 %shl.i.i.5, %or.i.i.5
+    %cmp.not.i.i.6 = icmp ne i64 %sub8.i.i.5, 0
+    %sub.i.neg.i.6 = sext i1 %cmp.not.i.i.6 to i64
+    %sub8.i.i.6 = add i64 %sub8.i.i.5, %sub.i.neg.i.6
+    %2 = shl i64 %res_abs.1.i.i.5, 2
+    %3 = select i1 %cmp.not.i.i.6, i64 2, i64 0
+    %shl.i.i.7 = or disjoint i64 %2, %3
+    %cmp.not.i.i.7 = icmp ne i64 %sub8.i.i.6, 0
+    %sub.i.neg.i.7 = sext i1 %cmp.not.i.i.7 to i64
+    %sub8.i.i.7 = add i64 %sub8.i.i.6, %sub.i.neg.i.7
+    %or.i.i.7 = zext i1 %cmp.not.i.i.7 to i64
+    %res_abs.1.i.i.7 = or disjoint i64 %shl.i.i.7, %or.i.i.7
+    %lsr.iv.next2 = add nsw i32 %lsr.iv1, 8
+    %tobool.not.i.i.7 = icmp eq i32 %lsr.iv.next2, 0
+    br i1 %tobool.not.i.i.7, label %fec_check.i, label %do.body.i.i
+  
+  fec_check.i:
+    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %rem.i11.i)
+    store i64 %res_abs.1.i.i.7, ptr @_dp_ctrl_calc_tu_temp2_fp, align 8
+    call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %rem.i11.i)
+    %call.i12.i = call i8 @div64_u64_rem(i32 0, ptr nonnull %rem.i11.i)
+    %conv1.i13.i = zext i8 %call.i12.i to i64
+    %rem.promoted.i14.i = load i64, ptr %rem.i11.i, align 8
+    br label %do.body.i15.i
+  
+  do.body.i15.i:
+    %lsr.iv = phi i32 [ %lsr.iv.next, %do.body.i15.i ], [ -32, %fec_check.i ]
+    %sub9.i16.i = phi i64 [ %rem.promoted.i14.i, %fec_check.i ], [ %sub8.i22.i.7, %do.body.i15.i ]
+    %res_abs.0.i17.i = phi i64 [ %conv1.i13.i, %fec_check.i ], [ %res_abs.1.i24.i.7, %do.body.i15.i ]
+    %cmp.not.i20.i = icmp ugt i64 %sub9.i16.i, 999
+    %sub.i21.neg.i = select i1 %cmp.not.i20.i, i64 -1000, i64 0
+    %sub8.i22.i = add i64 %sub.i21.neg.i, %sub9.i16.i
+    %4 = shl i64 %res_abs.0.i17.i, 2
+    %5 = select i1 %cmp.not.i20.i, i64 2, i64 0
+    %shl.i19.i.7 = or disjoint i64 %4, %5
+    %cmp.not.i20.i.7 = icmp ugt i64 %sub8.i22.i, 999
+    %sub.i21.neg.i.7 = select i1 %cmp.not.i20.i.7, i64 -1000, i64 0
+    %sub8.i22.i.7 = add i64 %sub.i21.neg.i.7, %sub8.i22.i
+    %or.i23.i.7 = zext i1 %cmp.not.i20.i.7 to i64
+    %res_abs.1.i24.i.7 = or disjoint i64 %shl.i19.i.7, %or.i23.i.7
+    %lsr.iv.next = add nsw i32 %lsr.iv, 8
+    %tobool.not.i26.i.7 = icmp eq i32 %lsr.iv.next, 0
+    br i1 %tobool.not.i26.i.7, label %_dp_ctrl_calc_tu.exit, label %do.body.i15.i
+  
+  _dp_ctrl_calc_tu.exit:
+    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %rem.i11.i)
+    %conv.i = trunc i64 %res_abs.1.i24.i.7 to i32
+    store i32 %conv.i, ptr @_dp_ctrl_calc_tu_temp1_fp, align 4
+    %conv5.i = and i64 %res_abs.1.i24.i.7, 4294967295
+    store i64 %conv5.i, ptr @dp_panel_update_tu_timings___trans_tmp_5, align 8
+    store i64 %res_abs.1.i.i.7, ptr @_dp_ctrl_calc_tu___trans_tmp_8, align 8
+    ret void
+  }
+
+...
+---
+name:            dp_ctrl_calc_tu_parameters
+tracksRegLiveness: true
+stack:
+  - { id: 0, name: rem.i11.i, type: default, offset: 0, size: 8, alignment: 8}
+body:             |
+  bb.0:
+    successors: %bb.1(0x80000000)
+  
+    %0:intregs = A2_tfrsi 0
+    %1:intregs = PS_fi %stack.0.rem.i11.i, 0
+    %2:intregs = A2_tfrsi 0
+    %3:doubleregs = A4_combineir 0, %2
+    %4:doubleregs = L2_loadrd_io %stack.0.rem.i11.i, 0
+    %5:doubleregs = A2_tfrpi 0
+    J2_loop0i %bb.1, 4, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+  
+  bb.1 (machine-block-address-taken):
+    successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+  
+    %6:doubleregs = PHI %4, %bb.0, %7, %bb.1
+    %8:doubleregs = PHI %3, %bb.0, %9, %bb.1
+    %10:predregs = C2_cmpeqp %6, %5
+    %11:intregs = C2_muxii %10, 0, -1
+    %12:doubleregs = A2_addsp %11, %6
+    %13:doubleregs = S2_asl_i_p %8, 2
+    %14:intregs = S2_setbit_i %13.isub_lo, 1
+    %15:intregs = C2_mux %10, %13.isub_lo, %14
+    %16:predregs = C2_cmpeqp %12, %5
+    %17:intregs = C2_muxii %16, 0, -1
+    %18:doubleregs = A2_addsp %17, %12
+    %19:intregs = S2_setbit_i %15, 0
+    %20:intregs = C2_mux %16, %15, %19
+    %21:predregs = C2_cmpeqp %18, %5
+    %22:intregs = C2_muxii %21, 0, -1
+    %23:doubleregs = A2_addsp %22, %18
+    %24:intregs = S2_asl_i_r %20, 2
+    %25:intregs = S2_extractu %8.isub_lo, 2, 28
+    %26:intregs = S2_asl_i_r_or %25, %13.isub_hi, 2
+    %27:intregs = S2_setbit_i %24, 1
+    %28:intregs = C2_mux %21, %24, %27
+    %29:predregs = C2_cmpeqp %23, %5
+    %30:intregs = C2_muxii %29, 0, -1
+    %7:doubleregs = A2_addsp %30, %23
+    %31:intregs = S2_setbit_i %28, 0
+    %32:intregs = C2_mux %29, %28, %31
+    %9:doubleregs = REG_SEQUENCE %26, %subreg.isub_hi, %32, %subreg.isub_lo
+    ENDLOOP0 %bb.1, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+    J2_jump %bb.2, implicit-def dead $pc
+  
+  bb.2:
+    successors: %bb.3(0x80000000)
+  
+    S2_storerdgp @_dp_ctrl_calc_tu_temp2_fp, %9, implicit $gp
+    %33:intregs = A2_tfrsi 0
+    %34:intregs = PS_fi %stack.0.rem.i11.i, 0
+    %35:intregs = A2_tfrsi 0
+    %36:doubleregs = L2_loadrd_io %stack.0.rem.i11.i, 0
+    %37:doubleregs = A2_tfrpi 124
+    %38:intregs = A2_tfrsi -1000
+    %39:intregs = A2_tfrsi -1
+    J2_loop0i %bb.3, 4, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+  
+  bb.3 (machine-block-address-taken):
+    successors: %bb.4(0x04000000), %bb.3(0x7c000000)
+  
+    %40:doubleregs = PHI %36, %bb.2, %41, %bb.3
+    %42:intregs = PHI %35, %bb.2, %43, %bb.3
+    %44:intregs = PHI %33, %bb.2, %45, %bb.3
+    %46:doubleregs = S2_lsr_i_p %40, 3
+    %47:predregs = C2_cmpgtup %46, %37
+    %48:intregs = C2_mux %47, %38, %33
+    %49:intregs = C2_mux %47, %39, %33
+    %50:doubleregs = REG_SEQUENCE %49, %subreg.isub_hi, %48, %subreg.isub_lo
+    %51:doubleregs = A2_addp %50, %40
+    %52:intregs = S2_asl_i_r %42, 2
+    %53:intregs = S2_extractu %42, 2, 30
+    %45:intregs = S2_asl_i_r_or %53, %44, 2
+    %54:intregs = S2_setbit_i %52, 1
+    %55:intregs = C2_mux %47, %54, %52
+    %56:doubleregs = S2_lsr_i_p %51, 3
+    %57:predregs = C2_cmpgtup %56, %37
+    %58:intregs = C2_mux %57, %38, %33
+    %59:intregs = C2_mux %57, %39, %33
+    %60:doubleregs = REG_SEQUENCE %59, %subreg.isub_hi, %58, %subreg.isub_lo
+    %41:doubleregs = A2_addp %60, %51
+    %61:intregs = S2_setbit_i %55, 0
+    %43:intregs = C2_mux %57, %61, %55
+    ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+    J2_jump %bb.4, implicit-def dead $pc
+  
+  bb.4:
+    S2_storerigp @_dp_ctrl_calc_tu_temp1_fp, %43, implicit $gp
+    %62:intregs = A2_tfrsi 0
+    %63:doubleregs = REG_SEQUENCE %43, %subreg.isub_lo, %62, %subreg.isub_hi
+    S2_storerdgp @dp_panel_update_tu_timings___trans_tmp_5, %63, implicit $gp
+    S2_storerdgp @_dp_ctrl_calc_tu___trans_tmp_8, %9, implicit $gp
+    PS_jmpret $r31, implicit-def dead $pc
+
+...

From 327ca6c02f0dbf13dd6f039d30d320a7ba1456b8 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Thu, 5 Sep 2024 23:59:11 -0700
Subject: [PATCH 279/427] [clang-format] Correctly annotate braces in macro
 definition (#107352)

This reverts commit 2d90e8f7402b0a8114978b6f014cfe76c96c94a1 and backports
commit 616a8ce6203d8c7569266bfaf163e74df1f440ad.
---
 clang/lib/Format/UnwrappedLineParser.cpp      |  6 ++++--
 clang/unittests/Format/TokenAnnotatorTest.cpp | 15 +++++++++++++++
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 60e65aaa83e9c..7813d86ff0ea1 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -570,7 +570,8 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
                                 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                  Keywords.kw_as));
           ProbablyBracedList =
-              ProbablyBracedList || (IsCpp && NextTok->is(tok::l_paren));
+              ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() ||
+                                               NextTok->is(tok::l_paren)));
 
           // If there is a comma, semicolon or right paren after the closing
           // brace, we assume this is a braced initializer list.
@@ -609,8 +610,9 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
             ProbablyBracedList = NextTok->isNot(tok::l_square);
           }
 
-          // Cpp macro definition body containing nonempty braced list or block:
+          // Cpp macro definition body that is a nonempty braced list or block:
           if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
+              !FormatTok->Previous && NextTok->is(tok::eof) &&
               // A statement can end with only `;` (simple statement), a block
               // closing brace (compound statement), or `:` (label statement).
               // If PrevTok is a block opening brace, Tok ends an empty block.
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index db580d7005881..dd58fbc70cb91 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -3219,6 +3219,21 @@ TEST_F(TokenAnnotatorTest, BraceKind) {
   EXPECT_TOKEN(Tokens[11], tok::r_brace, TT_StructRBrace);
   EXPECT_BRACE_KIND(Tokens[11], BK_Block);
 
+  Tokens = annotate("#define MACRO            \\\n"
+                    "  struct hash<type> {    \\\n"
+                    "    void f() { return; } \\\n"
+                    "  };");
+  ASSERT_EQ(Tokens.size(), 20u) << Tokens;
+  EXPECT_TOKEN(Tokens[8], tok::l_brace, TT_StructLBrace);
+  EXPECT_BRACE_KIND(Tokens[8], BK_Block);
+  EXPECT_TOKEN(Tokens[10], tok::identifier, TT_FunctionDeclarationName);
+  EXPECT_TOKEN(Tokens[11], tok::l_paren, TT_FunctionDeclarationLParen);
+  EXPECT_TOKEN(Tokens[13], tok::l_brace, TT_FunctionLBrace);
+  EXPECT_BRACE_KIND(Tokens[13], BK_Block);
+  EXPECT_BRACE_KIND(Tokens[16], BK_Block);
+  EXPECT_TOKEN(Tokens[17], tok::r_brace, TT_StructRBrace);
+  EXPECT_BRACE_KIND(Tokens[17], BK_Block);
+
   Tokens = annotate("#define MEMBER(NAME) NAME{\"\"}");
   ASSERT_EQ(Tokens.size(), 11u) << Tokens;
   EXPECT_BRACE_KIND(Tokens[7], BK_BracedInit);

From 8290ce0998788b6a575ed7b4988b093f48c25b3d Mon Sep 17 00:00:00 2001
From: cor3ntin <corentinjabot@gmail.com>
Date: Tue, 3 Sep 2024 20:36:15 +0200
Subject: [PATCH 280/427] [Clang] Fix handling of placeholder variables name in
 init captures (#107055)

We were incorrectly not deduplicating results when looking up `_` which,
for a lambda init capture, would result in an ambiguous lookup.

The same bug caused some diagnostic notes to be emitted twice.

Fixes #107024
---
 clang/docs/ReleaseNotes.rst                   | 1 +
 clang/lib/Sema/SemaLambda.cpp                 | 1 -
 clang/lib/Sema/SemaLookup.cpp                 | 2 +-
 clang/test/SemaCXX/cxx2c-placeholder-vars.cpp | 6 ++++--
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 53d819c6c4457..8c7a6ba70acd2 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1122,6 +1122,7 @@ Bug Fixes to C++ Support
 - Fixed a crash-on-invalid bug involving extraneous template parameter with concept substitution. (#GH73885)
 - Fixed assertion failure by skipping the analysis of an invalid field declaration. (#GH99868)
 - Fix an issue with dependent source location expressions (#GH106428), (#GH81155), (#GH80210), (#GH85373)
+- Fix handling of ``_`` as the name of a lambda's init capture variable. (#GH107024)
 
 
 Bug Fixes to AST Handling
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index 601077e9f3334..809b94bb7412b 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -1318,7 +1318,6 @@ void Sema::ActOnLambdaExpressionAfterIntroducer(LambdaIntroducer &Intro,
 
     if (C->Init.isUsable()) {
       addInitCapture(LSI, cast<VarDecl>(Var), C->Kind == LCK_ByRef);
-      PushOnScopeChains(Var, CurScope, false);
     } else {
       TryCaptureKind Kind = C->Kind == LCK_ByRef ? TryCapture_ExplicitByRef
                                                  : TryCapture_ExplicitByVal;
diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index 7a6a64529f52e..d3d4bf27ae728 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -570,7 +570,7 @@ void LookupResult::resolveKind() {
 
     // For non-type declarations, check for a prior lookup result naming this
     // canonical declaration.
-    if (!D->isPlaceholderVar(getSema().getLangOpts()) && !ExistingI) {
+    if (!ExistingI) {
       auto UniqueResult = Unique.insert(std::make_pair(D, I));
       if (!UniqueResult.second) {
         // We've seen this entity before.
diff --git a/clang/test/SemaCXX/cxx2c-placeholder-vars.cpp b/clang/test/SemaCXX/cxx2c-placeholder-vars.cpp
index 5cf66b48784e9..29ca3b5ef3df7 100644
--- a/clang/test/SemaCXX/cxx2c-placeholder-vars.cpp
+++ b/clang/test/SemaCXX/cxx2c-placeholder-vars.cpp
@@ -50,14 +50,16 @@ void f() {
 
 void lambda() {
     (void)[_ = 0, _ = 1] { // expected-warning {{placeholder variables are incompatible with C++ standards before C++2c}} \
-                           // expected-note 4{{placeholder declared here}}
+                           // expected-note 2{{placeholder declared here}}
         (void)_++; // expected-error {{ambiguous reference to placeholder '_', which is defined multiple times}}
     };
 
     {
         int _ = 12;
-        (void)[_ = 0]{}; // no warning (different scope)
+        (void)[_ = 0]{ return _;}; // no warning (different scope)
     }
+
+    auto GH107024 = [_ = 42]() { return _; }();
 }
 
 namespace global_var {

From 32a8b56bbf0a3c7678d44ba690427915446a9a72 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Thu, 12 Sep 2024 09:50:57 -0700
Subject: [PATCH 281/427] workflows/release-binaries: Fix automatic upload
 (#107315)

(cherry picked from commit ab96409180aaad5417030f06a386253722a99d71)
---
 .github/workflows/release-binaries.yml | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml
index 509016e5b89c4..fcd371d49e6c9 100644
--- a/.github/workflows/release-binaries.yml
+++ b/.github/workflows/release-binaries.yml
@@ -450,11 +450,22 @@ jobs:
         name: ${{ needs.prepare.outputs.release-binary-filename }}-attestation
         path: ${{ needs.prepare.outputs.release-binary-filename }}.jsonl
 
+    - name: Checkout Release Scripts
+      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+      with:
+        sparse-checkout: |
+          llvm/utils/release/github-upload-release.py
+          llvm/utils/git/requirements.txt
+        sparse-checkout-cone-mode: false
+
+    - name: Install Python Requirements
+      run: |
+        pip install --require-hashes -r ./llvm/utils/git/requirements.txt
+
     - name: Upload Release
       shell: bash
       run: |
-        sudo apt install python3-github
-        ./llvm-project/llvm/utils/release/github-upload-release.py \
+        ./llvm/utils/release/github-upload-release.py \
         --token ${{ github.token }} \
         --release ${{ needs.prepare.outputs.release-version }} \
         upload \

From 373180b440d04dc3cc0f6111b06684d18779d7c8 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@outlook.com>
Date: Thu, 15 Aug 2024 07:21:10 -0700
Subject: [PATCH 282/427] [SLP]Fix PR104422: Wrong value truncation

The minbitwidth restrictions can be skipped only for immediate reduced
values, for other nodes still need to check if external users allow
bitwidth reduction.

Fixes https://github.com/llvm/llvm-project/issues/104422

(cherry picked from commit 56140a8258a3498cfcd9f0f05c182457d43cbfd2)
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    |  3 +-
 .../X86/operand-is-reduced-val.ll             | 49 +++++++++++++++++++
 2 files changed, 51 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2f3d6b27378ae..ab2b96cdc42db 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15211,7 +15211,8 @@ bool BoUpSLP::collectValuesToDemote(
   if (any_of(E.Scalars, [&](Value *V) {
         return !all_of(V->users(), [=](User *U) {
           return getTreeEntry(U) ||
-                 (UserIgnoreList && UserIgnoreList->contains(U)) ||
+                 (E.Idx == 0 && UserIgnoreList &&
+                  UserIgnoreList->contains(U)) ||
                  (!isa<CmpInst>(U) && U->getType()->isSized() &&
                   !U->getType()->isScalableTy() &&
                   DL->getTypeSizeInBits(U->getType()) <= BitWidth);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll b/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll
new file mode 100644
index 0000000000000..5fcac3fbf3baf
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux < %s -slp-threshold=-10 | FileCheck %s
+
+define i64 @src(i32 %a) {
+; CHECK-LABEL: define i64 @src(
+; CHECK-SAME: i32 [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP17:%.*]] = sext i32 [[A]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i64> [[TMP3]], <i64 4294967297, i64 4294967297, i64 4294967297, i64 4294967297>
+; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i64> [[TMP4]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT:    [[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]])
+; CHECK-NEXT:    [[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]])
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP16]], i32 0
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP18]], i32 1
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> <i64 poison, i64 4294967297>, i64 [[TMP17]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = add <2 x i64> [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
+; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT:    ret i64 [[TMP21]]
+;
+entry:
+  %0 = sext i32 %a to i64
+  %1 = add nsw i64 %0, 4294967297
+  %2 = sext i32 %a to i64
+  %3 = add nsw i64 %2, 4294967297
+  %4 = add i64 %3, %1
+  %5 = and i64 %3, 1
+  %6 = add i64 %4, %5
+  %7 = sext i32 %a to i64
+  %8 = add nsw i64 %7, 4294967297
+  %9 = add i64 %8, %6
+  %10 = and i64 %8, 1
+  %11 = add i64 %9, %10
+  %12 = sext i32 %a to i64
+  %13 = add nsw i64 %12, 4294967297
+  %14 = add i64 %13, %11
+  %15 = and i64 %13, 1
+  %16 = add i64 %14, %15
+  %17 = sext i32 %a to i64
+  %18 = add nsw i64 %17, 4294967297
+  %19 = add i64 %18, %16
+  %20 = and i64 %18, 1
+  %21 = add i64 %19, %20
+  ret i64 %21
+}

From 93998aff7662d9b3f94d9627179dffe342e2b399 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@amd.com>
Date: Tue, 27 Aug 2024 17:09:40 +0100
Subject: [PATCH 283/427] [AMDGPU] Fix sign confusion in performMulLoHiCombine
 (#105831)

SMUL_LOHI and UMUL_LOHI are different operations because the high part
of the result is different, so it is not OK to optimize the signed
version to MUL_U24/MULHI_U24 or the unsigned version to
MUL_I24/MULHI_I24.
---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 30 +++---
 llvm/test/CodeGen/AMDGPU/mul_int24.ll         | 98 +++++++++++++++++++
 2 files changed, 116 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 39ae7c96cf772..a71c9453d968d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4349,6 +4349,7 @@ AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
   SelectionDAG &DAG = DCI.DAG;
   SDLoc DL(N);
 
+  bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
 
@@ -4363,20 +4364,25 @@ AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
 
   // Try to use two fast 24-bit multiplies (one for each half of the result)
   // instead of one slow extending multiply.
-  unsigned LoOpcode, HiOpcode;
-  if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
-    N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
-    N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
-    LoOpcode = AMDGPUISD::MUL_U24;
-    HiOpcode = AMDGPUISD::MULHI_U24;
-  } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
-    N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
-    N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
-    LoOpcode = AMDGPUISD::MUL_I24;
-    HiOpcode = AMDGPUISD::MULHI_I24;
+  unsigned LoOpcode = 0;
+  unsigned HiOpcode = 0;
+  if (Signed) {
+    if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
+      N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
+      N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
+      LoOpcode = AMDGPUISD::MUL_I24;
+      HiOpcode = AMDGPUISD::MULHI_I24;
+    }
   } else {
-    return SDValue();
+    if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
+      N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
+      N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
+      LoOpcode = AMDGPUISD::MUL_U24;
+      HiOpcode = AMDGPUISD::MULHI_U24;
+    }
   }
+  if (!LoOpcode)
+    return SDValue();
 
   SDValue Lo = DAG.getNode(LoOpcode, DL, MVT::i32, N0, N1);
   SDValue Hi = DAG.getNode(HiOpcode, DL, MVT::i32, N0, N1);
diff --git a/llvm/test/CodeGen/AMDGPU/mul_int24.ll b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
index be77a10380c49..8f4c48fae6fb3 100644
--- a/llvm/test/CodeGen/AMDGPU/mul_int24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
@@ -813,4 +813,102 @@ bb7:
   ret void
 
 }
+
+define amdgpu_kernel void @test_umul_i24(ptr addrspace(1) %out, i32 %arg) {
+; SI-LABEL: test_umul_i24:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s1, s[2:3], 0xb
+; SI-NEXT:    v_mov_b32_e32 v0, 0xff803fe1
+; SI-NEXT:    s_mov_b32 s0, 0
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_lshr_b32 s1, s1, 9
+; SI-NEXT:    v_mul_hi_u32 v0, s1, v0
+; SI-NEXT:    s_mul_i32 s1, s1, 0xff803fe1
+; SI-NEXT:    v_alignbit_b32 v0, v0, s1, 1
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_mov_b32 s1, s0
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: test_umul_i24:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s0, s[2:3], 0x2c
+; VI-NEXT:    v_mov_b32_e32 v0, 0xff803fe1
+; VI-NEXT:    s_mov_b32 s3, 0xf000
+; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_lshr_b32 s0, s0, 9
+; VI-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s0, v0, 0
+; VI-NEXT:    s_mov_b32 s0, 0
+; VI-NEXT:    s_mov_b32 s1, s0
+; VI-NEXT:    v_alignbit_b32 v0, v1, v0, 1
+; VI-NEXT:    s_nop 1
+; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: test_umul_i24:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s1, s[2:3], 0x2c
+; GFX9-NEXT:    s_mov_b32 s0, 0
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_lshr_b32 s1, s1, 9
+; GFX9-NEXT:    s_mul_hi_u32 s4, s1, 0xff803fe1
+; GFX9-NEXT:    s_mul_i32 s1, s1, 0xff803fe1
+; GFX9-NEXT:    v_mov_b32_e32 v0, s1
+; GFX9-NEXT:    v_alignbit_b32 v0, s4, v0, 1
+; GFX9-NEXT:    s_mov_b32 s1, s0
+; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX9-NEXT:    s_endpgm
+;
+; EG-LABEL: test_umul_i24:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 8, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    ALU clause starting at 4:
+; EG-NEXT:     LSHR * T0.W, KC0[2].Z, literal.x,
+; EG-NEXT:    9(1.261169e-44), 0(0.000000e+00)
+; EG-NEXT:     MULHI * T0.X, PV.W, literal.x,
+; EG-NEXT:    -8372255(nan), 0(0.000000e+00)
+; EG-NEXT:     MULLO_INT * T0.Y, T0.W, literal.x,
+; EG-NEXT:    -8372255(nan), 0(0.000000e+00)
+; EG-NEXT:     BIT_ALIGN_INT T0.X, T0.X, PS, 1,
+; EG-NEXT:     MOV * T1.X, literal.x,
+; EG-NEXT:    0(0.000000e+00), 0(0.000000e+00)
+;
+; CM-LABEL: test_umul_i24:
+; CM:       ; %bb.0:
+; CM-NEXT:    ALU 14, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT:    CF_END
+; CM-NEXT:    PAD
+; CM-NEXT:    ALU clause starting at 4:
+; CM-NEXT:     LSHR * T0.W, KC0[2].Z, literal.x,
+; CM-NEXT:    9(1.261169e-44), 0(0.000000e+00)
+; CM-NEXT:     MULHI T0.X, T0.W, literal.x,
+; CM-NEXT:     MULHI T0.Y (MASKED), T0.W, literal.x,
+; CM-NEXT:     MULHI T0.Z (MASKED), T0.W, literal.x,
+; CM-NEXT:     MULHI * T0.W (MASKED), T0.W, literal.x,
+; CM-NEXT:    -8372255(nan), 0(0.000000e+00)
+; CM-NEXT:     MULLO_INT T0.X (MASKED), T0.W, literal.x,
+; CM-NEXT:     MULLO_INT T0.Y, T0.W, literal.x,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T0.W, literal.x,
+; CM-NEXT:     MULLO_INT * T0.W (MASKED), T0.W, literal.x,
+; CM-NEXT:    -8372255(nan), 0(0.000000e+00)
+; CM-NEXT:     BIT_ALIGN_INT * T0.X, T0.X, PV.Y, 1,
+; CM-NEXT:     MOV * T1.X, literal.x,
+; CM-NEXT:    0(0.000000e+00), 0(0.000000e+00)
+  %i = lshr i32 %arg, 9
+  %i1 = zext i32 %i to i64
+  %i2 = mul i64 %i1, 4286595041
+  %i3 = lshr i64 %i2, 1
+  %i4 = trunc i64 %i3 to i32
+  store i32 %i4, ptr addrspace(1) null, align 4
+  ret void
+}
+
 attributes #0 = { nounwind }

From f0010d131b79a1b401777aa32e96defc4a935c9d Mon Sep 17 00:00:00 2001
From: R-Goc <131907007+R-Goc@users.noreply.github.com>
Date: Wed, 4 Sep 2024 20:10:36 +0200
Subject: [PATCH 284/427] [Windows SEH] Fix crash on empty seh block (#107031)

Fixes https://github.com/llvm/llvm-project/issues/105813 and
https://github.com/llvm/llvm-project/issues/106915.
Adds a check for the end of the iterator, which can be a sentinel.
The issue was introduced in
https://github.com/llvm/llvm-project/commit/0efe111365ae176671e01252d24028047d807a84
from what I can see, so along with the introduction of /EHa support.

(cherry picked from commit 2e0ded3371f8d42f376bdfd4d70687537e36818e)
---
 .../CodeGen/SelectionDAG/SelectionDAGISel.cpp  |  4 ++++
 .../CodeGen/WinEH/wineh-empty-seh-scope.ll     | 18 ++++++++++++++++++
 2 files changed, 22 insertions(+)
 create mode 100644 llvm/test/CodeGen/WinEH/wineh-empty-seh-scope.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index df3d207d85d35..b961d3bb1fec7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1453,6 +1453,10 @@ void SelectionDAGISel::reportIPToStateForBlocks(MachineFunction *MF) {
     if (BB->getFirstMayFaultInst()) {
       // Report IP range only for blocks with Faulty inst
       auto MBBb = MBB.getFirstNonPHI();
+
+      if (MBBb == MBB.end())
+        continue;
+
       MachineInstr *MIb = &*MBBb;
       if (MIb->isTerminator())
         continue;
diff --git a/llvm/test/CodeGen/WinEH/wineh-empty-seh-scope.ll b/llvm/test/CodeGen/WinEH/wineh-empty-seh-scope.ll
new file mode 100644
index 0000000000000..5f382f10f180b
--- /dev/null
+++ b/llvm/test/CodeGen/WinEH/wineh-empty-seh-scope.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=x86_64-pc-windows-msvc19.41.34120 < %s | FileCheck %s
+
+define void @foo() personality ptr @__CxxFrameHandler3 {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    nop # avoids zero-length function
+  call void @llvm.seh.scope.begin()
+  unreachable
+}
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @llvm.seh.scope.begin()
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 2, !"eh-asynch", i32 1}

From 78654faa0c6d9dc2f72b81953b9cffbb7675755b Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333@gmail.com>
Date: Tue, 10 Sep 2024 09:19:39 +0800
Subject: [PATCH 285/427]  [LoongArch][ISel] Check the number of sign bits in
 `PatGprGpr_32` (#107432)

After https://github.com/llvm/llvm-project/pull/92205, LoongArch ISel
selects `div.w` for `trunc i64 (sdiv i64 3202030857, (sext i32 X to
i64)) to i32`. It is incorrect since `3202030857` is not a signed 32-bit
constant. It will produce wrong result when `X == 2`:
https://alive2.llvm.org/ce/z/pzfGZZ

This patch adds additional `sexti32` checks to operands of
`PatGprGpr_32`.
Alive2 proof: https://alive2.llvm.org/ce/z/AkH5Mp

Fix #107414.

(cherry picked from commit a111f9119a5ec77c19a514ec09454218f739454f)
---
 .../Target/LoongArch/LoongArchInstrInfo.td    |  5 +-
 .../ir-instruction/sdiv-udiv-srem-urem.ll     | 67 ++++++++++++++++++-
 2 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index ef647a4277873..339d50bd81921 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1065,10 +1065,13 @@ def RDTIME_D : RDTIME_2R<0x00006800>;
 
 /// Generic pattern classes
 
+def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{
+  return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLE(MVT::i32);
+}]>;
 class PatGprGpr<SDPatternOperator OpNode, LAInst Inst>
     : Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>;
 class PatGprGpr_32<SDPatternOperator OpNode, LAInst Inst>
-    : Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, GPR:$rk)>;
+    : Pat<(sext_inreg (OpNode (assertsexti32 GPR:$rj), (assertsexti32 GPR:$rk)), i32), (Inst GPR:$rj, GPR:$rk)>;
 class PatGpr<SDPatternOperator OpNode, LAInst Inst>
     : Pat<(OpNode GPR:$rj), (Inst GPR:$rj)>;
 
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index ab3eec240db3c..c22acdb496907 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -191,7 +191,8 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    div.w $a0, $a0, $a1
+; LA64-NEXT:    div.d $a0, $a0, $a1
+; LA64-NEXT:    addi.w $a0, $a0, 0
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32:
@@ -207,11 +208,12 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:    div.w $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.d $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB5_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
 ; LA64-TRAP-NEXT:  .LBB5_2: # %entry
+; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:    ret
 entry:
   %r = sdiv i32 %a, %b
@@ -1151,3 +1153,64 @@ entry:
   %r = urem i64 %a, %b
   ret i64 %r
 }
+
+define signext i32 @pr107414(i32 signext %x) {
+; LA32-LABEL: pr107414:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    .cfi_def_cfa_offset 16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    .cfi_offset 1, -4
+; LA32-NEXT:    move $a2, $a0
+; LA32-NEXT:    srai.w $a3, $a0, 31
+; LA32-NEXT:    lu12i.w $a0, -266831
+; LA32-NEXT:    ori $a0, $a0, 3337
+; LA32-NEXT:    move $a1, $zero
+; LA32-NEXT:    bl %plt(__divdi3)
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: pr107414:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a1, -266831
+; LA64-NEXT:    ori $a1, $a1, 3337
+; LA64-NEXT:    lu32i.d $a1, 0
+; LA64-NEXT:    div.d $a0, $a1, $a0
+; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    ret
+;
+; LA32-TRAP-LABEL: pr107414:
+; LA32-TRAP:       # %bb.0: # %entry
+; LA32-TRAP-NEXT:    addi.w $sp, $sp, -16
+; LA32-TRAP-NEXT:    .cfi_def_cfa_offset 16
+; LA32-TRAP-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-TRAP-NEXT:    .cfi_offset 1, -4
+; LA32-TRAP-NEXT:    move $a2, $a0
+; LA32-TRAP-NEXT:    srai.w $a3, $a0, 31
+; LA32-TRAP-NEXT:    lu12i.w $a0, -266831
+; LA32-TRAP-NEXT:    ori $a0, $a0, 3337
+; LA32-TRAP-NEXT:    move $a1, $zero
+; LA32-TRAP-NEXT:    bl %plt(__divdi3)
+; LA32-TRAP-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-TRAP-NEXT:    addi.w $sp, $sp, 16
+; LA32-TRAP-NEXT:    ret
+;
+; LA64-TRAP-LABEL: pr107414:
+; LA64-TRAP:       # %bb.0: # %entry
+; LA64-TRAP-NEXT:    lu12i.w $a1, -266831
+; LA64-TRAP-NEXT:    ori $a1, $a1, 3337
+; LA64-TRAP-NEXT:    lu32i.d $a1, 0
+; LA64-TRAP-NEXT:    div.d $a1, $a1, $a0
+; LA64-TRAP-NEXT:    bnez $a0, .LBB32_2
+; LA64-TRAP-NEXT:  # %bb.1: # %entry
+; LA64-TRAP-NEXT:    break 7
+; LA64-TRAP-NEXT:  .LBB32_2: # %entry
+; LA64-TRAP-NEXT:    addi.w $a0, $a1, 0
+; LA64-TRAP-NEXT:    ret
+entry:
+  %conv = sext i32 %x to i64
+  %div = sdiv i64 3202030857, %conv
+  %conv1 = trunc i64 %div to i32
+  ret i32 %conv1
+}

From d752f29fb333d47724484e08b32d6499cc1e460e Mon Sep 17 00:00:00 2001
From: hev <wangrui@loongson.cn>
Date: Tue, 10 Sep 2024 16:52:21 +0800
Subject: [PATCH 286/427] [LoongArch] Eliminate the redundant sign extension of
 division (#107971)

If all incoming values of `div.d` are sign-extended and all users only
use the lower 32 bits, then convert them to W versions.

Fixes: #107946
(cherry picked from commit 0f47e3aebdd2a4a938468a272ea4224552dbf176)
---
 llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp b/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
index abac69054f3b9..ab90409fdf47d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
@@ -637,6 +637,19 @@ static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST,
         break;
       }
       return false;
+    // If all incoming values are sign-extended and all users only use
+    // the lower 32 bits, then convert them to W versions.
+    case LoongArch::DIV_D: {
+      if (!AddRegToWorkList(MI->getOperand(1).getReg()))
+        return false;
+      if (!AddRegToWorkList(MI->getOperand(2).getReg()))
+        return false;
+      if (hasAllWUsers(*MI, ST, MRI)) {
+        FixableDef.insert(MI);
+        break;
+      }
+      return false;
+    }
     }
   }
 
@@ -651,6 +664,8 @@ static unsigned getWOp(unsigned Opcode) {
     return LoongArch::ADDI_W;
   case LoongArch::ADD_D:
     return LoongArch::ADD_W;
+  case LoongArch::DIV_D:
+    return LoongArch::DIV_W;
   case LoongArch::LD_D:
   case LoongArch::LD_WU:
     return LoongArch::LD_W;

From 6278084bc69a427cf7a610076817c420e3dc8594 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser@berlin.de>
Date: Wed, 11 Sep 2024 08:47:24 +0200
Subject: [PATCH 287/427] [Clang] Fix crash due to invalid source location in
 __is_trivially_equality_comparable (#107815)

Fixes #107777

(cherry picked from commit 6dbdb8430b492959c399a7809247424c6962902f)
---
 clang/lib/Sema/SemaExprCXX.cpp     |  3 ++-
 clang/test/SemaCXX/type-traits.cpp | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 14d1f395af90e..de50786f4d6c0 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -5140,7 +5140,8 @@ static bool HasNonDeletedDefaultedEqualityComparison(Sema &S,
 
     // const ClassT& obj;
     OpaqueValueExpr Operand(
-        {}, Decl->getTypeForDecl()->getCanonicalTypeUnqualified().withConst(),
+        KeyLoc,
+        Decl->getTypeForDecl()->getCanonicalTypeUnqualified().withConst(),
         ExprValueKind::VK_LValue);
     UnresolvedSet<16> Functions;
     // obj == obj;
diff --git a/clang/test/SemaCXX/type-traits.cpp b/clang/test/SemaCXX/type-traits.cpp
index 7c5be2ab374a7..608852da70331 100644
--- a/clang/test/SemaCXX/type-traits.cpp
+++ b/clang/test/SemaCXX/type-traits.cpp
@@ -3958,6 +3958,24 @@ class Template {};
 // Make sure we don't crash when instantiating a type
 static_assert(!__is_trivially_equality_comparable(Template<Template<int>>));
 
+
+struct S operator==(S, S);
+
+template <class> struct basic_string_view {};
+
+struct basic_string {
+  operator basic_string_view<int>() const;
+};
+
+template <class T>
+const bool is_trivially_equality_comparable = __is_trivially_equality_comparable(T);
+
+template <int = is_trivially_equality_comparable<basic_string> >
+void find();
+
+void func() { find(); }
+
+
 namespace hidden_friend {
 
 struct TriviallyEqualityComparable {

From a847b66a750291f8b63c03b9f355c6f4d09cdfe3 Mon Sep 17 00:00:00 2001
From: Jonathon Penix <jpenix@quicinc.com>
Date: Wed, 11 Sep 2024 09:53:11 -0700
Subject: [PATCH 288/427] [RISCV] Don't outline pcrel_lo when the function has
 a section prefix (#107943)

GNU ld will error when encountering a pcrel_lo whose corresponding
pcrel_hi is in a different section. [1] introduced a check to help
prevent this issue by preventing outlining in a few circumstances.
However, we can also hit this same issue when outlining from functions
with prefixes ("hot"/"unlikely"/"unknown" from profile information, for
example) as the outlined function might not have the same prefix,
possibly resulting in a "paired" pcrel_lo and pcrel_hi ending up in
different sections.

To prevent this issue, take a similar approach as [1] and additionally
prevent outlining when we see a pcrel_lo and the function has a prefix.

[1]
https://github.com/llvm/llvm-project/commit/96c85f80f0d615ffde0f85d8270e0a8c9f4e5430

Fixes #107520

(cherry picked from commit 866b93e6b33fac9a4bc62bbc32199bd98f434784)
---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp      |   2 +-
 .../RISCV/machineoutliner-pcrel-lo.mir        | 104 +++++++++++++++++-
 2 files changed, 99 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index ba3b4bd701d63..6c0cbeadebf43 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2902,7 +2902,7 @@ RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,
     // if any possible.
     if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO &&
         (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
-         F.hasSection()))
+         F.hasSection() || F.getSectionPrefix()))
       return outliner::InstrType::Illegal;
   }
 
diff --git a/llvm/test/CodeGen/RISCV/machineoutliner-pcrel-lo.mir b/llvm/test/CodeGen/RISCV/machineoutliner-pcrel-lo.mir
index 8a83543b0280f..fd3630bcfad25 100644
--- a/llvm/test/CodeGen/RISCV/machineoutliner-pcrel-lo.mir
+++ b/llvm/test/CodeGen/RISCV/machineoutliner-pcrel-lo.mir
@@ -18,6 +18,9 @@
   define i32 @foo2(i32 %a, i32 %b) comdat { ret i32 0 }
 
   define i32 @foo3(i32 %a, i32 %b) section ".abc" { ret i32 0 }
+
+  define i32 @foo4(i32 %a, i32 %b) !section_prefix !0 { ret i32 0 }
+  !0 = !{!"function_section_prefix", !"myprefix"}
 ...
 ---
 name:            foo
@@ -27,23 +30,24 @@ body:             |
   ; CHECK: bb.0:
   ; CHECK-NEXT:   liveins: $x10, $x11, $x13
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11, implicit $x13
+  ; CHECK-NEXT:   $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_1, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11, implicit $x13
   ; CHECK-NEXT:   PseudoBR %bb.3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   liveins: $x10, $x11, $x13
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11, implicit $x13
+  ; CHECK-NEXT:   $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_1, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11, implicit $x13
   ; CHECK-NEXT:   PseudoBR %bb.3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   liveins: $x10, $x11, $x13
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11, implicit $x13
+  ; CHECK-NEXT:   $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_1, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11, implicit $x13
   ; CHECK-NEXT:   PseudoBR %bb.3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3:
   ; CHECK-NEXT:   PseudoRET
+  ;
   ; CHECK-FS-LABEL: name: foo
   ; CHECK-FS: bb.0:
   ; CHECK-FS-NEXT:   liveins: $x10, $x11, $x13
@@ -109,26 +113,27 @@ body:             |
   ; CHECK: bb.0:
   ; CHECK-NEXT:   liveins: $x10, $x11, $x13
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_1, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
+  ; CHECK-NEXT:   $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
   ; CHECK-NEXT:   $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) <mcsymbol .Lpcrel_hi1> :: (dereferenceable load (s32) from @bar)
   ; CHECK-NEXT:   PseudoBR %bb.3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   liveins: $x10, $x11, $x13
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_1, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
+  ; CHECK-NEXT:   $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
   ; CHECK-NEXT:   $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) <mcsymbol .Lpcrel_hi1> :: (dereferenceable load (s32) from @bar)
   ; CHECK-NEXT:   PseudoBR %bb.3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   liveins: $x10, $x11, $x13
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_1, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
+  ; CHECK-NEXT:   $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
   ; CHECK-NEXT:   $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) <mcsymbol .Lpcrel_hi1> :: (dereferenceable load (s32) from @bar)
   ; CHECK-NEXT:   PseudoBR %bb.3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3:
   ; CHECK-NEXT:   PseudoRET
+  ;
   ; CHECK-FS-LABEL: name: foo2
   ; CHECK-FS: bb.0:
   ; CHECK-FS-NEXT:   liveins: $x10, $x11, $x13
@@ -223,6 +228,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3:
   ; CHECK-NEXT:   PseudoRET
+  ;
   ; CHECK-FS-LABEL: name: foo3
   ; CHECK-FS: bb.0:
   ; CHECK-FS-NEXT:   liveins: $x10, $x11, $x13
@@ -289,3 +295,89 @@ body:             |
   bb.3:
     PseudoRET
 ...
+---
+name:            foo4
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: foo4
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   liveins: $x10, $x11, $x13
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
+  ; CHECK-NEXT:   $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) <mcsymbol .Lpcrel_hi1> :: (dereferenceable load (s32) from @bar)
+  ; CHECK-NEXT:   PseudoBR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $x10, $x11, $x13
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
+  ; CHECK-NEXT:   $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) <mcsymbol .Lpcrel_hi1> :: (dereferenceable load (s32) from @bar)
+  ; CHECK-NEXT:   PseudoBR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   liveins: $x10, $x11, $x13
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
+  ; CHECK-NEXT:   $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) <mcsymbol .Lpcrel_hi1> :: (dereferenceable load (s32) from @bar)
+  ; CHECK-NEXT:   PseudoBR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   PseudoRET
+  ;
+  ; CHECK-FS-LABEL: name: foo4
+  ; CHECK-FS: bb.0:
+  ; CHECK-FS-NEXT:   liveins: $x10, $x11, $x13
+  ; CHECK-FS-NEXT: {{  $}}
+  ; CHECK-FS-NEXT:   $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
+  ; CHECK-FS-NEXT:   $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) <mcsymbol .Lpcrel_hi1> :: (dereferenceable load (s32) from @bar)
+  ; CHECK-FS-NEXT:   PseudoBR %bb.3
+  ; CHECK-FS-NEXT: {{  $}}
+  ; CHECK-FS-NEXT: bb.1:
+  ; CHECK-FS-NEXT:   liveins: $x10, $x11, $x13
+  ; CHECK-FS-NEXT: {{  $}}
+  ; CHECK-FS-NEXT:   $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
+  ; CHECK-FS-NEXT:   $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) <mcsymbol .Lpcrel_hi1> :: (dereferenceable load (s32) from @bar)
+  ; CHECK-FS-NEXT:   PseudoBR %bb.3
+  ; CHECK-FS-NEXT: {{  $}}
+  ; CHECK-FS-NEXT: bb.2:
+  ; CHECK-FS-NEXT:   liveins: $x10, $x11, $x13
+  ; CHECK-FS-NEXT: {{  $}}
+  ; CHECK-FS-NEXT:   $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
+  ; CHECK-FS-NEXT:   $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) <mcsymbol .Lpcrel_hi1> :: (dereferenceable load (s32) from @bar)
+  ; CHECK-FS-NEXT:   PseudoBR %bb.3
+  ; CHECK-FS-NEXT: {{  $}}
+  ; CHECK-FS-NEXT: bb.3:
+  ; CHECK-FS-NEXT:   PseudoRET
+  bb.0:
+    liveins: $x10, $x11, $x13
+
+    $x11 = ORI $x11, 1023
+    $x12 = ADDI $x10, 17
+    $x11 = AND $x12, $x11
+    $x10 = SUB $x10, $x11
+    $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) <mcsymbol .Lpcrel_hi1> :: (dereferenceable load (s32) from @bar)
+    PseudoBR %bb.3
+
+  bb.1:
+    liveins: $x10, $x11, $x13
+
+    $x11 = ORI $x11, 1023
+    $x12 = ADDI $x10, 17
+    $x11 = AND $x12, $x11
+    $x10 = SUB $x10, $x11
+    $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) <mcsymbol .Lpcrel_hi1> :: (dereferenceable load (s32) from @bar)
+    PseudoBR %bb.3
+
+  bb.2:
+    liveins: $x10, $x11, $x13
+
+    $x11 = ORI $x11, 1023
+    $x12 = ADDI $x10, 17
+    $x11 = AND $x12, $x11
+    $x10 = SUB $x10, $x11
+    $x11 = LW killed renamable $x13, target-flags(riscv-pcrel-lo) <mcsymbol .Lpcrel_hi1> :: (dereferenceable load (s32) from @bar)
+    PseudoBR %bb.3
+
+  bb.3:
+    PseudoRET
+...

From 82f3a4a32d2500ab1e6c51e0d749ffbac9afb1fa Mon Sep 17 00:00:00 2001
From: Konstantin Varlamov <varconsteq@gmail.com>
Date: Fri, 13 Sep 2024 01:26:57 -0700
Subject: [PATCH 289/427] Guard an include of `<ostream>` in `<chrono>` with
 availability macro (#108429)

This fixes a regression introduced in
https://github.com/llvm/llvm-project/pull/96035.

(cherry picked from commit 127c34948bd54e92ef2ee544e8bc42acecf321ad)
---
 libcxx/include/chrono | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/include/chrono b/libcxx/include/chrono
index 990c415ec2e97..7bec5e5a26ef4 100644
--- a/libcxx/include/chrono
+++ b/libcxx/include/chrono
@@ -1015,8 +1015,8 @@ constexpr chrono::year                                  operator ""y(unsigned lo
 #  include <charconv>
 #  if !defined(_LIBCPP_HAS_NO_LOCALIZATION)
 #    include <locale>
+#    include <ostream>
 #  endif
-#  include <ostream>
 #endif
 
 #endif // _LIBCPP_CHRONO

From 82e85b62da3f62759ab94aecd0ebac61f3856719 Mon Sep 17 00:00:00 2001
From: Brian Cain <bcain@quicinc.com>
Date: Fri, 13 Sep 2024 17:10:03 -0500
Subject: [PATCH 290/427] [lld] select a default eflags for hexagon (#108431)

Empty archives are apparently routine in linux kernel builds, so instead
of asserting, we should handle this case with a sane default value.

(cherry picked from commit d1ba432533aafc52fc59158350af937a8b6b9538)
---
 lld/ELF/Arch/Hexagon.cpp     | 8 +++-----
 lld/test/ELF/hexagon-eflag.s | 5 +++++
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp
index 54821c299bde9..abde3cd964917 100644
--- a/lld/ELF/Arch/Hexagon.cpp
+++ b/lld/ELF/Arch/Hexagon.cpp
@@ -60,17 +60,15 @@ Hexagon::Hexagon() {
 }
 
 uint32_t Hexagon::calcEFlags() const {
-  assert(!ctx.objectFiles.empty());
-
   // The architecture revision must always be equal to or greater than
   // greatest revision in the list of inputs.
-  uint32_t ret = 0;
+  std::optional<uint32_t> ret;
   for (InputFile *f : ctx.objectFiles) {
     uint32_t eflags = cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags;
-    if (eflags > ret)
+    if (!ret || eflags > *ret)
       ret = eflags;
   }
-  return ret;
+  return ret.value_or(/* Default Arch Rev: */ 0x60);
 }
 
 static uint32_t applyMask(uint32_t mask, uint32_t data) {
diff --git a/lld/test/ELF/hexagon-eflag.s b/lld/test/ELF/hexagon-eflag.s
index 01cb5e5b0f293..dbe8604f69fda 100644
--- a/lld/test/ELF/hexagon-eflag.s
+++ b/lld/test/ELF/hexagon-eflag.s
@@ -5,3 +5,8 @@
 # RUN: llvm-readelf -h  %t3 | FileCheck %s
 # Verify that the largest arch in the input list is selected.
 # CHECK: Flags: 0x62
+
+# RUN: llvm-ar rcsD %t4
+# RUN: ld.lld -m hexagonelf %t4 -o %t5
+# RUN: llvm-readelf -h  %t5 | FileCheck --check-prefix=CHECK-EMPTYARCHIVE %s
+# CHECK-EMPTYARCHIVE: Flags: 0x60

From 149a150b50c112e26fc5acbdd58250c44ccd777f Mon Sep 17 00:00:00 2001
From: Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com>
Date: Mon, 16 Sep 2024 11:16:14 +0000
Subject: [PATCH 291/427] [X86] AMD Zen 5 Initial enablement

---
 clang/lib/Basic/Targets/X86.cpp               |   4 +
 clang/test/CodeGen/target-builtin-noerror.c   |   1 +
 clang/test/Driver/x86-march.c                 |   4 +
 clang/test/Frontend/x86-target-cpu.c          |   1 +
 clang/test/Misc/target-invalid-cpu-note.c     |   8 +-
 .../Preprocessor/predefined-arch-macros.c     | 142 ++++++++++++++++++
 compiler-rt/lib/builtins/cpu_model/x86.c      |  20 +++
 .../llvm/TargetParser/X86TargetParser.def     |   3 +
 .../llvm/TargetParser/X86TargetParser.h       |   1 +
 llvm/lib/Target/X86/X86.td                    |  15 ++
 llvm/lib/Target/X86/X86PfmCounters.td         |   1 +
 llvm/lib/TargetParser/Host.cpp                |  19 +++
 llvm/lib/TargetParser/X86TargetParser.cpp     |   5 +
 .../CodeGen/X86/bypass-slow-division-64.ll    |   1 +
 llvm/test/CodeGen/X86/cmp16.ll                |   1 +
 llvm/test/CodeGen/X86/cpus-amd.ll             |   1 +
 llvm/test/CodeGen/X86/rdpru.ll                |   1 +
 llvm/test/CodeGen/X86/shuffle-as-shifts.ll    |   1 +
 llvm/test/CodeGen/X86/slow-unaligned-mem.ll   |   1 +
 llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll   |   1 +
 .../X86/tuning-shuffle-permilpd-avx512.ll     |   1 +
 .../X86/tuning-shuffle-permilps-avx512.ll     |   1 +
 .../X86/tuning-shuffle-unpckpd-avx512.ll      |   1 +
 .../X86/tuning-shuffle-unpckps-avx512.ll      |   1 +
 .../X86/vector-shuffle-fast-per-lane.ll       |   1 +
 llvm/test/CodeGen/X86/vpdpwssd.ll             |   1 +
 .../CodeGen/X86/x86-64-double-shifts-var.ll   |   1 +
 llvm/test/MC/X86/x86_long_nop.s               |   2 +
 .../Transforms/LoopUnroll/X86/call-remark.ll  |   1 +
 .../Transforms/SLPVectorizer/X86/pr63668.ll   |   1 +
 30 files changed, 238 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 18e6dbf03e00d..072c97e6c8c6e 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -723,6 +723,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
   case CK_ZNVER4:
     defineCPUMacros(Builder, "znver4");
     break;
+  case CK_ZNVER5:
+    defineCPUMacros(Builder, "znver5");
+    break;
   case CK_Geode:
     defineCPUMacros(Builder, "geode");
     break;
@@ -1613,6 +1616,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
     case CK_ZNVER2:
     case CK_ZNVER3:
     case CK_ZNVER4:
+    case CK_ZNVER5:
     // Deprecated
     case CK_x86_64:
     case CK_x86_64_v2:
diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c
index 2e16fd8b9fe4d..d681dcd3a13e8 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -205,4 +205,5 @@ void verifycpustrings(void) {
   (void)__builtin_cpu_is("znver2");
   (void)__builtin_cpu_is("znver3");
   (void)__builtin_cpu_is("znver4");
+  (void)__builtin_cpu_is("znver5");
 }
diff --git a/clang/test/Driver/x86-march.c b/clang/test/Driver/x86-march.c
index cc993b53937c1..3bc2a82ae778d 100644
--- a/clang/test/Driver/x86-march.c
+++ b/clang/test/Driver/x86-march.c
@@ -242,6 +242,10 @@
 // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver4 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=znver4
 // znver4: "-target-cpu" "znver4"
+//
+// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver5 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=znver5
+// znver5: "-target-cpu" "znver5"
 
 // RUN: %clang -target x86_64 -c -### %s -march=x86-64 2>&1 | FileCheck %s --check-prefix=x86-64
 // x86-64: "-target-cpu" "x86-64"
diff --git a/clang/test/Frontend/x86-target-cpu.c b/clang/test/Frontend/x86-target-cpu.c
index 6c8502ac2c21e..f2885a040c370 100644
--- a/clang/test/Frontend/x86-target-cpu.c
+++ b/clang/test/Frontend/x86-target-cpu.c
@@ -38,5 +38,6 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver2 -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver3 -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver4 -verify %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver5 -verify %s
 //
 // expected-no-diagnostics
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index 4d6759dd81537..6fd71bb82381a 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -13,19 +13,19 @@
 
 // RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86
 // X86: error: unknown target CPU 'not-a-cpu'
-// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}}
+// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, znver5, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}}
 
 // RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64
 // X86_64: error: unknown target CPU 'not-a-cpu'
-// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}}
+// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, znver5, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}}
 
 // RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86
 // TUNE_X86: error: unknown target CPU 'not-a-cpu'
-// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}}
+// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, znver5, x86-64, geode{{$}}
 
 // RUN: not %clang_cc1 -triple x86_64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86_64
 // TUNE_X86_64: error: unknown target CPU 'not-a-cpu'
-// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}}
+// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, znver5, x86-64, geode{{$}}
 
 // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
 // NVPTX: error: unknown target CPU 'not-a-cpu'
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index 6f470d85ca563..a90ec1f56b1a3 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -3923,6 +3923,148 @@
 // CHECK_ZNVER4_M64: #define __znver4 1
 // CHECK_ZNVER4_M64: #define __znver4__ 1
 
+// RUN: %clang -march=znver5 -m32 -E -dM %s -o - 2>&1 \
+// RUN:     -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER5_M32
+// CHECK_ZNVER5_M32-NOT: #define __3dNOW_A__ 1
+// CHECK_ZNVER5_M32-NOT: #define __3dNOW__ 1
+// CHECK_ZNVER5_M32: #define __ADX__ 1
+// CHECK_ZNVER5_M32: #define __AES__ 1
+// CHECK_ZNVER5_M32: #define __AVX2__ 1
+// CHECK_ZNVER5_M32: #define __AVX512BF16__ 1
+// CHECK_ZNVER5_M32: #define __AVX512BITALG__ 1
+// CHECK_ZNVER5_M32: #define __AVX512BW__ 1
+// CHECK_ZNVER5_M32: #define __AVX512CD__ 1
+// CHECK_ZNVER5_M32: #define __AVX512DQ__ 1
+// CHECK_ZNVER5_M32: #define __AVX512F__ 1
+// CHECK_ZNVER5_M32: #define __AVX512IFMA__ 1
+// CHECK_ZNVER5_M32: #define __AVX512VBMI2__ 1
+// CHECK_ZNVER5_M32: #define __AVX512VBMI__ 1
+// CHECK_ZNVER5_M32: #define __AVX512VL__ 1
+// CHECK_ZNVER5_M32: #define __AVX512VNNI__ 1
+// CHECK_ZNVER5_M32: #define __AVX512VP2INTERSECT__ 1
+// CHECK_ZNVER5_M32: #define __AVX512VPOPCNTDQ__ 1
+// CHECK_ZNVER5_M32: #define __AVXVNNI__ 1
+// CHECK_ZNVER5_M32: #define __AVX__ 1
+// CHECK_ZNVER5_M32: #define __BMI2__ 1
+// CHECK_ZNVER5_M32: #define __BMI__ 1
+// CHECK_ZNVER5_M32: #define __CLFLUSHOPT__ 1
+// CHECK_ZNVER5_M32: #define __CLWB__ 1
+// CHECK_ZNVER5_M32: #define __CLZERO__ 1
+// CHECK_ZNVER5_M32: #define __F16C__ 1
+// CHECK_ZNVER5_M32-NOT: #define __FMA4__ 1
+// CHECK_ZNVER5_M32: #define __FMA__ 1
+// CHECK_ZNVER5_M32: #define __FSGSBASE__ 1
+// CHECK_ZNVER5_M32: #define __GFNI__ 1
+// CHECK_ZNVER5_M32: #define __LZCNT__ 1
+// CHECK_ZNVER5_M32: #define __MMX__ 1
+// CHECK_ZNVER5_M32: #define __MOVDIR64B__ 1
+// CHECK_ZNVER5_M32: #define __MOVDIRI__ 1
+// CHECK_ZNVER5_M32: #define __PCLMUL__ 1
+// CHECK_ZNVER5_M32: #define __PKU__ 1
+// CHECK_ZNVER5_M32: #define __POPCNT__ 1
+// CHECK_ZNVER5_M32: #define __PREFETCHI__ 1
+// CHECK_ZNVER5_M32: #define __PRFCHW__ 1
+// CHECK_ZNVER5_M32: #define __RDPID__ 1
+// CHECK_ZNVER5_M32: #define __RDPRU__ 1
+// CHECK_ZNVER5_M32: #define __RDRND__ 1
+// CHECK_ZNVER5_M32: #define __RDSEED__ 1
+// CHECK_ZNVER5_M32: #define __SHA__ 1
+// CHECK_ZNVER5_M32: #define __SSE2_MATH__ 1
+// CHECK_ZNVER5_M32: #define __SSE2__ 1
+// CHECK_ZNVER5_M32: #define __SSE3__ 1
+// CHECK_ZNVER5_M32: #define __SSE4A__ 1
+// CHECK_ZNVER5_M32: #define __SSE4_1__ 1
+// CHECK_ZNVER5_M32: #define __SSE4_2__ 1
+// CHECK_ZNVER5_M32: #define __SSE_MATH__ 1
+// CHECK_ZNVER5_M32: #define __SSE__ 1
+// CHECK_ZNVER5_M32: #define __SSSE3__ 1
+// CHECK_ZNVER5_M32-NOT: #define __TBM__ 1
+// CHECK_ZNVER5_M32: #define __WBNOINVD__ 1
+// CHECK_ZNVER5_M32-NOT: #define __XOP__ 1
+// CHECK_ZNVER5_M32: #define __XSAVEC__ 1
+// CHECK_ZNVER5_M32: #define __XSAVEOPT__ 1
+// CHECK_ZNVER5_M32: #define __XSAVES__ 1
+// CHECK_ZNVER5_M32: #define __XSAVE__ 1
+// CHECK_ZNVER5_M32: #define __i386 1
+// CHECK_ZNVER5_M32: #define __i386__ 1
+// CHECK_ZNVER5_M32: #define __tune_znver5__ 1
+// CHECK_ZNVER5_M32: #define __znver5 1
+// CHECK_ZNVER5_M32: #define __znver5__ 1
+
+// RUN: %clang -march=znver5 -m64 -E -dM %s -o - 2>&1 \
+// RUN:     -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER5_M64
+// CHECK_ZNVER5_M64-NOT: #define __3dNOW_A__ 1
+// CHECK_ZNVER5_M64-NOT: #define __3dNOW__ 1
+// CHECK_ZNVER5_M64: #define __ADX__ 1
+// CHECK_ZNVER5_M64: #define __AES__ 1
+// CHECK_ZNVER5_M64: #define __AVX2__ 1
+// CHECK_ZNVER5_M64: #define __AVX512BF16__ 1
+// CHECK_ZNVER5_M64: #define __AVX512BITALG__ 1
+// CHECK_ZNVER5_M64: #define __AVX512BW__ 1
+// CHECK_ZNVER5_M64: #define __AVX512CD__ 1
+// CHECK_ZNVER5_M64: #define __AVX512DQ__ 1
+// CHECK_ZNVER5_M64: #define __AVX512F__ 1
+// CHECK_ZNVER5_M64: #define __AVX512IFMA__ 1
+// CHECK_ZNVER5_M64: #define __AVX512VBMI2__ 1
+// CHECK_ZNVER5_M64: #define __AVX512VBMI__ 1
+// CHECK_ZNVER5_M64: #define __AVX512VL__ 1
+// CHECK_ZNVER5_M64: #define __AVX512VNNI__ 1
+// CHECK_ZNVER5_M64: #define __AVX512VP2INTERSECT__ 1
+// CHECK_ZNVER5_M64: #define __AVX512VPOPCNTDQ__ 1
+// CHECK_ZNVER5_M64: #define __AVXVNNI__ 1
+// CHECK_ZNVER5_M64: #define __AVX__ 1
+// CHECK_ZNVER5_M64: #define __BMI2__ 1
+// CHECK_ZNVER5_M64: #define __BMI__ 1
+// CHECK_ZNVER5_M64: #define __CLFLUSHOPT__ 1
+// CHECK_ZNVER5_M64: #define __CLWB__ 1
+// CHECK_ZNVER5_M64: #define __CLZERO__ 1
+// CHECK_ZNVER5_M64: #define __F16C__ 1
+// CHECK_ZNVER5_M64-NOT: #define __FMA4__ 1
+// CHECK_ZNVER5_M64: #define __FMA__ 1
+// CHECK_ZNVER5_M64: #define __FSGSBASE__ 1
+// CHECK_ZNVER5_M64: #define __GFNI__ 1
+// CHECK_ZNVER5_M64: #define __LZCNT__ 1
+// CHECK_ZNVER5_M64: #define __MMX__ 1
+// CHECK_ZNVER5_M64: #define __MOVDIR64B__ 1
+// CHECK_ZNVER5_M64: #define __MOVDIRI__ 1
+// CHECK_ZNVER5_M64: #define __PCLMUL__ 1
+// CHECK_ZNVER5_M64: #define __PKU__ 1
+// CHECK_ZNVER5_M64: #define __POPCNT__ 1
+// CHECK_ZNVER5_M64: #define __PREFETCHI__ 1
+// CHECK_ZNVER5_M64: #define __PRFCHW__ 1
+// CHECK_ZNVER5_M64: #define __RDPID__ 1
+// CHECK_ZNVER5_M64: #define __RDPRU__ 1
+// CHECK_ZNVER5_M64: #define __RDRND__ 1
+// CHECK_ZNVER5_M64: #define __RDSEED__ 1
+// CHECK_ZNVER5_M64: #define __SHA__ 1
+// CHECK_ZNVER5_M64: #define __SSE2_MATH__ 1
+// CHECK_ZNVER5_M64: #define __SSE2__ 1
+// CHECK_ZNVER5_M64: #define __SSE3__ 1
+// CHECK_ZNVER5_M64: #define __SSE4A__ 1
+// CHECK_ZNVER5_M64: #define __SSE4_1__ 1
+// CHECK_ZNVER5_M64: #define __SSE4_2__ 1
+// CHECK_ZNVER5_M64: #define __SSE_MATH__ 1
+// CHECK_ZNVER5_M64: #define __SSE__ 1
+// CHECK_ZNVER5_M64: #define __SSSE3__ 1
+// CHECK_ZNVER5_M64-NOT: #define __TBM__ 1
+// CHECK_ZNVER5_M64: #define __VAES__ 1
+// CHECK_ZNVER5_M64: #define __VPCLMULQDQ__ 1
+// CHECK_ZNVER5_M64: #define __WBNOINVD__ 1
+// CHECK_ZNVER5_M64-NOT: #define __XOP__ 1
+// CHECK_ZNVER5_M64: #define __XSAVEC__ 1
+// CHECK_ZNVER5_M64: #define __XSAVEOPT__ 1
+// CHECK_ZNVER5_M64: #define __XSAVES__ 1
+// CHECK_ZNVER5_M64: #define __XSAVE__ 1
+// CHECK_ZNVER5_M64: #define __amd64 1
+// CHECK_ZNVER5_M64: #define __amd64__ 1
+// CHECK_ZNVER5_M64: #define __tune_znver5__ 1
+// CHECK_ZNVER5_M64: #define __x86_64 1
+// CHECK_ZNVER5_M64: #define __x86_64__ 1
+// CHECK_ZNVER5_M64: #define __znver5 1
+// CHECK_ZNVER5_M64: #define __znver5__ 1
+
 // End X86/GCC/Linux tests ------------------
 
 // Begin PPC/GCC/Linux tests ----------------
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 867ed97e57bf2..b1c4abd9d11df 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -59,6 +59,7 @@ enum ProcessorTypes {
   INTEL_SIERRAFOREST,
   INTEL_GRANDRIDGE,
   INTEL_CLEARWATERFOREST,
+  AMDFAM1AH,
   CPU_TYPE_MAX
 };
 
@@ -97,6 +98,7 @@ enum ProcessorSubtypes {
   INTEL_COREI7_ARROWLAKE,
   INTEL_COREI7_ARROWLAKE_S,
   INTEL_COREI7_PANTHERLAKE,
+  AMDFAM1AH_ZNVER5,
   CPU_SUBTYPE_MAX
 };
 
@@ -803,6 +805,24 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
       break; //  "znver4"
     }
     break; // family 19h
+  case 26:
+    CPU = "znver5";
+    *Type = AMDFAM1AH;
+    if (Model <= 0x77) {
+      // Models 00h-0Fh (Breithorn).
+      // Models 10h-1Fh (Breithorn-Dense).
+      // Models 20h-2Fh (Strix 1).
+      // Models 30h-37h (Strix 2).
+      // Models 38h-3Fh (Strix 3).
+      // Models 40h-4Fh (Granite Ridge).
+      // Models 50h-5Fh (Weisshorn).
+      // Models 60h-6Fh (Krackan1).
+      // Models 70h-77h (Sarlak).
+      CPU = "znver5";
+      *Subtype = AMDFAM1AH_ZNVER5;
+      break; //  "znver5"
+    }
+    break;
   default:
     break; // Unknown AMD CPU.
   }
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 92798cbe4b4c1..008cf5381c126 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -49,11 +49,13 @@ X86_CPU_TYPE(ZHAOXIN_FAM7H,       "zhaoxin_fam7h")
 X86_CPU_TYPE(INTEL_SIERRAFOREST,  "sierraforest")
 X86_CPU_TYPE(INTEL_GRANDRIDGE,    "grandridge")
 X86_CPU_TYPE(INTEL_CLEARWATERFOREST, "clearwaterforest")
+X86_CPU_TYPE(AMDFAM1AH,           "amdfam1ah")
 
 // Alternate names supported by __builtin_cpu_is and target multiversioning.
 X86_CPU_TYPE_ALIAS(INTEL_BONNELL,    "atom")
 X86_CPU_TYPE_ALIAS(AMDFAM10H,        "amdfam10")
 X86_CPU_TYPE_ALIAS(AMDFAM15H,        "amdfam15")
+X86_CPU_TYPE_ALIAS(AMDFAM1AH,        "amdfam1a")
 X86_CPU_TYPE_ALIAS(INTEL_SILVERMONT, "slm")
 
 #undef X86_CPU_TYPE_ALIAS
@@ -104,6 +106,7 @@ X86_CPU_SUBTYPE(INTEL_COREI7_GRANITERAPIDS_D,"graniterapids-d")
 X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE,      "arrowlake")
 X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE_S,    "arrowlake-s")
 X86_CPU_SUBTYPE(INTEL_COREI7_PANTHERLAKE,    "pantherlake")
+X86_CPU_SUBTYPE(AMDFAM1AH_ZNVER5,            "znver5")
 
 // Alternate names supported by __builtin_cpu_is and target multiversioning.
 X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "raptorlake")
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.h b/llvm/include/llvm/TargetParser/X86TargetParser.h
index 2083e585af4ac..5468aaa81edb7 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.h
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.h
@@ -147,6 +147,7 @@ enum CPUKind {
   CK_x86_64_v3,
   CK_x86_64_v4,
   CK_Geode,
+  CK_ZNVER5,
 };
 
 /// Parse \p CPU string into a CPUKind. Will only accept 64-bit capable CPUs if
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 9dafd5e628ca8..e82e624f70997 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1543,6 +1543,19 @@ def ProcessorFeatures {
                                                   FeatureVPOPCNTDQ];
   list<SubtargetFeature> ZN4Features =
     !listconcat(ZN3Features, ZN4AdditionalFeatures);
+
+
+  list<SubtargetFeature> ZN5Tuning = ZN4Tuning;
+  list<SubtargetFeature> ZN5AdditionalFeatures = [FeatureVNNI,
+                                                  FeatureMOVDIRI,
+                                                  FeatureMOVDIR64B,
+                                                  FeatureVP2INTERSECT,
+                                                  FeaturePREFETCHI,
+                                                  FeatureAVXVNNI
+                                                  ];
+  list<SubtargetFeature> ZN5Features =
+    !listconcat(ZN4Features, ZN5AdditionalFeatures);
+
 }
 
 //===----------------------------------------------------------------------===//
@@ -1892,6 +1905,8 @@ def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
                 ProcessorFeatures.ZN3Tuning>;
 def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
            ProcessorFeatures.ZN4Tuning>;
+def : ProcModel<"znver5", Znver4Model, ProcessorFeatures.ZN5Features,
+                ProcessorFeatures.ZN5Tuning>;
 
 def : Proc<"geode",           [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td
index 2b1dac411c992..c30e989cdc2af 100644
--- a/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/llvm/lib/Target/X86/X86PfmCounters.td
@@ -350,3 +350,4 @@ def ZnVer4PfmCounters : ProcPfmCounters {
   let ValidationCounters = DefaultAMDPfmValidationCounters;
 }
 def : PfmCountersBinding<"znver4", ZnVer4PfmCounters>;
+def : PfmCountersBinding<"znver5", ZnVer4PfmCounters>;
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 7e637cba4cfbc..865b6a44adbb0 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1213,6 +1213,25 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
       break; //  "znver4"
     }
     break; // family 19h
+  case 26:
+    CPU = "znver5";
+    *Type = X86::AMDFAM1AH;
+    if (Model <= 0x77) {
+      // Models 00h-0Fh (Breithorn).
+      // Models 10h-1Fh (Breithorn-Dense).
+      // Models 20h-2Fh (Strix 1).
+      // Models 30h-37h (Strix 2).
+      // Models 38h-3Fh (Strix 3).
+      // Models 40h-4Fh (Granite Ridge).
+      // Models 50h-5Fh (Weisshorn).
+      // Models 60h-6Fh (Krackan1).
+      // Models 70h-77h (Sarlak).
+      CPU = "znver5";
+      *Subtype = X86::AMDFAM1AH_ZNVER5;
+      break; //  "znver5"
+    }
+    break;
+
   default:
     break; // Unknown AMD CPU.
   }
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index dcf9130052ac1..a6f3b5ba5d33e 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -238,6 +238,10 @@ static constexpr FeatureBitset FeaturesZNVER4 =
     FeatureAVX512BITALG | FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 |
     FeatureGFNI | FeatureSHSTK;
 
+static constexpr FeatureBitset FeaturesZNVER5 =
+    FeaturesZNVER4 | FeatureAVXVNNI | FeatureMOVDIRI | FeatureMOVDIR64B |
+    FeatureAVX512VP2INTERSECT | FeaturePREFETCHI | FeatureAVXVNNI;
+
 // D151696 tranplanted Mangling and OnlyForCPUDispatchSpecific from
 // X86TargetParser.def to here. They are assigned by following ways:
 // 1. Copy the mangling from the original CPU_SPEICIFC MACROs. If no, assign
@@ -417,6 +421,7 @@ constexpr ProcInfo Processors[] = {
   { {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2, '\0', false },
   { {"znver3"}, CK_ZNVER3, FEATURE_AVX2, FeaturesZNVER3, '\0', false },
   { {"znver4"}, CK_ZNVER4, FEATURE_AVX512VBMI2, FeaturesZNVER4, '\0', false },
+  { {"znver5"}, CK_ZNVER5, FEATURE_AVX512VP2INTERSECT, FeaturesZNVER5, '\0', false },
   // Generic 64-bit processor.
   { {"x86-64"}, CK_x86_64, FEATURE_SSE2 , FeaturesX86_64, '\0', false },
   { {"x86-64-v2"}, CK_x86_64_v2, FEATURE_SSE4_2 , FeaturesX86_64_V2, '\0', false },
diff --git a/llvm/test/CodeGen/X86/bypass-slow-division-64.ll b/llvm/test/CodeGen/X86/bypass-slow-division-64.ll
index 6e0cfdd26a786..b0ca0069a526b 100644
--- a/llvm/test/CodeGen/X86/bypass-slow-division-64.ll
+++ b/llvm/test/CodeGen/X86/bypass-slow-division-64.ll
@@ -23,6 +23,7 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2          | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3          | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4          | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5          | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
 
 ; Additional tests for 64-bit divide bypass
 
diff --git a/llvm/test/CodeGen/X86/cmp16.ll b/llvm/test/CodeGen/X86/cmp16.ll
index fa9e75ff16a5c..8c14a78d9e113 100644
--- a/llvm/test/CodeGen/X86/cmp16.ll
+++ b/llvm/test/CodeGen/X86/cmp16.ll
@@ -13,6 +13,7 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefixes=X64,X64-FAST
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=X64,X64-FAST
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=X64,X64-FAST
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 | FileCheck %s --check-prefixes=X64,X64-FAST
 
 define i1 @cmp16_reg_eq_reg(i16 %a0, i16 %a1) {
 ; X86-GENERIC-LABEL: cmp16_reg_eq_reg:
diff --git a/llvm/test/CodeGen/X86/cpus-amd.ll b/llvm/test/CodeGen/X86/cpus-amd.ll
index 228a00428c457..33b2cf3731478 100644
--- a/llvm/test/CodeGen/X86/cpus-amd.ll
+++ b/llvm/test/CodeGen/X86/cpus-amd.ll
@@ -29,6 +29,7 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver3 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver4 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver5 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 define void @foo() {
   ret void
diff --git a/llvm/test/CodeGen/X86/rdpru.ll b/llvm/test/CodeGen/X86/rdpru.ll
index 7771f52653cb5..be79a4499a338 100644
--- a/llvm/test/CodeGen/X86/rdpru.ll
+++ b/llvm/test/CodeGen/X86/rdpru.ll
@@ -6,6 +6,7 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefix=X64
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 -fast-isel | FileCheck %s --check-prefix=X64
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 -fast-isel | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 -fast-isel | FileCheck %s --check-prefix=X64
 
 define void @rdpru_asm() {
 ; X86-LABEL: rdpru_asm:
diff --git a/llvm/test/CodeGen/X86/shuffle-as-shifts.ll b/llvm/test/CodeGen/X86/shuffle-as-shifts.ll
index e89197f5b42c3..9c8729b3ea505 100644
--- a/llvm/test/CodeGen/X86/shuffle-as-shifts.ll
+++ b/llvm/test/CodeGen/X86/shuffle-as-shifts.ll
@@ -3,6 +3,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-server  | FileCheck %s --check-prefixes=CHECK,CHECK-ICX
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4  | FileCheck %s --check-prefixes=CHECK,CHECK-V4
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4  | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5  | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
 
 
 define <4 x i32> @shuf_rot_v4i32_1032(<4 x i32> %x) {
diff --git a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
index d74d195439bda..ceef3fb4bb188 100644
--- a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
+++ b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
@@ -50,6 +50,7 @@
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2        2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver3        2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver4        2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver5        2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512
 
 ; Other chips with slow unaligned memory accesses
 
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
index 9f2071ff14b87..2b78a70ebcc26 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
@@ -6,6 +6,7 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1      | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3      | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4      | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5      | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64      | FileCheck %s --check-prefixes=X86-64
 
 define float @f32_no_daz(float %f) #0 {
diff --git a/llvm/test/CodeGen/X86/tuning-shuffle-permilpd-avx512.ll b/llvm/test/CodeGen/X86/tuning-shuffle-permilpd-avx512.ll
index 7d8bb567c09b3..162ab71fc00d4 100644
--- a/llvm/test/CodeGen/X86/tuning-shuffle-permilpd-avx512.ll
+++ b/llvm/test/CodeGen/X86/tuning-shuffle-permilpd-avx512.ll
@@ -4,6 +4,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4  | FileCheck %s --check-prefixes=CHECK,CHECK-V4
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq  | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4  | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5  | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
 
 define <8 x double> @transform_VPERMILPSZrr(<8 x double> %a) nounwind {
 ; CHECK-LABEL: transform_VPERMILPSZrr:
diff --git a/llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll b/llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll
index 5d031f6017c77..cd97946da248f 100644
--- a/llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll
+++ b/llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll
@@ -4,6 +4,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4  | FileCheck %s --check-prefixes=CHECK,CHECK-V4
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq  | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4  | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5  | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
 
 define <16 x float> @transform_VPERMILPSZrr(<16 x float> %a) nounwind {
 ; CHECK-LABEL: transform_VPERMILPSZrr:
diff --git a/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll b/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll
index 4a160bc9debc7..5ea991f85523e 100644
--- a/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll
+++ b/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll
@@ -5,6 +5,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4  | FileCheck %s --check-prefixes=CHECK,CHECK-V4
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq  | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4  | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5  | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
 
 
 define <16 x float> @transform_VUNPCKLPDZrr(<16 x float> %a, <16 x float> %b) nounwind {
diff --git a/llvm/test/CodeGen/X86/tuning-shuffle-unpckps-avx512.ll b/llvm/test/CodeGen/X86/tuning-shuffle-unpckps-avx512.ll
index d0e3ad9b19086..96155f0300d2d 100644
--- a/llvm/test/CodeGen/X86/tuning-shuffle-unpckps-avx512.ll
+++ b/llvm/test/CodeGen/X86/tuning-shuffle-unpckps-avx512.ll
@@ -5,6 +5,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4  | FileCheck %s --check-prefixes=CHECK,CHECK-V4
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq  | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4  | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5  | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
 
 define <16 x float> @transform_VUNPCKLPSZrr(<16 x float> %a, <16 x float> %b) nounwind {
 ; CHECK-LABEL: transform_VUNPCKLPSZrr:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll b/llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
index e59532d4fef30..4021b1bf292bb 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
@@ -8,6 +8,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver2 | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver3 | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=FAST
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=skx | FileCheck %s --check-prefixes=FAST
 
diff --git a/llvm/test/CodeGen/X86/vpdpwssd.ll b/llvm/test/CodeGen/X86/vpdpwssd.ll
index e6a07b4aeb271..3c1eb92e9e3c3 100644
--- a/llvm/test/CodeGen/X86/vpdpwssd.ll
+++ b/llvm/test/CodeGen/X86/vpdpwssd.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+fast-dpwssd | FileCheck %s
 
 define <16 x i32> @vpdpwssd_test(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2) {
diff --git a/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll b/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
index af6fbdc9f60de..bbaa414924707 100644
--- a/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
+++ b/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
@@ -16,6 +16,7 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 | FileCheck %s
 
 ; Verify that for the X86_64 processors that are known to have poor latency
 ; double precision shift instructions we do not generate 'shld' or 'shrd'
diff --git a/llvm/test/MC/X86/x86_long_nop.s b/llvm/test/MC/X86/x86_long_nop.s
index 6136c3db9a3da..b79403bb5f1ec 100644
--- a/llvm/test/MC/X86/x86_long_nop.s
+++ b/llvm/test/MC/X86/x86_long_nop.s
@@ -19,6 +19,8 @@
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver3 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver4 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver4 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver5 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver5 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=nehalem %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=westmere %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=sandybridge %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
diff --git a/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll b/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll
index abdcfcf7e0742..b05994ddfa35e 100644
--- a/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll
+++ b/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll
@@ -1,6 +1,7 @@
 ; RUN: opt -passes=debugify,loop-unroll -mcpu=znver3 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 | FileCheck --check-prefixes=ALL,UNROLL %s
 ; RUN: opt -passes=debugify,loop-unroll -mcpu=znver3 -pass-remarks=TTI -pass-remarks-analysis=TTI  < %s -S 2>&1 | FileCheck --check-prefixes=ALL,TTI %s
 ; RUN: opt -passes=debugify,loop-unroll -mcpu=znver4 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 | FileCheck --check-prefixes=ALL,UNROLL %s
+; RUN: opt -passes=debugify,loop-unroll -mcpu=znver5 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 | FileCheck --check-prefixes=ALL,UNROLL %s
 
 ; RUN: opt -passes=debugify,loop-unroll -mcpu=znver3 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 --try-experimental-debuginfo-iterators | FileCheck --check-prefixes=ALL,UNROLL %s
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll
index 391771e06cab8..037e073de9d59 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
 ; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver4 -S < %s | FileCheck %s
+; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver5 -S < %s | FileCheck %s
 
 define internal i32 @testfunc() {
 ; CHECK-LABEL: define internal i32 @testfunc

From bdae3c487cbb2b4161e7fbb54a855f0ba55da61a Mon Sep 17 00:00:00 2001
From: Zaara Syeda <syzaara@ca.ibm.com>
Date: Tue, 10 Sep 2024 14:14:01 -0400
Subject: [PATCH 292/427] [PowerPC] Fix assert exposed by PR 95931 in
 LowerBITCAST (#108062)

Hit Assertion failed: Num < NumOperands && "Invalid child # of SDNode!"
Fix by checking opcode and value type before calling getOperand.

(cherry picked from commit 22067a8eb43a7194e65913b47a9c724fde3ed68f)
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp |  9 +++++----
 llvm/test/CodeGen/PowerPC/f128-bitcast.ll   | 22 +++++++++++++++++++++
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 21cf4d9eeac17..758de9d732fa7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9338,12 +9338,13 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
   SDLoc dl(Op);
   SDValue Op0 = Op->getOperand(0);
 
+  if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
+      (Op.getValueType() != MVT::f128))
+    return SDValue();
+
   SDValue Lo = Op0.getOperand(0);
   SDValue Hi = Op0.getOperand(1);
-
-  if ((Op.getValueType() != MVT::f128) ||
-      (Op0.getOpcode() != ISD::BUILD_PAIR) || (Lo.getValueType() != MVT::i64) ||
-      (Hi.getValueType() != MVT::i64) || !Subtarget.isPPC64())
+  if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
     return SDValue();
 
   if (!Subtarget.isLittleEndian())
diff --git a/llvm/test/CodeGen/PowerPC/f128-bitcast.ll b/llvm/test/CodeGen/PowerPC/f128-bitcast.ll
index ffbfbd0c64ff3..55ba3cb1e0538 100644
--- a/llvm/test/CodeGen/PowerPC/f128-bitcast.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-bitcast.ll
@@ -86,3 +86,25 @@ entry:
   ret i64 %1
 }
 
+define <4 x i32> @truncBitcast(i512 %a) {
+; CHECK-LABEL: truncBitcast:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrdd v2, r4, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: truncBitcast:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mtvsrdd v2, r9, r10
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-P8-LABEL: truncBitcast:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    mtfprd f0, r3
+; CHECK-P8-NEXT:    mtfprd f1, r4
+; CHECK-P8-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-P8-NEXT:    blr
+entry:
+  %0 = trunc i512 %a to i128
+  %1 = bitcast i128 %0 to <4 x i32>
+  ret <4 x i32> %1
+}

From bd4ff65a601895ba816623cddb36ce466cceabe6 Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@hieta.se>
Date: Tue, 17 Sep 2024 09:39:01 +0200
Subject: [PATCH 293/427] Revert "[LoongArch] Eliminate the redundant sign
 extension of division (#107971)"

This reverts commit d752f29fb333d47724484e08b32d6499cc1e460e.
---
 llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp b/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
index ab90409fdf47d..abac69054f3b9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
@@ -637,19 +637,6 @@ static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST,
         break;
       }
       return false;
-    // If all incoming values are sign-extended and all users only use
-    // the lower 32 bits, then convert them to W versions.
-    case LoongArch::DIV_D: {
-      if (!AddRegToWorkList(MI->getOperand(1).getReg()))
-        return false;
-      if (!AddRegToWorkList(MI->getOperand(2).getReg()))
-        return false;
-      if (hasAllWUsers(*MI, ST, MRI)) {
-        FixableDef.insert(MI);
-        break;
-      }
-      return false;
-    }
     }
   }
 
@@ -664,8 +651,6 @@ static unsigned getWOp(unsigned Opcode) {
     return LoongArch::ADDI_W;
   case LoongArch::ADD_D:
     return LoongArch::ADD_W;
-  case LoongArch::DIV_D:
-    return LoongArch::DIV_W;
   case LoongArch::LD_D:
   case LoongArch::LD_WU:
     return LoongArch::LD_W;

From 560ed047d183348b341ffd4e27712c254d82f589 Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@hieta.se>
Date: Tue, 17 Sep 2024 09:39:18 +0200
Subject: [PATCH 294/427] Revert " [LoongArch][ISel] Check the number of sign
 bits in `PatGprGpr_32` (#107432)"

This reverts commit 78654faa0c6d9dc2f72b81953b9cffbb7675755b.
---
 .../Target/LoongArch/LoongArchInstrInfo.td    |  5 +-
 .../ir-instruction/sdiv-udiv-srem-urem.ll     | 67 +------------------
 2 files changed, 3 insertions(+), 69 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 339d50bd81921..ef647a4277873 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1065,13 +1065,10 @@ def RDTIME_D : RDTIME_2R<0x00006800>;
 
 /// Generic pattern classes
 
-def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{
-  return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLE(MVT::i32);
-}]>;
 class PatGprGpr<SDPatternOperator OpNode, LAInst Inst>
     : Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>;
 class PatGprGpr_32<SDPatternOperator OpNode, LAInst Inst>
-    : Pat<(sext_inreg (OpNode (assertsexti32 GPR:$rj), (assertsexti32 GPR:$rk)), i32), (Inst GPR:$rj, GPR:$rk)>;
+    : Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, GPR:$rk)>;
 class PatGpr<SDPatternOperator OpNode, LAInst Inst>
     : Pat<(OpNode GPR:$rj), (Inst GPR:$rj)>;
 
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index c22acdb496907..ab3eec240db3c 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -191,8 +191,7 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    div.d $a0, $a0, $a1
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    div.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32:
@@ -208,12 +207,11 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:    div.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB5_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
 ; LA64-TRAP-NEXT:  .LBB5_2: # %entry
-; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:    ret
 entry:
   %r = sdiv i32 %a, %b
@@ -1153,64 +1151,3 @@ entry:
   %r = urem i64 %a, %b
   ret i64 %r
 }
-
-define signext i32 @pr107414(i32 signext %x) {
-; LA32-LABEL: pr107414:
-; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    .cfi_def_cfa_offset 16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    .cfi_offset 1, -4
-; LA32-NEXT:    move $a2, $a0
-; LA32-NEXT:    srai.w $a3, $a0, 31
-; LA32-NEXT:    lu12i.w $a0, -266831
-; LA32-NEXT:    ori $a0, $a0, 3337
-; LA32-NEXT:    move $a1, $zero
-; LA32-NEXT:    bl %plt(__divdi3)
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
-;
-; LA64-LABEL: pr107414:
-; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    lu12i.w $a1, -266831
-; LA64-NEXT:    ori $a1, $a1, 3337
-; LA64-NEXT:    lu32i.d $a1, 0
-; LA64-NEXT:    div.d $a0, $a1, $a0
-; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    ret
-;
-; LA32-TRAP-LABEL: pr107414:
-; LA32-TRAP:       # %bb.0: # %entry
-; LA32-TRAP-NEXT:    addi.w $sp, $sp, -16
-; LA32-TRAP-NEXT:    .cfi_def_cfa_offset 16
-; LA32-TRAP-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-TRAP-NEXT:    .cfi_offset 1, -4
-; LA32-TRAP-NEXT:    move $a2, $a0
-; LA32-TRAP-NEXT:    srai.w $a3, $a0, 31
-; LA32-TRAP-NEXT:    lu12i.w $a0, -266831
-; LA32-TRAP-NEXT:    ori $a0, $a0, 3337
-; LA32-TRAP-NEXT:    move $a1, $zero
-; LA32-TRAP-NEXT:    bl %plt(__divdi3)
-; LA32-TRAP-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-TRAP-NEXT:    addi.w $sp, $sp, 16
-; LA32-TRAP-NEXT:    ret
-;
-; LA64-TRAP-LABEL: pr107414:
-; LA64-TRAP:       # %bb.0: # %entry
-; LA64-TRAP-NEXT:    lu12i.w $a1, -266831
-; LA64-TRAP-NEXT:    ori $a1, $a1, 3337
-; LA64-TRAP-NEXT:    lu32i.d $a1, 0
-; LA64-TRAP-NEXT:    div.d $a1, $a1, $a0
-; LA64-TRAP-NEXT:    bnez $a0, .LBB32_2
-; LA64-TRAP-NEXT:  # %bb.1: # %entry
-; LA64-TRAP-NEXT:    break 7
-; LA64-TRAP-NEXT:  .LBB32_2: # %entry
-; LA64-TRAP-NEXT:    addi.w $a0, $a1, 0
-; LA64-TRAP-NEXT:    ret
-entry:
-  %conv = sext i32 %x to i64
-  %div = sdiv i64 3202030857, %conv
-  %conv1 = trunc i64 %div to i32
-  ret i32 %conv1
-}

From a4bf6cd7cfb1a1421ba92bca9d017b49936c55e4 Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@hieta.se>
Date: Tue, 17 Sep 2024 13:26:36 +0200
Subject: [PATCH 295/427] Bump version to 19.1.0 (final)

---
 cmake/Modules/LLVMVersion.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake
index 7e3bb98c3577a..928c6c439bd1f 100644
--- a/cmake/Modules/LLVMVersion.cmake
+++ b/cmake/Modules/LLVMVersion.cmake
@@ -10,6 +10,6 @@ if(NOT DEFINED LLVM_VERSION_PATCH)
   set(LLVM_VERSION_PATCH 0)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
-  set(LLVM_VERSION_SUFFIX -rc4)
+  set(LLVM_VERSION_SUFFIX)
 endif()
 

From 64075837b5532108a1fe96a5b158feb7a9025694 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Tue, 3 Sep 2024 22:45:54 +0300
Subject: [PATCH 296/427] [clang] Don't add DWARF debug info when assembling .s
 with clang-cl /Z7 (#106686)

This fixes a regression from f58330cbe44598eb2de0cca3b812f67fea0a71ca.

That commit changed the clang-cl options /Zi and /Z7 to be implemented
as aliases of -g rather than having separate handling.

This had the unintended effect, that when assembling .s files with
clang-cl, the /Z7 option (which implies using CodeView debug info) was
treated as a -g option, which causes `ClangAs::ConstructJob` to pick up
the option as part of `Args.getLastArg(options::OPT_g_Group)`, which
sets the `WantDebug` variable.

Within `Clang::ConstructJob`, we check for whether explicit `-gdwarf` or
`-gcodeview` options have been set, and if not, we pick the default
debug format for the current toolchain. However, in `ClangAs`, if debug
info has been enabled, it always adds DWARF debug info.

Add similar logic in `ClangAs` - check if the user has explicitly
requested either DWARF or CodeView, otherwise look up the toolchain
default. If we (either implicitly or explicitly) should be producing
CodeView, don't enable the default `ClangAs` DWARF generation.

This fixes the issue, where assembling a single `.s` file with clang-cl,
with the /Z7 option, causes the file to contain some DWARF sections.
This causes the output executable to contain DWARF, in addition to the
separate intended main PDB file.

By having the output executable contain DWARF sections, LLDB only looks
at the (very little) DWARF info in the executable, rather than looking
for a separate standalone PDB file. This caused an issue with LLDB's
tests, https://github.com/llvm/llvm-project/issues/101710.

(cherry picked from commit fcb7b390ccd5b4cfc71f13b5e16a846f3f400c10)
---
 clang/lib/Driver/ToolChains/Clang.cpp | 26 ++++++++++++++++++++++++++
 clang/test/Driver/debug-options-as.c  | 17 ++++++++++++++++-
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 366b147a052bf..8858c318aba7a 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -8561,6 +8561,32 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
     WantDebug = !A->getOption().matches(options::OPT_g0) &&
                 !A->getOption().matches(options::OPT_ggdb0);
 
+  // If a -gdwarf argument appeared, remember it.
+  bool EmitDwarf = false;
+  if (const Arg *A = getDwarfNArg(Args))
+    EmitDwarf = checkDebugInfoOption(A, Args, D, getToolChain());
+
+  bool EmitCodeView = false;
+  if (const Arg *A = Args.getLastArg(options::OPT_gcodeview))
+    EmitCodeView = checkDebugInfoOption(A, Args, D, getToolChain());
+
+  // If the user asked for debug info but did not explicitly specify -gcodeview
+  // or -gdwarf, ask the toolchain for the default format.
+  if (!EmitCodeView && !EmitDwarf && WantDebug) {
+    switch (getToolChain().getDefaultDebugFormat()) {
+    case llvm::codegenoptions::DIF_CodeView:
+      EmitCodeView = true;
+      break;
+    case llvm::codegenoptions::DIF_DWARF:
+      EmitDwarf = true;
+      break;
+    }
+  }
+
+  // If the arguments don't imply DWARF, don't emit any debug info here.
+  if (!EmitDwarf)
+    WantDebug = false;
+
   llvm::codegenoptions::DebugInfoKind DebugInfoKind =
       llvm::codegenoptions::NoDebugInfo;
 
diff --git a/clang/test/Driver/debug-options-as.c b/clang/test/Driver/debug-options-as.c
index c83c0cb90431d..cb0492177ff47 100644
--- a/clang/test/Driver/debug-options-as.c
+++ b/clang/test/Driver/debug-options-as.c
@@ -19,12 +19,27 @@
 // GGDB0-NOT: -debug-info-kind=
 
 // Check to make sure clang with -g on a .s file gets passed.
-// RUN: %clang -### -c -integrated-as -g -x assembler %s 2>&1 \
+// This requires a target that defaults to DWARF.
+// RUN: %clang -### --target=x86_64-linux-gnu -c -integrated-as -g -x assembler %s 2>&1 \
 // RUN:   | FileCheck %s
 //
 // CHECK: "-cc1as"
 // CHECK: "-debug-info-kind=constructor"
 
+// Check that a plain -g, without any -gdwarf, for a MSVC target, doesn't
+// trigger producing DWARF output.
+// RUN: %clang -### --target=x86_64-windows-msvc -c -integrated-as -g -x assembler %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=MSVC %s
+//
+// MSVC: "-cc1as"
+// MSVC-NOT: "-debug-info-kind=constructor"
+
+// Check that clang-cl with the -Z7 option works the same, not triggering
+// any DWARF output.
+//
+// RUN: %clang_cl -### --target=x86_64-pc-windows-msvc -c -Z7 -x assembler -- %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=MSVC %s
+
 // Check to make sure clang with -g on a .s file gets passed -dwarf-debug-producer.
 // RUN: %clang -### -c -integrated-as -g -x assembler %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=P %s

From de7ee2e3ae893d61c0fa1a601e84ed538ec85e70 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Tue, 17 Sep 2024 17:07:35 -0700
Subject: [PATCH 297/427] [bolt][tests] Skip tests that use perf when perf
 counters are unavailable (#107892)

On the GitHub Action runners, perf always fails with the error below ,
so we need to skip the perf tests on platforms like this that have
limited access to the perf counters.

```
Access to performance monitoring and observability operations is limited.
Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open
access to performance monitoring and observability operations for processes
without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.
More information can be found at 'Perf events and tool security' document:
https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html
perf_event_paranoid setting is 4:
  -1: Allow use of (almost) all events by all users
      Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK
>= 0: Disallow raw and ftrace function tracepoint access
>= 1: Disallow CPU event access
>= 2: Disallow kernel profiling
To make the adjusted perf_event_paranoid setting permanent preserve it
in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>)
```

(cherry picked from commit 773353b20a49bfa0dab608d415c1b4734d037fce)
---
 bolt/test/perf2bolt/lit.local.cfg | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bolt/test/perf2bolt/lit.local.cfg b/bolt/test/perf2bolt/lit.local.cfg
index 4ee9ad08cc78a..0fecf913aa98b 100644
--- a/bolt/test/perf2bolt/lit.local.cfg
+++ b/bolt/test/perf2bolt/lit.local.cfg
@@ -1,4 +1,5 @@
 import shutil
+import subprocess
 
-if shutil.which("perf") is not None:
-    config.available_features.add("perf")
\ No newline at end of file
+if shutil.which("perf") is not None and subprocess.run(["perf", "record", "-e", "cycles:u", "-o", "/dev/null", "--", "perf", "--version"], capture_output=True).returncode == 0:
+    config.available_features.add("perf")

From 22139b36d7a81cd2dc08e9499a915def4aefc64e Mon Sep 17 00:00:00 2001
From: Andrew Ng <andrew.ng@sony.com>
Date: Fri, 13 Sep 2024 12:19:42 +0100
Subject: [PATCH 298/427] Reland [llvm-ml] Fix RIP-relative addressing for ptr
 operands (#108061)

Relands #107618 with fix for assertion triggered by OpenMP runtime MASM
assembly source.

(cherry picked from commit 7574e1ddc4be63628cb7617857cc8938058a79d2)
---
 llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp      |  3 ++-
 llvm/test/tools/llvm-ml/rip_relative_addressing.asm | 12 +++++++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index c7f88fed9b128..efbcb57add98c 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2707,7 +2707,8 @@ bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
   bool MaybeDirectBranchDest = true;
 
   if (Parser.isParsingMasm()) {
-    if (is64BitMode() && SM.getElementSize() > 0) {
+    if (is64BitMode() &&
+        ((PtrInOperand && !IndexReg) || SM.getElementSize() > 0)) {
       DefaultBaseReg = X86::RIP;
     }
     if (IsUnconditionalBranch) {
diff --git a/llvm/test/tools/llvm-ml/rip_relative_addressing.asm b/llvm/test/tools/llvm-ml/rip_relative_addressing.asm
index d237e84435b7d..c005b9721c07e 100644
--- a/llvm/test/tools/llvm-ml/rip_relative_addressing.asm
+++ b/llvm/test/tools/llvm-ml/rip_relative_addressing.asm
@@ -53,4 +53,14 @@ mov eax, [t8]
 ; CHECK-LABEL: t8:
 ; CHECK: mov eax, dword ptr [t8]
 
-END
\ No newline at end of file
+t9:
+mov eax, dword ptr [bar]
+; CHECK-LABEL: t9:
+; CHECK-32: mov eax, dword ptr [bar]
+; CHECK-64: mov eax, dword ptr [rip + bar]
+
+t10:
+mov ebx, dword ptr [4*eax]
+; CHECK: mov ebx, dword ptr [4*eax]
+
+END

From 910dde5780f9e8f3a16612bd89b512b2594e9ab7 Mon Sep 17 00:00:00 2001
From: hev <wangrui@loongson.cn>
Date: Sat, 14 Sep 2024 11:19:34 +0800
Subject: [PATCH 299/427] [LoongArch][sanitizer] Fix SC_ADDRERR_{RD,WR} missing
 in the musl environment (#108557)

Fixes #108550

(cherry picked from commit 1825cf28dc83113200b623ebcf063eea35ade79a)
---
 compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 648df0c4e5a76..b9b1f496df7c9 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -2014,6 +2014,18 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const {
     return Unknown;
   return esr & ESR_ELx_WNR ? Write : Read;
 #  elif defined(__loongarch__)
+  // In the musl environment, the Linux kernel uapi sigcontext.h is not
+  // included in signal.h. To avoid missing the SC_ADDRERR_{RD,WR} macros,
+  // copy them here. The LoongArch Linux kernel uapi is already stable,
+  // so there's no need to worry about the value changing.
+#    ifndef SC_ADDRERR_RD
+  // Address error was due to memory load
+#      define SC_ADDRERR_RD (1 << 30)
+#    endif
+#    ifndef SC_ADDRERR_WR
+  // Address error was due to memory store
+#      define SC_ADDRERR_WR (1 << 31)
+#    endif
   u32 flags = ucontext->uc_mcontext.__flags;
   if (flags & SC_ADDRERR_RD)
     return SignalContext::Read;

From 1720219a1dea5c9b99df6dd1fdfb9dc8e77054fd Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Wed, 18 Sep 2024 23:06:12 -0700
Subject: [PATCH 300/427] [ELF] --icf: don't fold a section without relocation
 and a section with relocations for SHT_CREL

Similar to commit 686cff17cc310884e48ae963bf7507f96950cc90 for SHT_REL (#57693).
CREL hasn't been tested with ICF before.

And avoid a pitfall that eqClass[0] might interfere with ICF.

(cherry picked from commit e82f0838ae88ad69515ebec234765e3e2607bebf)
---
 lld/ELF/ICF.cpp          | 4 ++--
 lld/ELF/InputSection.cpp | 6 +++---
 lld/ELF/InputSection.h   | 4 ++++
 lld/test/ELF/icf10.s     | 3 +++
 4 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp
index 44e8a71cc6286..5591c5e71e0b1 100644
--- a/lld/ELF/ICF.cpp
+++ b/lld/ELF/ICF.cpp
@@ -324,7 +324,7 @@ bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) {
 
   const RelsOrRelas<ELFT> ra = a->template relsOrRelas<ELFT>();
   const RelsOrRelas<ELFT> rb = b->template relsOrRelas<ELFT>();
-  if (ra.areRelocsCrel())
+  if (ra.areRelocsCrel() || rb.areRelocsCrel())
     return constantEq(a, ra.crels, b, rb.crels);
   return ra.areRelocsRel() || rb.areRelocsRel()
              ? constantEq(a, ra.rels, b, rb.rels)
@@ -376,7 +376,7 @@ template <class ELFT>
 bool ICF<ELFT>::equalsVariable(const InputSection *a, const InputSection *b) {
   const RelsOrRelas<ELFT> ra = a->template relsOrRelas<ELFT>();
   const RelsOrRelas<ELFT> rb = b->template relsOrRelas<ELFT>();
-  if (ra.areRelocsCrel())
+  if (ra.areRelocsCrel() || rb.areRelocsCrel())
     return variableEq(a, ra.crels, b, rb.crels);
   return ra.areRelocsRel() || rb.areRelocsRel()
              ? variableEq(a, ra.rels, b, rb.rels)
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 570e485455bad..a165c813d4259 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -150,12 +150,12 @@ RelsOrRelas<ELFT> InputSectionBase::relsOrRelas(bool supportsCrel) const {
     InputSectionBase *const &relSec = f->getSections()[relSecIdx];
     // Otherwise, allocate a buffer to hold the decoded RELA relocations. When
     // called for the first time, relSec is null (without --emit-relocs) or an
-    // InputSection with zero eqClass[0].
-    if (!relSec || !cast<InputSection>(relSec)->eqClass[0]) {
+    // InputSection with false decodedCrel.
+    if (!relSec || !cast<InputSection>(relSec)->decodedCrel) {
       auto *sec = makeThreadLocal<InputSection>(*f, shdr, name);
       f->cacheDecodedCrel(relSecIdx, sec);
       sec->type = SHT_RELA;
-      sec->eqClass[0] = SHT_RELA;
+      sec->decodedCrel = true;
 
       RelocsCrel<ELFT::Is64Bits> entries(sec->content_);
       sec->size = entries.size() * sizeof(typename ELFT::Rela);
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index 6659530a9c9c2..afa6ee5bd0826 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -176,6 +176,10 @@ class InputSectionBase : public SectionBase {
 
   mutable bool compressed = false;
 
+  // Whether this section is SHT_CREL and has been decoded to RELA by
+  // relsOrRelas.
+  bool decodedCrel = false;
+
   // Whether the section needs to be padded with a NOP filler due to
   // deleteFallThruJmpInsn.
   bool nopFiller = false;
diff --git a/lld/test/ELF/icf10.s b/lld/test/ELF/icf10.s
index 3c18c431c3b9d..ff926d0e16b10 100644
--- a/lld/test/ELF/icf10.s
+++ b/lld/test/ELF/icf10.s
@@ -5,6 +5,9 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-freebsd %s -o %t.o
 # RUN: ld.lld --icf=all %t.o -o /dev/null --print-icf-sections 2>&1 | FileCheck %s
 
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o --crel
+# RUN: ld.lld --icf=all %t.o -o /dev/null --print-icf-sections 2>&1 | FileCheck %s
+
 # Checks that ICF does not merge 2 sections the offset of
 # the relocations of which differ.
 

From 3e512ba17d90d5ab5382bae61a3e5762d2e3f42a Mon Sep 17 00:00:00 2001
From: Younan Zhang <zyn7109@gmail.com>
Date: Mon, 26 Aug 2024 14:30:26 +0800
Subject: [PATCH 301/427] [Clang][Concepts] Fix the constraint equivalence
 checking involving parameter packs (#102131)

We established an instantiation scope in order for constraint
equivalence checking to properly map the uninstantiated parameters.

That mechanism mapped even packs to themselves. Consequently, parameter
packs e.g. appearing in a function call, were not expanded. So they
would end up becoming `SubstTemplateTypeParmPackType`s that circularly
depend on the canonical declaration of the function template, which is
not yet determined, hence the spurious error.

No release note as I plan to backport it to 19.

Fixes https://github.com/llvm/llvm-project/issues/101735

---------

Co-authored-by: cor3ntin <corentinjabot@gmail.com>
(cherry picked from commit e6974daa7bc100c8b88057d50f3ec3eca7282243)
---
 clang/lib/Sema/SemaConcept.cpp                | 26 +++++++++++++++++--
 .../SemaTemplate/concepts-out-of-line-def.cpp | 23 ++++++++++++++++
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index c34d32002b5ad..244f6ef2f53fa 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -969,8 +969,30 @@ static const Expr *SubstituteConstraintExpressionWithoutSatisfaction(
   // equivalence.
   LocalInstantiationScope ScopeForParameters(S);
   if (auto *FD = DeclInfo.getDecl()->getAsFunction())
-    for (auto *PVD : FD->parameters())
-      ScopeForParameters.InstantiatedLocal(PVD, PVD);
+    for (auto *PVD : FD->parameters()) {
+      if (!PVD->isParameterPack()) {
+        ScopeForParameters.InstantiatedLocal(PVD, PVD);
+        continue;
+      }
+      // This is hacky: we're mapping the parameter pack to a size-of-1 argument
+      // to avoid building SubstTemplateTypeParmPackTypes for
+      // PackExpansionTypes. The SubstTemplateTypeParmPackType node would
+      // otherwise reference the AssociatedDecl of the template arguments, which
+      // is, in this case, the template declaration.
+      //
+      // However, as we are in the process of comparing potential
+      // re-declarations, the canonical declaration is the declaration itself at
+      // this point. So if we didn't expand these packs, we would end up with an
+      // incorrect profile difference because we will be profiling the
+      // canonical types!
+      //
+      // FIXME: Improve the "no-transform" machinery in FindInstantiatedDecl so
+      // that we can eliminate the Scope in the cases where the declarations are
+      // not necessarily instantiated. It would also benefit the noexcept
+      // specifier comparison.
+      ScopeForParameters.MakeInstantiatedLocalArgPack(PVD);
+      ScopeForParameters.InstantiatedLocalPackArg(PVD, PVD);
+    }
 
   std::optional<Sema::CXXThisScopeRAII> ThisScope;
 
diff --git a/clang/test/SemaTemplate/concepts-out-of-line-def.cpp b/clang/test/SemaTemplate/concepts-out-of-line-def.cpp
index 0142efcdc3ee8..333187b0d74ad 100644
--- a/clang/test/SemaTemplate/concepts-out-of-line-def.cpp
+++ b/clang/test/SemaTemplate/concepts-out-of-line-def.cpp
@@ -599,3 +599,26 @@ template <class DerT>
 unsigned long DerivedCollection<DerTs...>::index() {}
 
 } // namespace GH72557
+
+namespace GH101735 {
+
+template <class, class>
+concept True = true;
+
+template <typename T>
+class A {
+  template <typename... Ts>
+  void method(Ts&... ts)
+    requires requires (T t) {
+      { t.method(static_cast<Ts &&>(ts)...) } -> True<void>;
+    };
+};
+
+template <typename T>
+template <typename... Ts>
+void A<T>::method(Ts&... ts)
+  requires requires (T t) {
+    { t.method(static_cast<Ts &&>(ts)...) } -> True<void>;
+  } {}
+
+}

From b8b5050279e6c586e8dd7851ebf11139a40e6644 Mon Sep 17 00:00:00 2001
From: Patryk Wychowaniec <pwychowaniec@pm.me>
Date: Thu, 29 Aug 2024 09:28:17 +0200
Subject: [PATCH 302/427] [AVR] Fix 16-bit LDDs with immediate overflows
 (#104923)

16-bit loads are expanded into a pair of 8-bit loads, so the maximum
offset of such 16-bit loads must be 62, not 63.

(cherry picked from commit c7a4efa4294789b1116f0c4a320c16fcb27cb62c)
---
 llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp       |   9 +-
 .../CodeGen/AVR/ldd-immediate-overflow.ll     | 144 ++++++++++++++++++
 .../CodeGen/AVR/std-immediate-overflow.ll     | 137 +++++++++++++++++
 3 files changed, 288 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AVR/ldd-immediate-overflow.ll
 create mode 100644 llvm/test/CodeGen/AVR/std-immediate-overflow.ll

diff --git a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
index 77db876d47e44..a8927d834630e 100644
--- a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -122,8 +122,13 @@ bool AVRDAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
     // offset allowed.
     MVT VT = cast<MemSDNode>(Op)->getMemoryVT().getSimpleVT();
 
-    // We only accept offsets that fit in 6 bits (unsigned).
-    if (isUInt<6>(RHSC) && (VT == MVT::i8 || VT == MVT::i16)) {
+    // We only accept offsets that fit in 6 bits (unsigned), with the exception
+    // of 16-bit loads - those can only go up to 62, because we desugar them
+    // into a pair of 8-bit loads like `ldd rx, RHSC` + `ldd ry, RHSC + 1`.
+    bool OkI8 = VT == MVT::i8 && RHSC <= 63;
+    bool OkI16 = VT == MVT::i16 && RHSC <= 62;
+
+    if (OkI8 || OkI16) {
       Base = N.getOperand(0);
       Disp = CurDAG->getTargetConstant(RHSC, dl, MVT::i8);
 
diff --git a/llvm/test/CodeGen/AVR/ldd-immediate-overflow.ll b/llvm/test/CodeGen/AVR/ldd-immediate-overflow.ll
new file mode 100644
index 0000000000000..6f1a4b32bb054
--- /dev/null
+++ b/llvm/test/CodeGen/AVR/ldd-immediate-overflow.ll
@@ -0,0 +1,144 @@
+; RUN: llc -march=avr -filetype=asm -O1 < %s | FileCheck %s
+
+define void @check60(ptr %1) {
+; CHECK-LABEL: check60:
+; CHECK-NEXT: %bb.0
+; CHECK-NEXT: mov r30, r24
+; CHECK-NEXT: mov r31, r25
+; CHECK-NEXT: ldd r24, Z+60
+; CHECK-NEXT: ldd r25, Z+61
+; CHECK-NEXT: ldd r18, Z+62
+; CHECK-NEXT: ldd r19, Z+63
+; CHECK-NEXT: sts 3, r19
+; CHECK-NEXT: sts 2, r18
+; CHECK-NEXT: sts 1, r25
+; CHECK-NEXT: sts 0, r24
+; CHECK-NEXT: ret
+
+bb0:
+  %2 = getelementptr i8, ptr %1, i16 60
+  %3 = load i32, ptr %2, align 1
+  store i32 %3, ptr null, align 1
+  ret void
+}
+
+define void @check61(ptr %1) {
+; CHECK-LABEL: check61:
+; CHECK-NEXT: %bb.0
+; CHECK-NEXT: mov r30, r24
+; CHECK-NEXT: mov r31, r25
+; CHECK-NEXT: ldd r18, Z+61
+; CHECK-NEXT: ldd r19, Z+62
+; CHECK-NEXT: adiw r24, 63
+; CHECK-NEXT: mov r30, r24
+; CHECK-NEXT: mov r31, r25
+; CHECK-NEXT: ld r24, Z
+; CHECK-NEXT: ldd r25, Z+1
+; CHECK-NEXT: sts 3, r25
+; CHECK-NEXT: sts 2, r24
+; CHECK-NEXT: sts 1, r19
+; CHECK-NEXT: sts 0, r18
+; CHECK-NEXT: ret
+
+bb0:
+  %2 = getelementptr i8, ptr %1, i16 61
+  %3 = load i32, ptr %2, align 1
+  store i32 %3, ptr null, align 1
+  ret void
+}
+
+define void @check62(ptr %1) {
+; CHECK-LABEL: check62:
+; CHECK-NEXT: %bb.0
+; CHECK-NEXT: mov r30, r24
+; CHECK-NEXT: mov r31, r25
+; CHECK-NEXT: ldd r18, Z+62
+; CHECK-NEXT: ldd r19, Z+63
+; CHECK-NEXT: adiw r24, 62
+; CHECK-NEXT: mov r30, r24
+; CHECK-NEXT: mov r31, r25
+; CHECK-NEXT: ldd r24, Z+2
+; CHECK-NEXT: ldd r25, Z+3
+; CHECK-NEXT: sts 3, r25
+; CHECK-NEXT: sts 2, r24
+; CHECK-NEXT: sts 1, r19
+; CHECK-NEXT: sts 0, r18
+; CHECK-NEXT: ret
+
+bb0:
+  %2 = getelementptr i8, ptr %1, i16 62
+  %3 = load i32, ptr %2, align 1
+  store i32 %3, ptr null, align 1
+  ret void
+}
+
+define void @check63(ptr %1) {
+; CHECK-LABEL: check63:
+; CHECK-NEXT: %bb.0
+; CHECK-NEXT: adiw r24, 63
+; CHECK-NEXT: mov r30, r24
+; CHECK-NEXT: mov r31, r25
+; CHECK-NEXT: ld r24, Z
+; CHECK-NEXT: ldd r25, Z+1
+; CHECK-NEXT: ldd r18, Z+2
+; CHECK-NEXT: ldd r19, Z+3
+; CHECK-NEXT: sts 3, r19
+; CHECK-NEXT: sts 2, r18
+; CHECK-NEXT: sts 1, r25
+; CHECK-NEXT: sts 0, r24
+; CHECK-NEXT: ret
+
+bb0:
+  %2 = getelementptr i8, ptr %1, i16 63
+  %3 = load i32, ptr %2, align 1
+  store i32 %3, ptr null, align 1
+  ret void
+}
+
+define void @check64(ptr %1) {
+; CHECK-LABEL: check64:
+; CHECK-NEXT: %bb.0
+; CHECK-NEXT: subi r24, 192
+; CHECK-NEXT: sbci r25, 255
+; CHECK-NEXT: mov r30, r24
+; CHECK-NEXT: mov r31, r25
+; CHECK-NEXT: ld r24, Z
+; CHECK-NEXT: ldd r25, Z+1
+; CHECK-NEXT: ldd r18, Z+2
+; CHECK-NEXT: ldd r19, Z+3
+; CHECK-NEXT: sts 3, r19
+; CHECK-NEXT: sts 2, r18
+; CHECK-NEXT: sts 1, r25
+; CHECK-NEXT: sts 0, r24
+; CHECK-NEXT: ret
+
+bb0:
+  %2 = getelementptr i8, ptr %1, i16 64
+  %3 = load i32, ptr %2, align 1
+  store i32 %3, ptr null, align 1
+  ret void
+}
+
+define void @check65(ptr %1) {
+; CHECK-LABEL: check65:
+; CHECK-NEXT: %bb.0
+; CHECK-NEXT: subi r24, 191
+; CHECK-NEXT: sbci r25, 255
+; CHECK-NEXT: mov r30, r24
+; CHECK-NEXT: mov r31, r25
+; CHECK-NEXT: ld r24, Z
+; CHECK-NEXT: ldd r25, Z+1
+; CHECK-NEXT: ldd r18, Z+2
+; CHECK-NEXT: ldd r19, Z+3
+; CHECK-NEXT: sts 3, r19
+; CHECK-NEXT: sts 2, r18
+; CHECK-NEXT: sts 1, r25
+; CHECK-NEXT: sts 0, r24
+; CHECK-NEXT: ret
+
+bb0:
+  %2 = getelementptr i8, ptr %1, i16 65
+  %3 = load i32, ptr %2, align 1
+  store i32 %3, ptr null, align 1
+  ret void
+}
diff --git a/llvm/test/CodeGen/AVR/std-immediate-overflow.ll b/llvm/test/CodeGen/AVR/std-immediate-overflow.ll
new file mode 100644
index 0000000000000..18ccb79d3a5f8
--- /dev/null
+++ b/llvm/test/CodeGen/AVR/std-immediate-overflow.ll
@@ -0,0 +1,137 @@
+; RUN: llc -march=avr -filetype=asm -O1 < %s | FileCheck %s
+
+define void @check60(ptr %1) {
+; CHECK-LABEL: check60:
+; CHECK-NEXT: %bb.0
+; CHECK-NEXT: ldi r18, 0
+; CHECK-NEXT: ldi r19, 0
+; CHECK-NEXT: mov r30, r24
+; CHECK-NEXT: mov r31, r25
+; CHECK-NEXT: std Z+63, r19
+; CHECK-NEXT: std Z+62, r18
+; CHECK-NEXT: ldi r24, 210
+; CHECK-NEXT: ldi r25, 4
+; CHECK-NEXT: std Z+61, r25
+; CHECK-NEXT: std Z+60, r24
+; CHECK-NEXT: ret
+
+bb0:
+  %2 = getelementptr i8, ptr %1, i8 60
+  store i32 1234, ptr %2
+  ret void
+}
+
+define void @check61(ptr %1) {
+; CHECK-LABEL: check61:
+; CHECK-NEXT: %bb.0
+; CHECK-NEXT: ldi r18, 210
+; CHECK-NEXT: ldi r19, 4
+; CHECK-NEXT: mov r30, r24
+; CHECK-NEXT: mov r31, r25
+; CHECK-NEXT: std Z+62, r19
+; CHECK-NEXT: std Z+61, r18
+; CHECK-NEXT: adiw r24, 63
+; CHECK-NEXT: ldi r18, 0
+; CHECK-NEXT: ldi r19, 0
+; CHECK-NEXT: mov r30, r24
+; CHECK-NEXT: mov r31, r25
+; CHECK-NEXT: std Z+1, r19
+; CHECK-NEXT: st Z, r18
+
+bb0:
+  %2 = getelementptr i8, ptr %1, i8 61
+  store i32 1234, ptr %2
+  ret void
+}
+
+define void @check62(ptr %1) {
+; CHECK-LABEL: check62:
+; CHECK-NEXT: %bb.0
+; CHECK-NEXT: ldi r18, 210
+; CHECK-NEXT: ldi r19, 4
+; CHECK-NEXT: mov r30, r24
+; CHECK-NEXT: mov r31, r25
+; CHECK-NEXT: std Z+63, r19
+; CHECK-NEXT: std Z+62, r18
+; CHECK-NEXT: adiw r24, 62
+; CHECK-NEXT: ldi r18, 0
+; CHECK-NEXT: ldi r19, 0
+; CHECK-NEXT: mov r30, r24
+; CHECK-NEXT: mov r31, r25
+; CHECK-NEXT: std Z+3, r19
+; CHECK-NEXT: std Z+2, r18
+; CHECK-NEXT: ret
+
+bb0:
+  %2 = getelementptr i8, ptr %1, i8 62
+  store i32 1234, ptr %2
+  ret void
+}
+
+define void @check63(ptr %1) {
+; CHECK-LABEL: check63:
+; CHECK-NEXT: %bb.0
+; CHECK-NEXT: adiw r24, 63
+; CHECK-NEXT: ldi r18, 0
+; CHECK-NEXT: ldi r19, 0
+; CHECK-NEXT: mov r30, r24
+; CHECK-NEXT: mov r31, r25
+; CHECK-NEXT: std Z+3, r19
+; CHECK-NEXT: std Z+2, r18
+; CHECK-NEXT: ldi r24, 210
+; CHECK-NEXT: ldi r25, 4
+; CHECK-NEXT: std Z+1, r25
+; CHECK-NEXT: st Z, r24
+; CHECK-NEXT: ret
+
+bb0:
+  %2 = getelementptr i8, ptr %1, i8 63
+  store i32 1234, ptr %2
+  ret void
+}
+
+define void @check64(ptr %1) {
+; CHECK-LABEL: check64:
+; CHECK-NEXT: %bb.0
+; CHECK-NEXT: subi r24, 192
+; CHECK-NEXT: sbci r25, 255
+; CHECK-NEXT: ldi r18, 0
+; CHECK-NEXT: ldi r19, 0
+; CHECK-NEXT: mov r30, r24
+; CHECK-NEXT: mov r31, r25
+; CHECK-NEXT: std Z+3, r19
+; CHECK-NEXT: std Z+2, r18
+; CHECK-NEXT: ldi r24, 210
+; CHECK-NEXT: ldi r25, 4
+; CHECK-NEXT: std Z+1, r25
+; CHECK-NEXT: st Z, r24
+; CHECK-NEXT: ret
+
+bb0:
+  %2 = getelementptr i8, ptr %1, i8 64
+  store i32 1234, ptr %2
+  ret void
+}
+
+define void @check65(ptr %1) {
+; CHECK-LABEL: check65:
+; CHECK-NEXT: %bb.0
+; CHECK-NEXT: subi r24, 191
+; CHECK-NEXT: sbci r25, 255
+; CHECK-NEXT: ldi r18, 0
+; CHECK-NEXT: ldi r19, 0
+; CHECK-NEXT: mov r30, r24
+; CHECK-NEXT: mov r31, r25
+; CHECK-NEXT: std Z+3, r19
+; CHECK-NEXT: std Z+2, r18
+; CHECK-NEXT: ldi r24, 210
+; CHECK-NEXT: ldi r25, 4
+; CHECK-NEXT: std Z+1, r25
+; CHECK-NEXT: st Z, r24
+; CHECK-NEXT: ret
+
+bb0:
+  %2 = getelementptr i8, ptr %1, i8 65
+  store i32 1234, ptr %2
+  ret void
+}

From e1e7dfeb900bccfd527fa08f0320b9d630f62c21 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Fri, 6 Sep 2024 17:05:32 -0700
Subject: [PATCH 303/427] [lldb] Fix some tests that fail with system libstdc++
 (#106885)

This is a revert of b1fcc1840c312472cb9ccb8c4e5e02ca13b31113.

These tests weren't working on Ubuntu 22.04 or Fedora 37-40. I'm not
sure exactly why, but it seems like they may be incompatible with
libstdc++. Also, despite the fact that the tests were using the system
libstdc++, the tests were only run when libcxx was enabled.

I tested this with a RelWithDebInfo build and the tests passed.

Fixes #106475

(cherry picked from commit adf44d5c3ea03569f019740e1140c3205810b3fa)
---
 .../import-std-module/deque-dbg-info-content/Makefile         | 4 +---
 .../import-std-module/list-dbg-info-content/Makefile          | 4 +---
 .../import-std-module/vector-dbg-info-content/Makefile        | 4 +---
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/lldb/test/API/commands/expression/import-std-module/deque-dbg-info-content/Makefile b/lldb/test/API/commands/expression/import-std-module/deque-dbg-info-content/Makefile
index 98638c56f0b98..f938f7428468a 100644
--- a/lldb/test/API/commands/expression/import-std-module/deque-dbg-info-content/Makefile
+++ b/lldb/test/API/commands/expression/import-std-module/deque-dbg-info-content/Makefile
@@ -1,5 +1,3 @@
-# FIXME: once the expression evaluator can handle std libraries with debug
-# info, change this to USE_LIBCPP=1
-USE_SYSTEM_STDLIB := 1
+USE_LIBCPP := 1
 CXX_SOURCES := main.cpp
 include Makefile.rules
diff --git a/lldb/test/API/commands/expression/import-std-module/list-dbg-info-content/Makefile b/lldb/test/API/commands/expression/import-std-module/list-dbg-info-content/Makefile
index 98638c56f0b98..f938f7428468a 100644
--- a/lldb/test/API/commands/expression/import-std-module/list-dbg-info-content/Makefile
+++ b/lldb/test/API/commands/expression/import-std-module/list-dbg-info-content/Makefile
@@ -1,5 +1,3 @@
-# FIXME: once the expression evaluator can handle std libraries with debug
-# info, change this to USE_LIBCPP=1
-USE_SYSTEM_STDLIB := 1
+USE_LIBCPP := 1
 CXX_SOURCES := main.cpp
 include Makefile.rules
diff --git a/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/Makefile b/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/Makefile
index 98638c56f0b98..f938f7428468a 100644
--- a/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/Makefile
+++ b/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/Makefile
@@ -1,5 +1,3 @@
-# FIXME: once the expression evaluator can handle std libraries with debug
-# info, change this to USE_LIBCPP=1
-USE_SYSTEM_STDLIB := 1
+USE_LIBCPP := 1
 CXX_SOURCES := main.cpp
 include Makefile.rules

From ea3c81afdf928cdc299e4451527b6b9daa7da245 Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12@gmail.com>
Date: Tue, 10 Sep 2024 16:00:40 +0100
Subject: [PATCH 304/427] [lldb][test]
 TestDbgInfoContentVectorFromStdModule.py: skip test on Darwin (#108003)

This started failing on the macOS CI after
https://github.com/llvm/llvm-project/pull/106885:

```
  lldb-api :: commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py

"/Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/lldb-build/bin/clang"  -std=c++11 -g -O0 -isysroot "/Applications/Xcode-beta.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.2.sdk" -arch arm64  -I/Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/llvm-project/lldb/packages/Python/lldbsuite/test/make/../../../../..//include -I/Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/lldb-build/tools/lldb/include -I/Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/llvm-project/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content -I/Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/llvm-project/lldb/packages/Python/lldbsuite/test/make -include /Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/llvm-project/lldb/packages/Python/lldbsuite/test/make/test_common.h  -fno-limit-debug-info    -nostdlib++ -nostdinc++ -cxx-isystem /Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/lldb-build/include/c++/v1  --driver-mode=g++ -MT main.o -MD -MP -MF main.d -c -o main.o /Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/llvm-project/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/main.cpp
"/Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/lldb-build/bin/clang"  main.o -g -O0 -isysroot "/Applications/Xcode-beta.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.2.sdk" -arch arm64  -I/Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/llvm-project/lldb/packages/Python/lldbsuite/test/make/../../../../..//include -I/Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/lldb-build/tools/lldb/include -I/Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/llvm-project/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content -I/Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/llvm-project/lldb/packages/Python/lldbsuite/test/make -include /Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/llvm-project/lldb/packages/Python/lldbsuite/test/make/test_common.h  -fno-limit-debug-info     -L/Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/lldb-build/lib -Wl,-rpath,/Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/lldb-build/lib -lc++ --driver-mode=g++ -o "a.out"
ld: warning: ignoring duplicate libraries: '-lc++'
codesign --entitlements /Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/llvm-project/lldb/packages/Python/lldbsuite/test/make/entitlements-macos.plist -s - "a.out"
"/Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/lldb-build/./bin/dsymutil"  -o "a.out.dSYM" "a.out"

runCmd: settings set target.import-std-module true

output:

runCmd: expr std::reverse(a.begin(), a.end())

Assertion failed: (isa<InjectedClassNameType>(Decl->TypeForDecl)), function getInjectedClassNameType, file ASTContext.cpp, line 5057.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	HandleCommand(command = "expr std::reverse(a.begin(), a.end())")
1.	<eof> parser at end of file
2.	/Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/lldb-build/include/c++/v1/__algorithm/reverse.h:54:1: instantiating function definition 'std::reverse<std::__wrap_iter<Foo *>>'
3.	/Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/lldb-build/include/c++/v1/__algorithm/reverse.h:47:58: instantiating function definition 'std::__reverse<std::_ClassicAlgPolicy, std::__wrap_iter<Foo *>, std::__wrap_iter<Foo *>>'
4.	/Users/ec2-user/jenkins/workspace/llvm.org/as-lldb-cmake/lldb-build/include/c++/v1/__algorithm/reverse.h:40:1: instantiating function definition 'std::__reverse_impl<std::_ClassicAlgPolicy, std::__wrap_iter<Foo *>>'
```

(cherry picked from commit 2bcab9ba7139cfa96c85433fa85b29c8a6d7008b)
---
 .../TestDbgInfoContentVectorFromStdModule.py                     | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py
index 397ac6a14cca8..1c32222e64f14 100644
--- a/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py
+++ b/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py
@@ -13,6 +13,7 @@ class TestDbgInfoContentVector(TestBase):
     @skipIf(compiler=no_match("clang"))
     @skipIf(compiler="clang", compiler_version=["<", "12.0"])
     @skipIf(macos_version=["<", "14.0"])
+    @skipIfDarwin  # https://github.com/llvm/llvm-project/issues/106475
     def test(self):
         self.build()
 

From 65b2007799ea4f11f78dc35962a99850a8a78e81 Mon Sep 17 00:00:00 2001
From: wanglei <wanglei@loongson.cn>
Date: Tue, 10 Sep 2024 09:28:15 +0800
Subject: [PATCH 305/427] [LoongArch] Codegen for concat_vectors with LASX

Fixes: #107355

Reviewed By: SixWeining

Pull Request: https://github.com/llvm/llvm-project/pull/107523

(cherry picked from commit 1ca411ca451e0e86caf9207779616f32ed9fd908)
---
 .../LoongArch/LoongArchISelLowering.cpp       |  1 +
 .../LoongArch/LoongArchLASXInstrInfo.td       |  6 ++++
 .../CodeGen/LoongArch/lasx/issue107355.ll     | 35 +++++++++++++++++++
 3 files changed, 42 insertions(+)
 create mode 100644 llvm/test/CodeGen/LoongArch/lasx/issue107355.ll

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index d80509cf39849..93edafaff553b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -295,6 +295,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+      setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
 
       setOperationAction(ISD::SETCC, VT, Legal);
       setOperationAction(ISD::VSELECT, VT, Legal);
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 6f1969bf8cae0..0a220a0319bc3 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1789,6 +1789,12 @@ def : Pat<(v4i32 (fp_to_uint (v4f64 LASX256:$vj))),
                                                      v4f64:$vj)),
                           sub_128)>;
 
+// XVPERMI_Q
+foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in
+def : Pat<(vt (concat_vectors LSX128:$vd, LSX128:$vj)),
+          (XVPERMI_Q (SUBREG_TO_REG (i64 0), LSX128:$vd, sub_128),
+                     (SUBREG_TO_REG (i64 0), LSX128:$vj, sub_128), 2)>;
+
 } // Predicates = [HasExtLASX]
 
 /// Intrinsic pattern
diff --git a/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll b/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll
new file mode 100644
index 0000000000000..818bd4311615d
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+;; Without this patch(codegen for concat_vectors), the test will hang.
+@g_156 = external global [12 x i32]
+@g_490 = external global i32
+@g_813 = external global i32
+
+define void @foo() {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pcalau12i $a0, %got_pc_hi20(g_156)
+; CHECK-NEXT:    ld.d $a0, $a0, %got_pc_lo12(g_156)
+; CHECK-NEXT:    pcalau12i $a1, %got_pc_hi20(g_490)
+; CHECK-NEXT:    ld.d $a1, $a1, %got_pc_lo12(g_490)
+; CHECK-NEXT:    ld.w $a2, $a0, 24
+; CHECK-NEXT:    pcalau12i $a3, %got_pc_hi20(g_813)
+; CHECK-NEXT:    ld.d $a3, $a3, %got_pc_lo12(g_813)
+; CHECK-NEXT:    st.w $zero, $a1, 0
+; CHECK-NEXT:    st.w $a2, $a3, 0
+; CHECK-NEXT:    vrepli.b $vr0, 0
+; CHECK-NEXT:    vst $vr0, $a0, 32
+; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 2
+; CHECK-NEXT:    xvst $xr0, $a0, 0
+; CHECK-NEXT:    st.w $zero, $a0, 20
+; CHECK-NEXT:    ret
+entry:
+  store i32 0, ptr getelementptr inbounds (i8, ptr @g_156, i64 20), align 4
+  store i32 0, ptr @g_490, align 4
+  %0 = load i32, ptr getelementptr inbounds (i8, ptr @g_156, i64 24), align 4
+  store i32 %0, ptr @g_813, align 4
+  tail call void @llvm.memset.p0.i64(ptr @g_156, i8 0, i64 48, i1 false)
+  store i32 0, ptr getelementptr inbounds (i8, ptr @g_156, i64 20), align 4
+  ret void
+}

From 87c14bdb1a0e865a50880244bcf48e321220f6d9 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Sun, 15 Sep 2024 09:00:28 -0700
Subject: [PATCH 306/427] [X86] Add test for issue 108722; NFC

(cherry picked from commit 1c378d2b145578948942512f9d6985e37659d013)
---
 llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll | 24 +++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
index 67070b989786d..799b541785757 100644
--- a/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
+++ b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
@@ -994,6 +994,30 @@ define i1 @shr_to_rotate_eq_i32_s5(i32 %x) {
   ret i1 %r
 }
 
+define i32 @issue108722(i32 %0) {
+; CHECK-NOBMI-LABEL: issue108722:
+; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    movzbl %dil, %ecx
+; CHECK-NOBMI-NEXT:    shrl $24, %edi
+; CHECK-NOBMI-NEXT:    xorl %eax, %eax
+; CHECK-NOBMI-NEXT:    cmpl %edi, %ecx
+; CHECK-NOBMI-NEXT:    sete %al
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI2-LABEL: issue108722:
+; CHECK-BMI2:       # %bb.0:
+; CHECK-BMI2-NEXT:    rorxl $8, %edi, %ecx
+; CHECK-BMI2-NEXT:    xorl %eax, %eax
+; CHECK-BMI2-NEXT:    cmpl %edi, %ecx
+; CHECK-BMI2-NEXT:    sete %al
+; CHECK-BMI2-NEXT:    retq
+  %2 = tail call i32 @llvm.fshl.i32(i32 %0, i32 %0, i32 24)
+  %3 = icmp eq i32 %2, %0
+  %4 = zext i1 %3 to i32
+  ret i32 %4
+}
+
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK-AVX: {{.*}}
 ; CHECK-NOBMI-SSE2: {{.*}}

From 52a05d380f61f4f04620dc20635a06216c19f276 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Sun, 15 Sep 2024 09:00:34 -0700
Subject: [PATCH 307/427] [X86] Fix missing check of rotate <-> shift
 equivilence (Issue 108722)

Previous code was checking that rotate and shift where equivilent when
transforming shift -> rotate but not the other way around.

Closes #108767

(cherry picked from commit 81279bf97f187eee0446af00d8ae9ec32a22e878)
---
 llvm/lib/Target/X86/X86ISelLowering.cpp   | 2 +-
 llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5a9d679d7002c..45989bcd07d37 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3415,7 +3415,7 @@ unsigned X86TargetLowering::preferedOpcodeForCmpEqPiecesOfOperand(
 
   // We prefer rotate for vectors of if we won't get a zext mask with SRL
   // (PreferRotate will be set in the latter case).
-  if (PreferRotate || VT.isVector())
+  if (PreferRotate || !MayTransformRotate || VT.isVector())
     return ShiftOpc;
 
   // Non-vector type and we have a zext mask with SRL.
diff --git a/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
index 799b541785757..227de9ad0ab69 100644
--- a/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
+++ b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
@@ -997,8 +997,8 @@ define i1 @shr_to_rotate_eq_i32_s5(i32 %x) {
 define i32 @issue108722(i32 %0) {
 ; CHECK-NOBMI-LABEL: issue108722:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    movzbl %dil, %ecx
-; CHECK-NOBMI-NEXT:    shrl $24, %edi
+; CHECK-NOBMI-NEXT:    movl %edi, %ecx
+; CHECK-NOBMI-NEXT:    roll $24, %ecx
 ; CHECK-NOBMI-NEXT:    xorl %eax, %eax
 ; CHECK-NOBMI-NEXT:    cmpl %edi, %ecx
 ; CHECK-NOBMI-NEXT:    sete %al

From b881b16789706f7242190c9de510188a96b84f99 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Mon, 9 Sep 2024 11:23:47 +0800
Subject: [PATCH 308/427] [C++20] [Modules] Treat constexpr/consteval member
 function as implicitly inline

Close https://github.com/llvm/llvm-project/issues/107673

(cherry picked from commit 74ac96ae1a81c7ecc0e27ff6f45309cff1f2df97)
---
 clang/lib/Sema/SemaDecl.cpp      |  1 +
 clang/test/Modules/pr107673.cppm | 12 ++++++++++++
 2 files changed, 13 insertions(+)
 create mode 100644 clang/test/Modules/pr107673.cppm

diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index d608dd92a4b47..d3c07d0a76f12 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -9732,6 +9732,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
     // the function decl is created above).
     // FIXME: We need a better way to separate C++ standard and clang modules.
     bool ImplicitInlineCXX20 = !getLangOpts().CPlusPlusModules ||
+                               NewFD->isConstexpr() || NewFD->isConsteval() ||
                                !NewFD->getOwningModule() ||
                                NewFD->isFromExplicitGlobalModule() ||
                                NewFD->getOwningModule()->isHeaderLikeModule();
diff --git a/clang/test/Modules/pr107673.cppm b/clang/test/Modules/pr107673.cppm
new file mode 100644
index 0000000000000..dc66c9ac2245b
--- /dev/null
+++ b/clang/test/Modules/pr107673.cppm
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -std=c++20 %s -ast-dump | FileCheck %s
+export module a;
+export class f {
+public:
+    void non_inline_func() {}
+    constexpr void constexpr_func() {}
+    consteval void consteval_func() {}
+};
+
+// CHECK-NOT: non_inline_func {{.*}}implicit-inline
+// CHECK: constexpr_func {{.*}}implicit-inline
+// CHECK: consteval_func {{.*}}implicit-inline

From a8ddc3ce6b9560caf1b349b01be1267766a4365a Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Wed, 18 Sep 2024 10:07:57 +0800
Subject: [PATCH 309/427] [C++20] [Modules] Treat in class defined member
 functions in language linkage as implicitly inline

Close https://github.com/llvm/llvm-project/issues/108732

This looks liek an oversight mostly.

(cherry picked from commit 7046a9fb05f65f4699a2e88abbcb7dad8a21db2d)
---
 clang/lib/Sema/SemaDecl.cpp      |  2 +-
 clang/test/Modules/pr108732.cppm | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Modules/pr108732.cppm

diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index d3c07d0a76f12..717ddb8339584 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -9734,7 +9734,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
     bool ImplicitInlineCXX20 = !getLangOpts().CPlusPlusModules ||
                                NewFD->isConstexpr() || NewFD->isConsteval() ||
                                !NewFD->getOwningModule() ||
-                               NewFD->isFromExplicitGlobalModule() ||
+                               NewFD->isFromGlobalModule() ||
                                NewFD->getOwningModule()->isHeaderLikeModule();
     bool isInline = D.getDeclSpec().isInlineSpecified();
     bool isVirtual = D.getDeclSpec().isVirtualSpecified();
diff --git a/clang/test/Modules/pr108732.cppm b/clang/test/Modules/pr108732.cppm
new file mode 100644
index 0000000000000..f3b495aa826ce
--- /dev/null
+++ b/clang/test/Modules/pr108732.cppm
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -std=c++20 %s -ast-dump | FileCheck %s
+export module mod;
+
+extern "C++" {
+class C
+{
+public:
+bool foo() const {
+    return true;
+}
+};
+}
+
+// CHECK: foo {{.*}}implicit-inline

From a4ace8347084101a2a0b6eaf16eca72ffa59c1f2 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Tue, 17 Sep 2024 21:16:20 -0700
Subject: [PATCH 310/427] [clang-format] Reimplement InsertNewlineAtEOF
 (#108513)

Fixes #108333.

(cherry picked from commit 7153a4bbf6d46e58ce32d59220515c5ab9f35691)
---
 clang/lib/Format/FormatTokenLexer.cpp | 7 +++++++
 clang/lib/Format/TokenAnnotator.cpp   | 5 -----
 clang/unittests/Format/FormatTest.cpp | 6 ++++++
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index e21b5a882b777..63949b2e26bdc 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -100,6 +100,13 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
     if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
       FirstInLineIndex = Tokens.size() - 1;
   } while (Tokens.back()->isNot(tok::eof));
+  if (Style.InsertNewlineAtEOF) {
+    auto &TokEOF = *Tokens.back();
+    if (TokEOF.NewlinesBefore == 0) {
+      TokEOF.NewlinesBefore = 1;
+      TokEOF.OriginalColumn = 0;
+    }
+  }
   return Tokens;
 }
 
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 3f00a28e62988..4512e539cc794 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -3680,11 +3680,6 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) {
   auto *First = Line.First;
   First->SpacesRequiredBefore = 1;
   First->CanBreakBefore = First->MustBreakBefore;
-
-  if (First->is(tok::eof) && First->NewlinesBefore == 0 &&
-      Style.InsertNewlineAtEOF) {
-    First->NewlinesBefore = 1;
-  }
 }
 
 // This function heuristically determines whether 'Current' starts the name of a
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 29200b72d3d00..b7d8fc8ea72c6 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -27364,6 +27364,12 @@ TEST_F(FormatTest, InsertNewlineAtEOF) {
 
   verifyNoChange("int i;\n", Style);
   verifyFormat("int i;\n", "int i;", Style);
+
+  constexpr StringRef Code{"namespace {\n"
+                           "int i;\n"
+                           "} // namespace"};
+  verifyFormat(Code.str() + '\n', Code, Style,
+               {tooling::Range(19, 13)}); // line 3
 }
 
 TEST_F(FormatTest, KeepEmptyLinesAtEOF) {

From 38934af5047d0a9b78820ca6b1edbd5ec5fbcf83 Mon Sep 17 00:00:00 2001
From: Jonathan Tanner <10051116+aDifferentJT@users.noreply.github.com>
Date: Fri, 20 Sep 2024 21:56:40 +0100
Subject: [PATCH 311/427] [AA] Take account of C++23's stricter rules for
 forward declarations (NFC) (#109416)

C++23 has stricter rules for forward declarations around
std::unique_ptr, this means that the inline declaration of the
constructor was failing under clang in C++23 mode, switching to an
out-of-line definition of the constructor fixes this.

This was fairly major impact as it blocked inclusion of a lot of headers
under clang in C++23 mode.

Fixes #106597.

(cherry picked from commit 76bc1eddb2cf8b6cc073649ade21b59bbed438a2)
---
 llvm/include/llvm/Analysis/AliasAnalysis.h | 2 +-
 llvm/lib/Analysis/AliasAnalysis.cpp        | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
index 4140387a1f341..1b5a6ee24b861 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -320,7 +320,7 @@ class AAResults {
 public:
   // Make these results default constructable and movable. We have to spell
   // these out because MSVC won't synthesize them.
-  AAResults(const TargetLibraryInfo &TLI) : TLI(TLI) {}
+  AAResults(const TargetLibraryInfo &TLI);
   AAResults(AAResults &&Arg);
   ~AAResults();
 
diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp
index 6eaaad5f332eb..9cdb315b6088f 100644
--- a/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -73,6 +73,8 @@ static cl::opt<bool> EnableAATrace("aa-trace", cl::Hidden, cl::init(false));
 static const bool EnableAATrace = false;
 #endif
 
+AAResults::AAResults(const TargetLibraryInfo &TLI) : TLI(TLI) {}
+
 AAResults::AAResults(AAResults &&Arg)
     : TLI(Arg.TLI), AAs(std::move(Arg.AAs)), AADeps(std::move(Arg.AADeps)) {}
 

From 5d41c20edb2210743cc6c721b6274b7ada1a4cac Mon Sep 17 00:00:00 2001
From: goldsteinn <35538541+goldsteinn@users.noreply.github.com>
Date: Fri, 20 Sep 2024 19:57:35 -0500
Subject: [PATCH 312/427] [Inliner] Fix bug where attributes are propagated
 incorrectly (#109347)

- **[Inliner] Add tests for incorrect propagation of return attrs; NFC**
- **[Inliner] Fix bug where attributes are propagated incorrectly**

The bug stems from the fact that we assume the new (inlined) callsite
is calling the same function as the original (callee) callsite. While
this is typically the case, since `VMap` simplifies the new
instructions, callee intrinsics callsites can end up not corresponding
with the same function.

This can lead to buggy propagation.

(cherry picked from commit a9352a0d31862c15146ca863bde165498e9a80e8)
---
 llvm/lib/Transforms/Utils/InlineFunction.cpp  | 20 ++++++--
 .../Inline/access-attributes-prop.ll          | 21 ++++++++
 .../Inline/ret_attr_align_and_noundef.ll      | 51 +++++++++++++++++++
 3 files changed, 88 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 9c9fc7a49a9d1..68696789530f4 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1349,7 +1349,8 @@ static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
 // Add attributes from CB params and Fn attributes that can always be propagated
 // to the corresponding argument / inner callbases.
 static void AddParamAndFnBasicAttributes(const CallBase &CB,
-                                         ValueToValueMapTy &VMap) {
+                                         ValueToValueMapTy &VMap,
+                                         ClonedCodeInfo &InlinedFunctionInfo) {
   auto *CalledFunction = CB.getCalledFunction();
   auto &Context = CalledFunction->getContext();
 
@@ -1380,6 +1381,11 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
       auto *NewInnerCB = dyn_cast_or_null<CallBase>(VMap.lookup(InnerCB));
       if (!NewInnerCB)
         continue;
+      // The InnerCB might have be simplified during the inlining
+      // process which can make propagation incorrect.
+      if (InlinedFunctionInfo.isSimplified(InnerCB, NewInnerCB))
+        continue;
+
       AttributeList AL = NewInnerCB->getAttributes();
       for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) {
         // Check if the underlying value for the parameter is an argument.
@@ -1455,7 +1461,8 @@ static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) {
   return Valid;
 }
 
-static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
+static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap,
+                                ClonedCodeInfo &InlinedFunctionInfo) {
   AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB);
   AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB);
   if (!ValidUB.hasAttributes() && !ValidPG.hasAttributes())
@@ -1474,6 +1481,11 @@ static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
     auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal));
     if (!NewRetVal)
       continue;
+
+    // The RetVal might have be simplified during the inlining
+    // process which can make propagation incorrect.
+    if (InlinedFunctionInfo.isSimplified(RetVal, NewRetVal))
+      continue;
     // Backward propagation of attributes to the returned value may be incorrect
     // if it is control flow dependent.
     // Consider:
@@ -2456,11 +2468,11 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
 
     // Clone return attributes on the callsite into the calls within the inlined
     // function which feed into its return value.
-    AddReturnAttributes(CB, VMap);
+    AddReturnAttributes(CB, VMap, InlinedFunctionInfo);
 
     // Clone attributes on the params of the callsite to calls within the
     // inlined function which use the same param.
-    AddParamAndFnBasicAttributes(CB, VMap);
+    AddParamAndFnBasicAttributes(CB, VMap, InlinedFunctionInfo);
 
     propagateMemProfMetadata(CalledFunc, CB,
                              InlinedFunctionInfo.ContainsMemProfMetadata, VMap);
diff --git a/llvm/test/Transforms/Inline/access-attributes-prop.ll b/llvm/test/Transforms/Inline/access-attributes-prop.ll
index 965f0335b63ea..2c55f5f3b1f6c 100644
--- a/llvm/test/Transforms/Inline/access-attributes-prop.ll
+++ b/llvm/test/Transforms/Inline/access-attributes-prop.ll
@@ -559,3 +559,24 @@ define void @prop_byval_readonly(ptr %p) {
   call void @foo_byval_readonly(ptr %p)
   ret void
 }
+
+define ptr @caller_bad_param_prop(ptr %p1, ptr %p2, i64 %x) {
+; CHECK-LABEL: define {{[^@]+}}@caller_bad_param_prop
+; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]], i64 [[X:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call ptr [[P1]](i64 [[X]], ptr [[P2]])
+; CHECK-NEXT:    ret ptr [[TMP1]]
+;
+  %1 = call ptr %p1(i64 %x, ptr %p2)
+  %2 = call ptr @callee_bad_param_prop(ptr %1)
+  ret ptr %2
+}
+
+define ptr @callee_bad_param_prop(ptr readonly %x) {
+; CHECK-LABEL: define {{[^@]+}}@callee_bad_param_prop
+; CHECK-SAME: (ptr readonly [[X:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 -1)
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %r = tail call ptr @llvm.ptrmask(ptr %x, i64 -1)
+  ret ptr %r
+}
diff --git a/llvm/test/Transforms/Inline/ret_attr_align_and_noundef.ll b/llvm/test/Transforms/Inline/ret_attr_align_and_noundef.ll
index f4cebf1fcb5da..930bef43df1db 100644
--- a/llvm/test/Transforms/Inline/ret_attr_align_and_noundef.ll
+++ b/llvm/test/Transforms/Inline/ret_attr_align_and_noundef.ll
@@ -410,3 +410,54 @@ define i8 @caller15_okay_intersect_ranges() {
   call void @use.val(i8 %r)
   ret i8 %r
 }
+
+define i8 @caller16_not_intersecting_ranges() {
+; CHECK-LABEL: define i8 @caller16_not_intersecting_ranges() {
+; CHECK-NEXT:    [[R_I:%.*]] = call range(i8 0, 0) i8 @val8()
+; CHECK-NEXT:    call void @use.val(i8 [[R_I]])
+; CHECK-NEXT:    ret i8 [[R_I]]
+;
+  %r = call range(i8 0, 5) i8 @callee15()
+  call void @use.val(i8 %r)
+  ret i8 %r
+}
+
+
+define ptr @caller_bad_ret_prop(ptr %p1, ptr %p2, i64 %x, ptr %other) {
+; CHECK-LABEL: define ptr @caller_bad_ret_prop
+; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]], i64 [[X:%.*]], ptr [[OTHER:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call noundef ptr [[P1]](i64 [[X]], ptr [[P2]])
+; CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq ptr [[TMP1]], null
+; CHECK-NEXT:    br i1 [[CMP_I]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK:       T.i:
+; CHECK-NEXT:    br label [[CALLEE_BAD_RET_PROP_EXIT:%.*]]
+; CHECK:       F.i:
+; CHECK-NEXT:    br label [[CALLEE_BAD_RET_PROP_EXIT]]
+; CHECK:       callee_bad_ret_prop.exit:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi ptr [ [[OTHER]], [[T_I]] ], [ [[TMP1]], [[F_I]] ]
+; CHECK-NEXT:    ret ptr [[TMP2]]
+;
+  %1 = call noundef ptr %p1(i64 %x, ptr %p2)
+  %2 = call nonnull ptr @callee_bad_ret_prop(ptr %1, ptr %other)
+  ret ptr %2
+}
+
+define ptr @callee_bad_ret_prop(ptr %x, ptr %other) {
+; CHECK-LABEL: define ptr @callee_bad_ret_prop
+; CHECK-SAME: (ptr [[X:%.*]], ptr [[OTHER:%.*]]) {
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[X]], null
+; CHECK-NEXT:    br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    ret ptr [[OTHER]]
+; CHECK:       F:
+; CHECK-NEXT:    [[R:%.*]] = tail call ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 -1)
+; CHECK-NEXT:    ret ptr [[R]]
+;
+  %cmp = icmp eq ptr %x, null
+  br i1 %cmp, label %T, label %F
+T:
+  ret ptr %other
+F:
+  %r = tail call ptr @llvm.ptrmask(ptr %x, i64 -1)
+  ret ptr %r
+}

From c011dce7c2834942d35c2c77f11dafa4bb83f28e Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Wed, 28 Aug 2024 18:22:57 -0700
Subject: [PATCH 313/427] workflows/release-binaries: Enable flang builds on
 Windows (#101344)

Flang for Windows depends on compiler-rt, so we need to enable it for
the stage1 builds. This also fixes failures building the flang tests on
macOS.

Fixes #100202.

(cherry picked from commit 8927576b8f6442bb6129bda597efee46176f8aec)
---
 .github/workflows/release-binaries.yml | 8 --------
 clang/cmake/caches/Release.cmake       | 7 +++++--
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml
index fcd371d49e6c9..925912df6843e 100644
--- a/.github/workflows/release-binaries.yml
+++ b/.github/workflows/release-binaries.yml
@@ -135,16 +135,8 @@ jobs:
           target_cmake_flags="$target_cmake_flags -DBOOTSTRAP_DARWIN_osx_ARCHS=$arches -DBOOTSTRAP_DARWIN_osx_BUILTIN_ARCHS=$arches"
         fi
 
-        # x86 macOS and x86 Windows have trouble building flang, so disable it.
-        # Windows: https://github.com/llvm/llvm-project/issues/100202
-        # macOS: 'rebase opcodes terminated early at offset 1 of 80016' when building __fortran_builtins.mod
         build_flang="true"
 
-        if [ "$target" = "Windows-X64" ]; then
-          target_cmake_flags="$target_cmake_flags -DLLVM_RELEASE_ENABLE_PROJECTS=\"clang;lld;lldb;clang-tools-extra;bolt;polly;mlir\""
-          build_flang="false"
-        fi
-
         if [ "${{ runner.os }}" = "Windows" ]; then
           # The build times out on Windows, so we need to disable LTO.
           target_cmake_flags="$target_cmake_flags -DLLVM_RELEASE_ENABLE_LTO=OFF"
diff --git a/clang/cmake/caches/Release.cmake b/clang/cmake/caches/Release.cmake
index e5161dd9a27b9..6d5f75ca0074e 100644
--- a/clang/cmake/caches/Release.cmake
+++ b/clang/cmake/caches/Release.cmake
@@ -47,11 +47,14 @@ set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
 set(CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "")
 
 set(STAGE1_PROJECTS "clang")
-set(STAGE1_RUNTIMES "")
+
+# Building Flang on Windows requires compiler-rt, so we need to build it in
+# stage1.  compiler-rt is also required for building the Flang tests on
+# macOS.
+set(STAGE1_RUNTIMES "compiler-rt")
 
 if (LLVM_RELEASE_ENABLE_PGO)
   list(APPEND STAGE1_PROJECTS "lld")
-  list(APPEND STAGE1_RUNTIMES "compiler-rt")
   set(CLANG_BOOTSTRAP_TARGETS
     generate-profdata
     stage2-package

From 1c499a7200692e9428111829a4e323b452823825 Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames@gmail.com>
Date: Tue, 10 Sep 2024 13:47:17 +1000
Subject: [PATCH 314/427] [ORC] Remove EDU from dependants list of dependencies
 before destroying.

Dependant lists hold raw pointers back to EDUs that depend on them. We need to
remove these entries before destroying the EDU or we'll be left with a dangling
reference that can result in use-after-free bugs.

No testcase: This has only been observed in multi-threaded setups that
reproduce the issue inconsistently.

rdar://135403614
(cherry picked from commit 7034ec491251e598d2867199f89fefa3eb16a1a0)
---
 llvm/lib/ExecutionEngine/Orc/Core.cpp | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp
index 3e6de62c8b7dd..f70c2890521d3 100644
--- a/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -3592,6 +3592,21 @@ ExecutionSession::IL_failSymbols(JITDylib &JD,
       assert(MI.DefiningEDU->Symbols.count(NonOwningSymbolStringPtr(Name)) &&
              "Symbol does not appear in its DefiningEDU");
       MI.DefiningEDU->Symbols.erase(NonOwningSymbolStringPtr(Name));
+
+      // Remove this EDU from the dependants lists of its dependencies.
+      for (auto &[DepJD, DepSyms] : MI.DefiningEDU->Dependencies) {
+        for (auto DepSym : DepSyms) {
+          assert(DepJD->Symbols.count(SymbolStringPtr(DepSym)) &&
+                 "DepSym not in DepJD");
+          assert(DepJD->MaterializingInfos.count(SymbolStringPtr(DepSym)) &&
+                 "DepSym has not MaterializingInfo");
+          auto &SymMI = DepJD->MaterializingInfos[SymbolStringPtr(DepSym)];
+          assert(SymMI.DependantEDUs.count(MI.DefiningEDU.get()) &&
+                 "DefiningEDU missing from DependantEDUs list of dependency");
+          SymMI.DependantEDUs.erase(MI.DefiningEDU.get());
+        }
+      }
+
       MI.DefiningEDU = nullptr;
     } else {
       // Otherwise if there are any EDUs waiting on this symbol then move

From 4c51d827e58aaa8c5b3d75b3b61a43627ab53491 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Tue, 24 Sep 2024 15:37:02 +0800
Subject: [PATCH 315/427] [C++20] [Modules] Add Decl::isFromGlobalModule

---
 clang/include/clang/AST/DeclBase.h | 3 +++
 clang/lib/AST/DeclBase.cpp         | 4 ++++
 2 files changed, 7 insertions(+)

diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h
index 2a4bd0f9c2fda..04dbd1db6cba8 100644
--- a/clang/include/clang/AST/DeclBase.h
+++ b/clang/include/clang/AST/DeclBase.h
@@ -680,6 +680,9 @@ class alignas(8) Decl {
   /// Whether this declaration comes from explicit global module.
   bool isFromExplicitGlobalModule() const;
 
+  /// Whether this declaration comes from global module.
+  bool isFromGlobalModule() const;
+
   /// Whether this declaration comes from a named module.
   bool isInNamedModule() const;
 
diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp
index b59f118380ca4..c4e948a38e264 100644
--- a/clang/lib/AST/DeclBase.cpp
+++ b/clang/lib/AST/DeclBase.cpp
@@ -1161,6 +1161,10 @@ bool Decl::isFromExplicitGlobalModule() const {
   return getOwningModule() && getOwningModule()->isExplicitGlobalModule();
 }
 
+bool Decl::isFromGlobalModule() const {
+  return getOwningModule() && getOwningModule()->isGlobalModule();
+}
+
 bool Decl::isInNamedModule() const {
   return getOwningModule() && getOwningModule()->isNamedModule();
 }

From 8a25c601eb64bcdb7c6c74bee52655468dfdd91b Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Wed, 25 Sep 2024 12:40:14 -0400
Subject: [PATCH 316/427] [libc++] Disable the clang-tidy checks to get CI back
 (#109989)

The CI has been a complete mess for the past week, and the only thing
preventing it from being back is the Clang tidy checks. Disable them (as
a total hack) to get CI back.

(cherry picked from commit 78c6506543dee13c9335edc5c85bc73c4853fbd7)
---
 libcxx/test/tools/clang_tidy_checks/CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt
index f0289dc44c662..125b2184a49ea 100644
--- a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt
+++ b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt
@@ -1,3 +1,5 @@
+# TODO: Re-enable the tests once the CI is back under control
+return()
 
 # The find_package changes these variables. This leaves the build in an odd
 # state. Calling cmake a second time tries to write site config information in

From 149bfdd61c961edbf49c2ea7fadf9d3c1a79a55e Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333@gmail.com>
Date: Wed, 4 Sep 2024 12:19:46 +0800
Subject: [PATCH 317/427] [Clang][CodeGen] Fix type for atomic float incdec
 operators (#107075)

`llvm::ConstantFP::get(llvm::LLVMContext&, APFloat(float))` always
returns a f32 constant.
Fix https://github.com/llvm/llvm-project/issues/107054.
---
 clang/lib/CodeGen/CGExprScalar.cpp            |  26 +-
 clang/test/CodeGen/X86/x86-atomic-double.c    |  88 +++---
 .../test/CodeGen/X86/x86-atomic-long_double.c | 293 ++++++++++++++----
 3 files changed, 300 insertions(+), 107 deletions(-)

diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index a17d68424bbce..6e212e74676e8 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2833,18 +2833,22 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
                                   llvm::AtomicOrdering::SequentiallyConsistent);
       return isPre ? Builder.CreateBinOp(op, old, amt) : old;
     }
-    // Special case for atomic increment/decrement on floats
+    // Special case for atomic increment/decrement on floats.
+    // Bail out non-power-of-2-sized floating point types (e.g., x86_fp80).
     if (type->isFloatingType()) {
-      llvm::AtomicRMWInst::BinOp aop =
-          isInc ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::FSub;
-      llvm::Instruction::BinaryOps op =
-          isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub;
-      llvm::Value *amt = llvm::ConstantFP::get(
-          VMContext, llvm::APFloat(static_cast<float>(1.0)));
-      llvm::Value *old =
-          Builder.CreateAtomicRMW(aop, LV.getAddress(), amt,
-                                  llvm::AtomicOrdering::SequentiallyConsistent);
-      return isPre ? Builder.CreateBinOp(op, old, amt) : old;
+      llvm::Type *Ty = ConvertType(type);
+      if (llvm::has_single_bit(Ty->getScalarSizeInBits())) {
+        llvm::AtomicRMWInst::BinOp aop =
+            isInc ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::FSub;
+        llvm::Instruction::BinaryOps op =
+            isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub;
+        llvm::Value *amt = llvm::ConstantFP::get(Ty, 1.0);
+        llvm::AtomicRMWInst *old = Builder.CreateAtomicRMW(
+            aop, LV.getAddress(), amt,
+            llvm::AtomicOrdering::SequentiallyConsistent);
+
+        return isPre ? Builder.CreateBinOp(op, old, amt) : old;
+      }
     }
     value = EmitLoadOfLValue(LV, E->getExprLoc());
     input = value;
diff --git a/clang/test/CodeGen/X86/x86-atomic-double.c b/clang/test/CodeGen/X86/x86-atomic-double.c
index 2354c89cc2b17..09c8f70c3db85 100644
--- a/clang/test/CodeGen/X86/x86-atomic-double.c
+++ b/clang/test/CodeGen/X86/x86-atomic-double.c
@@ -6,20 +6,14 @@
 // X64-LABEL: define dso_local double @test_double_post_inc(
 // X64-SAME: ) #[[ATTR0:[0-9]+]] {
 // X64-NEXT:  entry:
-// X64-NEXT:    [[RETVAL:%.*]] = alloca double, align 8
-// X64-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, float 1.000000e+00 seq_cst, align 8
-// X64-NEXT:    store float [[TMP0]], ptr [[RETVAL]], align 8
-// X64-NEXT:    [[TMP1:%.*]] = load double, ptr [[RETVAL]], align 8
-// X64-NEXT:    ret double [[TMP1]]
+// X64-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, double 1.000000e+00 seq_cst, align 8
+// X64-NEXT:    ret double [[TMP0]]
 //
 // X86-LABEL: define dso_local double @test_double_post_inc(
 // X86-SAME: ) #[[ATTR0:[0-9]+]] {
 // X86-NEXT:  entry:
-// X86-NEXT:    [[RETVAL:%.*]] = alloca double, align 4
-// X86-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, float 1.000000e+00 seq_cst, align 8
-// X86-NEXT:    store float [[TMP0]], ptr [[RETVAL]], align 4
-// X86-NEXT:    [[TMP1:%.*]] = load double, ptr [[RETVAL]], align 4
-// X86-NEXT:    ret double [[TMP1]]
+// X86-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, double 1.000000e+00 seq_cst, align 8
+// X86-NEXT:    ret double [[TMP0]]
 //
 double test_double_post_inc()
 {
@@ -30,20 +24,14 @@ double test_double_post_inc()
 // X64-LABEL: define dso_local double @test_double_post_dc(
 // X64-SAME: ) #[[ATTR0]] {
 // X64-NEXT:  entry:
-// X64-NEXT:    [[RETVAL:%.*]] = alloca double, align 8
-// X64-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_post_dc.n, float 1.000000e+00 seq_cst, align 8
-// X64-NEXT:    store float [[TMP0]], ptr [[RETVAL]], align 8
-// X64-NEXT:    [[TMP1:%.*]] = load double, ptr [[RETVAL]], align 8
-// X64-NEXT:    ret double [[TMP1]]
+// X64-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_post_dc.n, double 1.000000e+00 seq_cst, align 8
+// X64-NEXT:    ret double [[TMP0]]
 //
 // X86-LABEL: define dso_local double @test_double_post_dc(
 // X86-SAME: ) #[[ATTR0]] {
 // X86-NEXT:  entry:
-// X86-NEXT:    [[RETVAL:%.*]] = alloca double, align 4
-// X86-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_post_dc.n, float 1.000000e+00 seq_cst, align 8
-// X86-NEXT:    store float [[TMP0]], ptr [[RETVAL]], align 4
-// X86-NEXT:    [[TMP1:%.*]] = load double, ptr [[RETVAL]], align 4
-// X86-NEXT:    ret double [[TMP1]]
+// X86-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_post_dc.n, double 1.000000e+00 seq_cst, align 8
+// X86-NEXT:    ret double [[TMP0]]
 //
 double test_double_post_dc()
 {
@@ -54,22 +42,16 @@ double test_double_post_dc()
 // X64-LABEL: define dso_local double @test_double_pre_dc(
 // X64-SAME: ) #[[ATTR0]] {
 // X64-NEXT:  entry:
-// X64-NEXT:    [[RETVAL:%.*]] = alloca double, align 8
-// X64-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_pre_dc.n, float 1.000000e+00 seq_cst, align 8
-// X64-NEXT:    [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
-// X64-NEXT:    store float [[TMP1]], ptr [[RETVAL]], align 8
-// X64-NEXT:    [[TMP2:%.*]] = load double, ptr [[RETVAL]], align 8
-// X64-NEXT:    ret double [[TMP2]]
+// X64-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_pre_dc.n, double 1.000000e+00 seq_cst, align 8
+// X64-NEXT:    [[TMP1:%.*]] = fsub double [[TMP0]], 1.000000e+00
+// X64-NEXT:    ret double [[TMP1]]
 //
 // X86-LABEL: define dso_local double @test_double_pre_dc(
 // X86-SAME: ) #[[ATTR0]] {
 // X86-NEXT:  entry:
-// X86-NEXT:    [[RETVAL:%.*]] = alloca double, align 4
-// X86-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_pre_dc.n, float 1.000000e+00 seq_cst, align 8
-// X86-NEXT:    [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
-// X86-NEXT:    store float [[TMP1]], ptr [[RETVAL]], align 4
-// X86-NEXT:    [[TMP2:%.*]] = load double, ptr [[RETVAL]], align 4
-// X86-NEXT:    ret double [[TMP2]]
+// X86-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_pre_dc.n, double 1.000000e+00 seq_cst, align 8
+// X86-NEXT:    [[TMP1:%.*]] = fsub double [[TMP0]], 1.000000e+00
+// X86-NEXT:    ret double [[TMP1]]
 //
 double test_double_pre_dc()
 {
@@ -80,25 +62,43 @@ double test_double_pre_dc()
 // X64-LABEL: define dso_local double @test_double_pre_inc(
 // X64-SAME: ) #[[ATTR0]] {
 // X64-NEXT:  entry:
-// X64-NEXT:    [[RETVAL:%.*]] = alloca double, align 8
-// X64-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_pre_inc.n, float 1.000000e+00 seq_cst, align 8
-// X64-NEXT:    [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
-// X64-NEXT:    store float [[TMP1]], ptr [[RETVAL]], align 8
-// X64-NEXT:    [[TMP2:%.*]] = load double, ptr [[RETVAL]], align 8
-// X64-NEXT:    ret double [[TMP2]]
+// X64-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_pre_inc.n, double 1.000000e+00 seq_cst, align 8
+// X64-NEXT:    [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00
+// X64-NEXT:    ret double [[TMP1]]
 //
 // X86-LABEL: define dso_local double @test_double_pre_inc(
 // X86-SAME: ) #[[ATTR0]] {
 // X86-NEXT:  entry:
-// X86-NEXT:    [[RETVAL:%.*]] = alloca double, align 4
-// X86-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_pre_inc.n, float 1.000000e+00 seq_cst, align 8
-// X86-NEXT:    [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
-// X86-NEXT:    store float [[TMP1]], ptr [[RETVAL]], align 4
-// X86-NEXT:    [[TMP2:%.*]] = load double, ptr [[RETVAL]], align 4
-// X86-NEXT:    ret double [[TMP2]]
+// X86-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_pre_inc.n, double 1.000000e+00 seq_cst, align 8
+// X86-NEXT:    [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00
+// X86-NEXT:    ret double [[TMP1]]
 //
 double test_double_pre_inc()
 {
     static _Atomic double n;
     return ++n;
 }
+
+// X64-LABEL: define dso_local i32 @pr107054(
+// X64-SAME: ) #[[ATTR0]] {
+// X64-NEXT:  entry:
+// X64-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr @pr107054.n, double 1.000000e+00 seq_cst, align 8
+// X64-NEXT:    [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00
+// X64-NEXT:    [[CMP:%.*]] = fcmp oeq double [[TMP1]], 1.000000e+00
+// X64-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+// X64-NEXT:    ret i32 [[CONV]]
+//
+// X86-LABEL: define dso_local i32 @pr107054(
+// X86-SAME: ) #[[ATTR0]] {
+// X86-NEXT:  entry:
+// X86-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr @pr107054.n, double 1.000000e+00 seq_cst, align 8
+// X86-NEXT:    [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00
+// X86-NEXT:    [[CMP:%.*]] = fcmp oeq double [[TMP1]], 1.000000e+00
+// X86-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+// X86-NEXT:    ret i32 [[CONV]]
+//
+int pr107054()
+{
+    static _Atomic double n;
+    return (++n) == 1;
+}
diff --git a/clang/test/CodeGen/X86/x86-atomic-long_double.c b/clang/test/CodeGen/X86/x86-atomic-long_double.c
index 2c3f381f13511..9c82784807dac 100644
--- a/clang/test/CodeGen/X86/x86-atomic-long_double.c
+++ b/clang/test/CodeGen/X86/x86-atomic-long_double.c
@@ -4,29 +4,60 @@
 
 // X64-LABEL: define dso_local x86_fp80 @testinc(
 // X64-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
-// X64-NEXT:  [[ENTRY:.*:]]
-// X64-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:  [[ENTRY:.*]]:
 // X64-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// X64-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:    [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 16
 // X64-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
 // X64-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
-// X64-NEXT:    [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16
-// X64-NEXT:    [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00
-// X64-NEXT:    store float [[TMP2]], ptr [[RETVAL]], align 16
-// X64-NEXT:    [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
-// X64-NEXT:    ret x86_fp80 [[TMP3]]
+// X64-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i128, ptr [[TMP0]] seq_cst, align 16
+// X64-NEXT:    store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 16
+// X64-NEXT:    [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 16
+// X64-NEXT:    br label %[[ATOMIC_OP:.*]]
+// X64:       [[ATOMIC_OP]]:
+// X64-NEXT:    [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP8:%.*]], %[[ATOMIC_OP]] ]
+// X64-NEXT:    [[INC:%.*]] = fadd x86_fp80 [[TMP2]], 0xK3FFF8000000000000000
+// X64-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP1]], i8 0, i64 16, i1 false)
+// X64-NEXT:    store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 16
+// X64-NEXT:    [[TMP3:%.*]] = load i128, ptr [[ATOMIC_TEMP1]], align 16
+// X64-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP2]], i8 0, i64 16, i1 false)
+// X64-NEXT:    store x86_fp80 [[INC]], ptr [[ATOMIC_TEMP2]], align 16
+// X64-NEXT:    [[TMP4:%.*]] = load i128, ptr [[ATOMIC_TEMP2]], align 16
+// X64-NEXT:    [[TMP5:%.*]] = cmpxchg ptr [[TMP0]], i128 [[TMP3]], i128 [[TMP4]] seq_cst seq_cst, align 16
+// X64-NEXT:    [[TMP6:%.*]] = extractvalue { i128, i1 } [[TMP5]], 0
+// X64-NEXT:    [[TMP7:%.*]] = extractvalue { i128, i1 } [[TMP5]], 1
+// X64-NEXT:    store i128 [[TMP6]], ptr [[ATOMIC_TEMP3]], align 16
+// X64-NEXT:    [[TMP8]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 16
+// X64-NEXT:    br i1 [[TMP7]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// X64:       [[ATOMIC_CONT]]:
+// X64-NEXT:    ret x86_fp80 [[INC]]
 //
 // X86-LABEL: define dso_local x86_fp80 @testinc(
 // X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
-// X86-NEXT:  [[ENTRY:.*:]]
-// X86-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 4
+// X86-NEXT:  [[ENTRY:.*]]:
 // X86-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// X86-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4
+// X86-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4
+// X86-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4
 // X86-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
 // X86-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
-// X86-NEXT:    [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4
-// X86-NEXT:    [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00
-// X86-NEXT:    store float [[TMP2]], ptr [[RETVAL]], align 4
-// X86-NEXT:    [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4
-// X86-NEXT:    ret x86_fp80 [[TMP3]]
+// X86-NEXT:    call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// X86-NEXT:    [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 4
+// X86-NEXT:    br label %[[ATOMIC_OP:.*]]
+// X86:       [[ATOMIC_OP]]:
+// X86-NEXT:    [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP3:%.*]], %[[ATOMIC_OP]] ]
+// X86-NEXT:    [[INC:%.*]] = fadd x86_fp80 [[TMP2]], 0xK3FFF8000000000000000
+// X86-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], i8 0, i64 12, i1 false)
+// X86-NEXT:    store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4
+// X86-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false)
+// X86-NEXT:    store x86_fp80 [[INC]], ptr [[ATOMIC_TEMP2]], align 4
+// X86-NEXT:    [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5)
+// X86-NEXT:    [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4
+// X86-NEXT:    br i1 [[CALL]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// X86:       [[ATOMIC_CONT]]:
+// X86-NEXT:    ret x86_fp80 [[INC]]
 //
 long double testinc(_Atomic long double *addr) {
 
@@ -35,27 +66,60 @@ long double testinc(_Atomic long double *addr) {
 
 // X64-LABEL: define dso_local x86_fp80 @testdec(
 // X64-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
-// X64-NEXT:  [[ENTRY:.*:]]
-// X64-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:  [[ENTRY:.*]]:
 // X64-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// X64-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:    [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 16
 // X64-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
 // X64-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
-// X64-NEXT:    [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16
-// X64-NEXT:    store float [[TMP1]], ptr [[RETVAL]], align 16
-// X64-NEXT:    [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
-// X64-NEXT:    ret x86_fp80 [[TMP2]]
+// X64-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i128, ptr [[TMP0]] seq_cst, align 16
+// X64-NEXT:    store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 16
+// X64-NEXT:    [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 16
+// X64-NEXT:    br label %[[ATOMIC_OP:.*]]
+// X64:       [[ATOMIC_OP]]:
+// X64-NEXT:    [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP8:%.*]], %[[ATOMIC_OP]] ]
+// X64-NEXT:    [[DEC:%.*]] = fadd x86_fp80 [[TMP2]], 0xKBFFF8000000000000000
+// X64-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP1]], i8 0, i64 16, i1 false)
+// X64-NEXT:    store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 16
+// X64-NEXT:    [[TMP3:%.*]] = load i128, ptr [[ATOMIC_TEMP1]], align 16
+// X64-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP2]], i8 0, i64 16, i1 false)
+// X64-NEXT:    store x86_fp80 [[DEC]], ptr [[ATOMIC_TEMP2]], align 16
+// X64-NEXT:    [[TMP4:%.*]] = load i128, ptr [[ATOMIC_TEMP2]], align 16
+// X64-NEXT:    [[TMP5:%.*]] = cmpxchg ptr [[TMP0]], i128 [[TMP3]], i128 [[TMP4]] seq_cst seq_cst, align 16
+// X64-NEXT:    [[TMP6:%.*]] = extractvalue { i128, i1 } [[TMP5]], 0
+// X64-NEXT:    [[TMP7:%.*]] = extractvalue { i128, i1 } [[TMP5]], 1
+// X64-NEXT:    store i128 [[TMP6]], ptr [[ATOMIC_TEMP3]], align 16
+// X64-NEXT:    [[TMP8]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 16
+// X64-NEXT:    br i1 [[TMP7]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// X64:       [[ATOMIC_CONT]]:
+// X64-NEXT:    ret x86_fp80 [[TMP1]]
 //
 // X86-LABEL: define dso_local x86_fp80 @testdec(
 // X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
-// X86-NEXT:  [[ENTRY:.*:]]
-// X86-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 4
+// X86-NEXT:  [[ENTRY:.*]]:
 // X86-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// X86-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4
+// X86-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4
+// X86-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4
 // X86-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
 // X86-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
-// X86-NEXT:    [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4
-// X86-NEXT:    store float [[TMP1]], ptr [[RETVAL]], align 4
-// X86-NEXT:    [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4
-// X86-NEXT:    ret x86_fp80 [[TMP2]]
+// X86-NEXT:    call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// X86-NEXT:    [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 4
+// X86-NEXT:    br label %[[ATOMIC_OP:.*]]
+// X86:       [[ATOMIC_OP]]:
+// X86-NEXT:    [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP3:%.*]], %[[ATOMIC_OP]] ]
+// X86-NEXT:    [[DEC:%.*]] = fadd x86_fp80 [[TMP2]], 0xKBFFF8000000000000000
+// X86-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], i8 0, i64 12, i1 false)
+// X86-NEXT:    store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4
+// X86-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false)
+// X86-NEXT:    store x86_fp80 [[DEC]], ptr [[ATOMIC_TEMP2]], align 4
+// X86-NEXT:    [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5)
+// X86-NEXT:    [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4
+// X86-NEXT:    br i1 [[CALL]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// X86:       [[ATOMIC_CONT]]:
+// X86-NEXT:    ret x86_fp80 [[TMP1]]
 //
 long double testdec(_Atomic long double *addr) {
 
@@ -175,29 +239,60 @@ long double testassign(_Atomic long double *addr) {
 
 // X64-LABEL: define dso_local x86_fp80 @test_volatile_inc(
 // X64-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
-// X64-NEXT:  [[ENTRY:.*:]]
-// X64-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:  [[ENTRY:.*]]:
 // X64-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// X64-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:    [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 16
 // X64-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
 // X64-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
-// X64-NEXT:    [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16
-// X64-NEXT:    [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00
-// X64-NEXT:    store float [[TMP2]], ptr [[RETVAL]], align 16
-// X64-NEXT:    [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
-// X64-NEXT:    ret x86_fp80 [[TMP3]]
+// X64-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic volatile i128, ptr [[TMP0]] seq_cst, align 16
+// X64-NEXT:    store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 16
+// X64-NEXT:    [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 16
+// X64-NEXT:    br label %[[ATOMIC_OP:.*]]
+// X64:       [[ATOMIC_OP]]:
+// X64-NEXT:    [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP8:%.*]], %[[ATOMIC_OP]] ]
+// X64-NEXT:    [[INC:%.*]] = fadd x86_fp80 [[TMP2]], 0xK3FFF8000000000000000
+// X64-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP1]], i8 0, i64 16, i1 false)
+// X64-NEXT:    store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 16
+// X64-NEXT:    [[TMP3:%.*]] = load i128, ptr [[ATOMIC_TEMP1]], align 16
+// X64-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP2]], i8 0, i64 16, i1 false)
+// X64-NEXT:    store x86_fp80 [[INC]], ptr [[ATOMIC_TEMP2]], align 16
+// X64-NEXT:    [[TMP4:%.*]] = load i128, ptr [[ATOMIC_TEMP2]], align 16
+// X64-NEXT:    [[TMP5:%.*]] = cmpxchg volatile ptr [[TMP0]], i128 [[TMP3]], i128 [[TMP4]] seq_cst seq_cst, align 16
+// X64-NEXT:    [[TMP6:%.*]] = extractvalue { i128, i1 } [[TMP5]], 0
+// X64-NEXT:    [[TMP7:%.*]] = extractvalue { i128, i1 } [[TMP5]], 1
+// X64-NEXT:    store i128 [[TMP6]], ptr [[ATOMIC_TEMP3]], align 16
+// X64-NEXT:    [[TMP8]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 16
+// X64-NEXT:    br i1 [[TMP7]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// X64:       [[ATOMIC_CONT]]:
+// X64-NEXT:    ret x86_fp80 [[INC]]
 //
 // X86-LABEL: define dso_local x86_fp80 @test_volatile_inc(
 // X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
-// X86-NEXT:  [[ENTRY:.*:]]
-// X86-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 4
+// X86-NEXT:  [[ENTRY:.*]]:
 // X86-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// X86-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4
+// X86-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4
+// X86-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4
 // X86-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
 // X86-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
-// X86-NEXT:    [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4
-// X86-NEXT:    [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00
-// X86-NEXT:    store float [[TMP2]], ptr [[RETVAL]], align 4
-// X86-NEXT:    [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4
-// X86-NEXT:    ret x86_fp80 [[TMP3]]
+// X86-NEXT:    call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// X86-NEXT:    [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 4
+// X86-NEXT:    br label %[[ATOMIC_OP:.*]]
+// X86:       [[ATOMIC_OP]]:
+// X86-NEXT:    [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP3:%.*]], %[[ATOMIC_OP]] ]
+// X86-NEXT:    [[INC:%.*]] = fadd x86_fp80 [[TMP2]], 0xK3FFF8000000000000000
+// X86-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], i8 0, i64 12, i1 false)
+// X86-NEXT:    store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4
+// X86-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false)
+// X86-NEXT:    store x86_fp80 [[INC]], ptr [[ATOMIC_TEMP2]], align 4
+// X86-NEXT:    [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5)
+// X86-NEXT:    [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4
+// X86-NEXT:    br i1 [[CALL]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// X86:       [[ATOMIC_CONT]]:
+// X86-NEXT:    ret x86_fp80 [[INC]]
 //
 long double test_volatile_inc(volatile _Atomic long double *addr) {
   return ++*addr;
@@ -205,27 +300,60 @@ long double test_volatile_inc(volatile _Atomic long double *addr) {
 
 // X64-LABEL: define dso_local x86_fp80 @test_volatile_dec(
 // X64-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
-// X64-NEXT:  [[ENTRY:.*:]]
-// X64-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:  [[ENTRY:.*]]:
 // X64-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// X64-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:    [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 16
 // X64-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
 // X64-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
-// X64-NEXT:    [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16
-// X64-NEXT:    store float [[TMP1]], ptr [[RETVAL]], align 16
-// X64-NEXT:    [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
-// X64-NEXT:    ret x86_fp80 [[TMP2]]
+// X64-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic volatile i128, ptr [[TMP0]] seq_cst, align 16
+// X64-NEXT:    store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 16
+// X64-NEXT:    [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 16
+// X64-NEXT:    br label %[[ATOMIC_OP:.*]]
+// X64:       [[ATOMIC_OP]]:
+// X64-NEXT:    [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP8:%.*]], %[[ATOMIC_OP]] ]
+// X64-NEXT:    [[DEC:%.*]] = fadd x86_fp80 [[TMP2]], 0xKBFFF8000000000000000
+// X64-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP1]], i8 0, i64 16, i1 false)
+// X64-NEXT:    store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 16
+// X64-NEXT:    [[TMP3:%.*]] = load i128, ptr [[ATOMIC_TEMP1]], align 16
+// X64-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP2]], i8 0, i64 16, i1 false)
+// X64-NEXT:    store x86_fp80 [[DEC]], ptr [[ATOMIC_TEMP2]], align 16
+// X64-NEXT:    [[TMP4:%.*]] = load i128, ptr [[ATOMIC_TEMP2]], align 16
+// X64-NEXT:    [[TMP5:%.*]] = cmpxchg volatile ptr [[TMP0]], i128 [[TMP3]], i128 [[TMP4]] seq_cst seq_cst, align 16
+// X64-NEXT:    [[TMP6:%.*]] = extractvalue { i128, i1 } [[TMP5]], 0
+// X64-NEXT:    [[TMP7:%.*]] = extractvalue { i128, i1 } [[TMP5]], 1
+// X64-NEXT:    store i128 [[TMP6]], ptr [[ATOMIC_TEMP3]], align 16
+// X64-NEXT:    [[TMP8]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 16
+// X64-NEXT:    br i1 [[TMP7]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// X64:       [[ATOMIC_CONT]]:
+// X64-NEXT:    ret x86_fp80 [[TMP1]]
 //
 // X86-LABEL: define dso_local x86_fp80 @test_volatile_dec(
 // X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
-// X86-NEXT:  [[ENTRY:.*:]]
-// X86-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 4
+// X86-NEXT:  [[ENTRY:.*]]:
 // X86-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// X86-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4
+// X86-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4
+// X86-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4
 // X86-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
 // X86-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
-// X86-NEXT:    [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4
-// X86-NEXT:    store float [[TMP1]], ptr [[RETVAL]], align 4
-// X86-NEXT:    [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4
-// X86-NEXT:    ret x86_fp80 [[TMP2]]
+// X86-NEXT:    call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// X86-NEXT:    [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 4
+// X86-NEXT:    br label %[[ATOMIC_OP:.*]]
+// X86:       [[ATOMIC_OP]]:
+// X86-NEXT:    [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP3:%.*]], %[[ATOMIC_OP]] ]
+// X86-NEXT:    [[DEC:%.*]] = fadd x86_fp80 [[TMP2]], 0xKBFFF8000000000000000
+// X86-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], i8 0, i64 12, i1 false)
+// X86-NEXT:    store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4
+// X86-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false)
+// X86-NEXT:    store x86_fp80 [[DEC]], ptr [[ATOMIC_TEMP2]], align 4
+// X86-NEXT:    [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5)
+// X86-NEXT:    [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4
+// X86-NEXT:    br i1 [[CALL]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// X86:       [[ATOMIC_CONT]]:
+// X86-NEXT:    ret x86_fp80 [[TMP1]]
 //
 long double test_volatile_dec(volatile _Atomic long double *addr) {
   return (*addr)--;
@@ -341,3 +469,64 @@ long double test_volatile_assign(volatile _Atomic long double *addr) {
 
   return *addr;
 }
+
+// X64-LABEL: define dso_local i32 @pr107054(
+// X64-SAME: ) #[[ATTR0]] {
+// X64-NEXT:  [[ENTRY:.*]]:
+// X64-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:    [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 16
+// X64-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i128, ptr @pr107054.n seq_cst, align 16
+// X64-NEXT:    store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 16
+// X64-NEXT:    [[TMP0:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 16
+// X64-NEXT:    br label %[[ATOMIC_OP:.*]]
+// X64:       [[ATOMIC_OP]]:
+// X64-NEXT:    [[TMP1:%.*]] = phi x86_fp80 [ [[TMP0]], %[[ENTRY]] ], [ [[TMP7:%.*]], %[[ATOMIC_OP]] ]
+// X64-NEXT:    [[INC:%.*]] = fadd x86_fp80 [[TMP1]], 0xK3FFF8000000000000000
+// X64-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP1]], i8 0, i64 16, i1 false)
+// X64-NEXT:    store x86_fp80 [[TMP1]], ptr [[ATOMIC_TEMP1]], align 16
+// X64-NEXT:    [[TMP2:%.*]] = load i128, ptr [[ATOMIC_TEMP1]], align 16
+// X64-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP2]], i8 0, i64 16, i1 false)
+// X64-NEXT:    store x86_fp80 [[INC]], ptr [[ATOMIC_TEMP2]], align 16
+// X64-NEXT:    [[TMP3:%.*]] = load i128, ptr [[ATOMIC_TEMP2]], align 16
+// X64-NEXT:    [[TMP4:%.*]] = cmpxchg ptr @pr107054.n, i128 [[TMP2]], i128 [[TMP3]] seq_cst seq_cst, align 16
+// X64-NEXT:    [[TMP5:%.*]] = extractvalue { i128, i1 } [[TMP4]], 0
+// X64-NEXT:    [[TMP6:%.*]] = extractvalue { i128, i1 } [[TMP4]], 1
+// X64-NEXT:    store i128 [[TMP5]], ptr [[ATOMIC_TEMP3]], align 16
+// X64-NEXT:    [[TMP7]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 16
+// X64-NEXT:    br i1 [[TMP6]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// X64:       [[ATOMIC_CONT]]:
+// X64-NEXT:    [[CMP:%.*]] = fcmp oeq x86_fp80 [[INC]], 0xK3FFF8000000000000000
+// X64-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+// X64-NEXT:    ret i32 [[CONV]]
+//
+// X86-LABEL: define dso_local i32 @pr107054(
+// X86-SAME: ) #[[ATTR0]] {
+// X86-NEXT:  [[ENTRY:.*]]:
+// X86-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4
+// X86-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4
+// X86-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4
+// X86-NEXT:    call void @__atomic_load(i32 noundef 12, ptr noundef @pr107054.n, ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// X86-NEXT:    [[TMP0:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 4
+// X86-NEXT:    br label %[[ATOMIC_OP:.*]]
+// X86:       [[ATOMIC_OP]]:
+// X86-NEXT:    [[TMP1:%.*]] = phi x86_fp80 [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[ATOMIC_OP]] ]
+// X86-NEXT:    [[INC:%.*]] = fadd x86_fp80 [[TMP1]], 0xK3FFF8000000000000000
+// X86-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], i8 0, i64 12, i1 false)
+// X86-NEXT:    store x86_fp80 [[TMP1]], ptr [[ATOMIC_TEMP1]], align 4
+// X86-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false)
+// X86-NEXT:    store x86_fp80 [[INC]], ptr [[ATOMIC_TEMP2]], align 4
+// X86-NEXT:    [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef @pr107054.n, ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5)
+// X86-NEXT:    [[TMP2]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4
+// X86-NEXT:    br i1 [[CALL]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]]
+// X86:       [[ATOMIC_CONT]]:
+// X86-NEXT:    [[CMP:%.*]] = fcmp oeq x86_fp80 [[INC]], 0xK3FFF8000000000000000
+// X86-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+// X86-NEXT:    ret i32 [[CONV]]
+//
+int pr107054()
+{
+    static _Atomic long double n;
+    return (++n) == 1;
+}

From b3731b36421e23737be2b4785700267b96c3241f Mon Sep 17 00:00:00 2001
From: Princeton Ferro <pferro@nvidia.com>
Date: Wed, 4 Sep 2024 07:18:53 -0700
Subject: [PATCH 318/427] [DAGCombiner] cache negative result from
 getMergeStoreCandidates() (#106949)

Cache negative search result from getStoreMergeCandidates() so that
mergeConsecutiveStores() does not iterate quadratically over a
potentially long sequence of unmergeable stores.

(cherry picked from commit 8f77d37f256809766fd83a09c6d144b785e9165a)
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 83 ++++++++++++-------
 1 file changed, 51 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 71cdec91e5f67..7b1f1dc40211d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -191,6 +191,11 @@ namespace {
     // AA - Used for DAG load/store alias analysis.
     AliasAnalysis *AA;
 
+    /// This caches all chains that have already been processed in
+    /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
+    /// stores candidates.
+    SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
+
     /// When an instruction is simplified, add all users of the instruction to
     /// the work lists because they might get more simplified now.
     void AddUsersToWorklist(SDNode *N) {
@@ -776,11 +781,10 @@ namespace {
                                          bool UseTrunc);
 
     /// This is a helper function for mergeConsecutiveStores. Stores that
-    /// potentially may be merged with St are placed in StoreNodes. RootNode is
-    /// a chain predecessor to all store candidates.
-    void getStoreMergeCandidates(StoreSDNode *St,
-                                 SmallVectorImpl<MemOpLink> &StoreNodes,
-                                 SDNode *&Root);
+    /// potentially may be merged with St are placed in StoreNodes. On success,
+    /// returns a chain predecessor to all store candidates.
+    SDNode *getStoreMergeCandidates(StoreSDNode *St,
+                                    SmallVectorImpl<MemOpLink> &StoreNodes);
 
     /// Helper function for mergeConsecutiveStores. Checks if candidate stores
     /// have indirect dependency through their operands. RootNode is the
@@ -1782,6 +1786,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
 
     ++NodesCombined;
 
+    // Invalidate cached info.
+    ChainsWithoutMergeableStores.clear();
+
     // If we get back the same node we passed in, rather than a new node or
     // zero, we know that the node must have defined multiple values and
     // CombineTo was used.  Since CombineTo takes care of the worklist
@@ -20372,15 +20379,15 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
   return true;
 }
 
-void DAGCombiner::getStoreMergeCandidates(
-    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
-    SDNode *&RootNode) {
+SDNode *
+DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
+                                     SmallVectorImpl<MemOpLink> &StoreNodes) {
   // This holds the base pointer, index, and the offset in bytes from the base
   // pointer. We must have a base and an offset. Do not handle stores to undef
   // base pointers.
   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
   if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
-    return;
+    return nullptr;
 
   SDValue Val = peekThroughBitcasts(St->getValue());
   StoreSource StoreSrc = getStoreSource(Val);
@@ -20396,14 +20403,14 @@ void DAGCombiner::getStoreMergeCandidates(
     LoadVT = Ld->getMemoryVT();
     // Load and store should be the same type.
     if (MemVT != LoadVT)
-      return;
+      return nullptr;
     // Loads must only have one use.
     if (!Ld->hasNUsesOfValue(1, 0))
-      return;
+      return nullptr;
     // The memory operands must not be volatile/indexed/atomic.
     // TODO: May be able to relax for unordered atomics (see D66309)
     if (!Ld->isSimple() || Ld->isIndexed())
-      return;
+      return nullptr;
   }
   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                             int64_t &Offset) -> bool {
@@ -20471,6 +20478,27 @@ void DAGCombiner::getStoreMergeCandidates(
     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
   };
 
+  // We are looking for a root node which is an ancestor to all mergable
+  // stores. We search up through a load, to our root and then down
+  // through all children. For instance we will find Store{1,2,3} if
+  // St is Store1, Store2. or Store3 where the root is not a load
+  // which always true for nonvolatile ops. TODO: Expand
+  // the search to find all valid candidates through multiple layers of loads.
+  //
+  // Root
+  // |-------|-------|
+  // Load    Load    Store3
+  // |       |
+  // Store1   Store2
+  //
+  // FIXME: We should be able to climb and
+  // descend TokenFactors to find candidates as well.
+
+  SDNode *RootNode = St->getChain().getNode();
+  // Bail out if we already analyzed this root node and found nothing.
+  if (ChainsWithoutMergeableStores.contains(RootNode))
+    return nullptr;
+
   // Check if the pair of StoreNode and the RootNode already bail out many
   // times which is over the limit in dependence check.
   auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
@@ -20494,28 +20522,13 @@ void DAGCombiner::getStoreMergeCandidates(
     }
   };
 
-  // We looking for a root node which is an ancestor to all mergable
-  // stores. We search up through a load, to our root and then down
-  // through all children. For instance we will find Store{1,2,3} if
-  // St is Store1, Store2. or Store3 where the root is not a load
-  // which always true for nonvolatile ops. TODO: Expand
-  // the search to find all valid candidates through multiple layers of loads.
-  //
-  // Root
-  // |-------|-------|
-  // Load    Load    Store3
-  // |       |
-  // Store1   Store2
-  //
-  // FIXME: We should be able to climb and
-  // descend TokenFactors to find candidates as well.
-
-  RootNode = St->getChain().getNode();
-
   unsigned NumNodesExplored = 0;
   const unsigned MaxSearchNodes = 1024;
   if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
     RootNode = Ldn->getChain().getNode();
+    // Bail out if we already analyzed this root node and found nothing.
+    if (ChainsWithoutMergeableStores.contains(RootNode))
+      return nullptr;
     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
@@ -20532,6 +20545,8 @@ void DAGCombiner::getStoreMergeCandidates(
          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
       TryToAddCandidate(I);
   }
+
+  return RootNode;
 }
 
 // We need to check that merging these stores does not cause a loop in the
@@ -21162,9 +21177,8 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
     return false;
 
   SmallVector<MemOpLink, 8> StoreNodes;
-  SDNode *RootNode;
   // Find potential store merge candidates by searching through chain sub-DAG
-  getStoreMergeCandidates(St, StoreNodes, RootNode);
+  SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);
 
   // Check if there is anything to merge.
   if (StoreNodes.size() < 2)
@@ -21220,6 +21234,11 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
       llvm_unreachable("Unhandled store source type");
     }
   }
+
+  // Remember if we failed to optimize, to save compile time.
+  if (!MadeChange)
+    ChainsWithoutMergeableStores.insert(RootNode);
+
   return MadeChange;
 }
 

From a7554dfc222b13624426ebd6ef46e122b9c16ee7 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333@gmail.com>
Date: Tue, 10 Sep 2024 09:19:39 +0800
Subject: [PATCH 319/427]  [LoongArch][ISel] Check the number of sign bits in
 `PatGprGpr_32` (#107432)

After https://github.com/llvm/llvm-project/pull/92205, LoongArch ISel
selects `div.w` for `trunc i64 (sdiv i64 3202030857, (sext i32 X to
i64)) to i32`. It is incorrect since `3202030857` is not a signed 32-bit
constant. It will produce wrong result when `X == 2`:
https://alive2.llvm.org/ce/z/pzfGZZ

This patch adds additional `sexti32` checks to operands of
`PatGprGpr_32`.
Alive2 proof: https://alive2.llvm.org/ce/z/AkH5Mp

Fix #107414.

(cherry picked from commit a111f9119a5ec77c19a514ec09454218f739454f)
---
 .../Target/LoongArch/LoongArchInstrInfo.td    |  5 +-
 .../ir-instruction/sdiv-udiv-srem-urem.ll     | 67 ++++++++++++++++++-
 2 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index ef647a4277873..339d50bd81921 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1065,10 +1065,13 @@ def RDTIME_D : RDTIME_2R<0x00006800>;
 
 /// Generic pattern classes
 
+def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{
+  return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLE(MVT::i32);
+}]>;
 class PatGprGpr<SDPatternOperator OpNode, LAInst Inst>
     : Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>;
 class PatGprGpr_32<SDPatternOperator OpNode, LAInst Inst>
-    : Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, GPR:$rk)>;
+    : Pat<(sext_inreg (OpNode (assertsexti32 GPR:$rj), (assertsexti32 GPR:$rk)), i32), (Inst GPR:$rj, GPR:$rk)>;
 class PatGpr<SDPatternOperator OpNode, LAInst Inst>
     : Pat<(OpNode GPR:$rj), (Inst GPR:$rj)>;
 
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index ab3eec240db3c..c22acdb496907 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -191,7 +191,8 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    div.w $a0, $a0, $a1
+; LA64-NEXT:    div.d $a0, $a0, $a1
+; LA64-NEXT:    addi.w $a0, $a0, 0
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32:
@@ -207,11 +208,12 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:    div.w $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.d $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB5_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
 ; LA64-TRAP-NEXT:  .LBB5_2: # %entry
+; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:    ret
 entry:
   %r = sdiv i32 %a, %b
@@ -1151,3 +1153,64 @@ entry:
   %r = urem i64 %a, %b
   ret i64 %r
 }
+
+define signext i32 @pr107414(i32 signext %x) {
+; LA32-LABEL: pr107414:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    .cfi_def_cfa_offset 16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    .cfi_offset 1, -4
+; LA32-NEXT:    move $a2, $a0
+; LA32-NEXT:    srai.w $a3, $a0, 31
+; LA32-NEXT:    lu12i.w $a0, -266831
+; LA32-NEXT:    ori $a0, $a0, 3337
+; LA32-NEXT:    move $a1, $zero
+; LA32-NEXT:    bl %plt(__divdi3)
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: pr107414:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a1, -266831
+; LA64-NEXT:    ori $a1, $a1, 3337
+; LA64-NEXT:    lu32i.d $a1, 0
+; LA64-NEXT:    div.d $a0, $a1, $a0
+; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    ret
+;
+; LA32-TRAP-LABEL: pr107414:
+; LA32-TRAP:       # %bb.0: # %entry
+; LA32-TRAP-NEXT:    addi.w $sp, $sp, -16
+; LA32-TRAP-NEXT:    .cfi_def_cfa_offset 16
+; LA32-TRAP-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-TRAP-NEXT:    .cfi_offset 1, -4
+; LA32-TRAP-NEXT:    move $a2, $a0
+; LA32-TRAP-NEXT:    srai.w $a3, $a0, 31
+; LA32-TRAP-NEXT:    lu12i.w $a0, -266831
+; LA32-TRAP-NEXT:    ori $a0, $a0, 3337
+; LA32-TRAP-NEXT:    move $a1, $zero
+; LA32-TRAP-NEXT:    bl %plt(__divdi3)
+; LA32-TRAP-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-TRAP-NEXT:    addi.w $sp, $sp, 16
+; LA32-TRAP-NEXT:    ret
+;
+; LA64-TRAP-LABEL: pr107414:
+; LA64-TRAP:       # %bb.0: # %entry
+; LA64-TRAP-NEXT:    lu12i.w $a1, -266831
+; LA64-TRAP-NEXT:    ori $a1, $a1, 3337
+; LA64-TRAP-NEXT:    lu32i.d $a1, 0
+; LA64-TRAP-NEXT:    div.d $a1, $a1, $a0
+; LA64-TRAP-NEXT:    bnez $a0, .LBB32_2
+; LA64-TRAP-NEXT:  # %bb.1: # %entry
+; LA64-TRAP-NEXT:    break 7
+; LA64-TRAP-NEXT:  .LBB32_2: # %entry
+; LA64-TRAP-NEXT:    addi.w $a0, $a1, 0
+; LA64-TRAP-NEXT:    ret
+entry:
+  %conv = sext i32 %x to i64
+  %div = sdiv i64 3202030857, %conv
+  %conv1 = trunc i64 %div to i32
+  ret i32 %conv1
+}

From 99058521d4c80635f60b2c1442b683395e0ee818 Mon Sep 17 00:00:00 2001
From: hev <wangrui@loongson.cn>
Date: Tue, 10 Sep 2024 16:52:21 +0800
Subject: [PATCH 320/427] [LoongArch] Eliminate the redundant sign extension of
 division (#107971)

If all incoming values of `div.d` are sign-extended and all users only
use the lower 32 bits, then convert them to W versions.

Fixes: #107946
(cherry picked from commit 0f47e3aebdd2a4a938468a272ea4224552dbf176)
---
 llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp | 15 +++++++++++++++
 .../ir-instruction/sdiv-udiv-srem-urem.ll         |  6 ++----
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp b/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
index abac69054f3b9..ab90409fdf47d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
@@ -637,6 +637,19 @@ static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST,
         break;
       }
       return false;
+    // If all incoming values are sign-extended and all users only use
+    // the lower 32 bits, then convert them to W versions.
+    case LoongArch::DIV_D: {
+      if (!AddRegToWorkList(MI->getOperand(1).getReg()))
+        return false;
+      if (!AddRegToWorkList(MI->getOperand(2).getReg()))
+        return false;
+      if (hasAllWUsers(*MI, ST, MRI)) {
+        FixableDef.insert(MI);
+        break;
+      }
+      return false;
+    }
     }
   }
 
@@ -651,6 +664,8 @@ static unsigned getWOp(unsigned Opcode) {
     return LoongArch::ADDI_W;
   case LoongArch::ADD_D:
     return LoongArch::ADD_W;
+  case LoongArch::DIV_D:
+    return LoongArch::DIV_W;
   case LoongArch::LD_D:
   case LoongArch::LD_WU:
     return LoongArch::LD_W;
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index c22acdb496907..c5af79157eaad 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -191,8 +191,7 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    addi.w $a0, $a0, 0
-; LA64-NEXT:    div.d $a0, $a0, $a1
-; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    div.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
 ;
 ; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32:
@@ -208,12 +207,11 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64-TRAP:       # %bb.0: # %entry
 ; LA64-TRAP-NEXT:    addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:    div.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:    div.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:    bnez $a1, .LBB5_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:    break 7
 ; LA64-TRAP-NEXT:  .LBB5_2: # %entry
-; LA64-TRAP-NEXT:    addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:    ret
 entry:
   %r = sdiv i32 %a, %b

From aaa7027716ad347cda75865e99a2ff654bed6bf1 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Fri, 20 Sep 2024 16:57:46 +0200
Subject: [PATCH 321/427] [LoopPeel] Fix LCSSA phi node invalidation

In the test case, the BECount of the second loop uses %load,
but we only have an LCSSA phi node for %add, so that is what
gets invalidated. Use the forgetLcssaPhiWithNewPredecessor()
API instead, which will invalidate the roots of the expression
instead.

Fixes https://github.com/llvm/llvm-project/issues/109333.

(cherry picked from commit 5bcc82d43388bb0daa122d5fe7ecda5eca27fc16)
---
 llvm/lib/Transforms/Utils/LoopPeel.cpp      |   2 +-
 llvm/test/Transforms/LoopUnroll/pr109333.ll | 104 ++++++++++++++++++++
 2 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/pr109333.ll

diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 5d7c0d947facc..760f1619e030c 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -859,7 +859,7 @@ static void cloneLoopBlocks(
       if (LatchInst && L->contains(LatchInst))
         LatchVal = VMap[LatchVal];
       PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
-      SE.forgetValue(&PHI);
+      SE.forgetLcssaPhiWithNewPredecessor(L, &PHI);
     }
 
   // LastValueMap is updated with the values for the current loop
diff --git a/llvm/test/Transforms/LoopUnroll/pr109333.ll b/llvm/test/Transforms/LoopUnroll/pr109333.ll
new file mode 100644
index 0000000000000..f7ac911a78207
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/pr109333.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes="print<scalar-evolution>,loop-unroll" -unroll-runtime < %s 2>/dev/null | FileCheck %s
+
+; Make sure we use %add.lcssa rather than %load when expanding the
+; backedge taken count.
+
+define void @test(i1 %c, ptr %p) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[LOOP_1_PEEL_BEGIN:.*]]
+; CHECK:       [[LOOP_1_PEEL_BEGIN]]:
+; CHECK-NEXT:    br label %[[LOOP_1_PEEL:.*]]
+; CHECK:       [[LOOP_1_PEEL]]:
+; CHECK-NEXT:    [[LOAD_PEEL:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT:    [[ADD_PEEL:%.*]] = add i64 [[LOAD_PEEL]], 1
+; CHECK-NEXT:    br i1 [[C]], label %[[IF:.*]], label %[[LOOP_1_PEEL_NEXT:.*]]
+; CHECK:       [[LOOP_1_PEEL_NEXT]]:
+; CHECK-NEXT:    br label %[[LOOP_1_PEEL_NEXT1:.*]]
+; CHECK:       [[LOOP_1_PEEL_NEXT1]]:
+; CHECK-NEXT:    br label %[[ENTRY_PEEL_NEWPH:.*]]
+; CHECK:       [[ENTRY_PEEL_NEWPH]]:
+; CHECK-NEXT:    br label %[[LOOP_1:.*]]
+; CHECK:       [[LOOP_1]]:
+; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[LOAD]], 1
+; CHECK-NEXT:    br i1 [[C]], label %[[IF_LOOPEXIT:.*]], label %[[LOOP_1]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[IF_LOOPEXIT]]:
+; CHECK-NEXT:    [[ADD_LCSSA_PH:%.*]] = phi i64 [ [[ADD]], %[[LOOP_1]] ]
+; CHECK-NEXT:    br label %[[IF]]
+; CHECK:       [[IF]]:
+; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD_PEEL]], %[[LOOP_1_PEEL]] ], [ [[ADD_LCSSA_PH]], %[[IF_LOOPEXIT]] ]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i64, ptr [[P]], i64 [[ADD_LCSSA]]
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i64 [[ADD_LCSSA]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP2]], 7
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_2_PROL_PREHEADER:.*]], label %[[LOOP_2_PROL_LOOPEXIT:.*]]
+; CHECK:       [[LOOP_2_PROL_PREHEADER]]:
+; CHECK-NEXT:    br label %[[LOOP_2_PROL:.*]]
+; CHECK:       [[LOOP_2_PROL]]:
+; CHECK-NEXT:    [[IV_PROL:%.*]] = phi ptr [ [[P]], %[[LOOP_2_PROL_PREHEADER]] ], [ [[IV_NEXT_PROL:%.*]], %[[LOOP_2_PROL]] ]
+; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_2_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[LOOP_2_PROL]] ]
+; CHECK-NEXT:    [[IV_NEXT_PROL]] = getelementptr i8, ptr [[IV_PROL]], i64 8
+; CHECK-NEXT:    [[ICMP_PROL:%.*]] = icmp eq ptr [[IV_PROL]], [[GEP]]
+; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label %[[LOOP_2_PROL]], label %[[LOOP_2_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       [[LOOP_2_PROL_LOOPEXIT_UNR_LCSSA]]:
+; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi ptr [ [[IV_NEXT_PROL]], %[[LOOP_2_PROL]] ]
+; CHECK-NEXT:    br label %[[LOOP_2_PROL_LOOPEXIT]]
+; CHECK:       [[LOOP_2_PROL_LOOPEXIT]]:
+; CHECK-NEXT:    [[IV_UNR:%.*]] = phi ptr [ [[P]], %[[IF]] ], [ [[IV_UNR_PH]], %[[LOOP_2_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP1]], 7
+; CHECK-NEXT:    br i1 [[TMP3]], label %[[EXIT:.*]], label %[[IF_NEW:.*]]
+; CHECK:       [[IF_NEW]]:
+; CHECK-NEXT:    br label %[[LOOP_2:.*]]
+; CHECK:       [[LOOP_2]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi ptr [ [[IV_UNR]], %[[IF_NEW]] ], [ [[IV_NEXT_7:%.*]], %[[LOOP_2]] ]
+; CHECK-NEXT:    [[IV_NEXT:%.*]] = getelementptr i8, ptr [[IV]], i64 8
+; CHECK-NEXT:    [[IV_NEXT_1:%.*]] = getelementptr i8, ptr [[IV_NEXT]], i64 8
+; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = getelementptr i8, ptr [[IV_NEXT_1]], i64 8
+; CHECK-NEXT:    [[IV_NEXT_3:%.*]] = getelementptr i8, ptr [[IV_NEXT_2]], i64 8
+; CHECK-NEXT:    [[IV_NEXT_4:%.*]] = getelementptr i8, ptr [[IV_NEXT_3]], i64 8
+; CHECK-NEXT:    [[IV_NEXT_5:%.*]] = getelementptr i8, ptr [[IV_NEXT_4]], i64 8
+; CHECK-NEXT:    [[IV_NEXT_6:%.*]] = getelementptr i8, ptr [[IV_NEXT_5]], i64 8
+; CHECK-NEXT:    [[IV_NEXT_7]] = getelementptr i8, ptr [[IV_NEXT_6]], i64 8
+; CHECK-NEXT:    [[ICMP_7:%.*]] = icmp eq ptr [[IV_NEXT_6]], [[GEP]]
+; CHECK-NEXT:    br i1 [[ICMP_7]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP_2]]
+; CHECK:       [[EXIT_UNR_LCSSA]]:
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.1
+
+loop.1:
+  %phi = phi ptr [ null, %entry ], [ %p, %loop.1 ]
+  %load = load i64, ptr %p, align 8
+  %add = add i64 %load, 1
+  br i1 %c, label %if, label %loop.1
+
+if:
+  %add.lcssa = phi i64 [ %add, %loop.1 ]
+  %gep = getelementptr i64, ptr %p, i64 %add.lcssa
+  br label %loop.2
+
+loop.2:
+  %iv = phi ptr [ %p, %if ], [ %iv.next, %loop.2 ]
+  %iv.next = getelementptr i8, ptr %iv, i64 8
+  %icmp = icmp eq ptr %iv, %gep
+  br i1 %icmp, label %exit, label %loop.2
+
+exit:
+  ret void
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
+; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
+; CHECK: [[META3]] = !{!"llvm.loop.unroll.disable"}
+;.

From 7d1f2065d68795b6fc6de4953f9f0ac719cf1c65 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Thu, 12 Sep 2024 22:20:14 +0300
Subject: [PATCH 322/427] [clang-scan-deps] Infer the target from the
 executable name (#108189)

This allows clang-scan-deps to work correctly when using cross compilers
with names like <triple>-clang.

(cherry picked from commit 87e1104cf0e2de0d04bee2944893fa7897277b2f)
---
 clang/test/ClangScanDeps/implicit-target.c    | 31 +++++++++++++++++++
 clang/tools/clang-scan-deps/ClangScanDeps.cpp |  5 +++
 2 files changed, 36 insertions(+)
 create mode 100644 clang/test/ClangScanDeps/implicit-target.c

diff --git a/clang/test/ClangScanDeps/implicit-target.c b/clang/test/ClangScanDeps/implicit-target.c
new file mode 100644
index 0000000000000..cf757f937331a
--- /dev/null
+++ b/clang/test/ClangScanDeps/implicit-target.c
@@ -0,0 +1,31 @@
+// Check that we can detect an implicit target when clang is invoked as
+// <triple->clang. Using an implicit triple requires that the target actually
+// is available, too.
+// REQUIRES: x86-registered-target
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
+
+// Check that we can deduce this both when using a compilation database, and when using
+// a literal command line.
+
+// RUN: clang-scan-deps -format experimental-full -compilation-database %t/cdb.json | FileCheck %s
+
+// RUN: clang-scan-deps -format experimental-full -- x86_64-w64-mingw32-clang %t/source.c -o %t/source.o | FileCheck %s
+
+// CHECK: "-triple",
+// CHECK-NEXT: "x86_64-w64-windows-gnu",
+
+
+//--- cdb.json.in
+[
+  {
+    "directory": "DIR"
+    "command": "x86_64-w64-mingw32-clang -c DIR/source.c -o DIR/source.o"
+    "file": "DIR/source.c"
+  },
+]
+
+//--- source.c
+void func(void) {}
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index a8f6150dd3493..cd6dd2620152a 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -15,6 +15,7 @@
 #include "clang/Tooling/DependencyScanning/DependencyScanningTool.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
 #include "clang/Tooling/JSONCompilationDatabase.h"
+#include "clang/Tooling/Tooling.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/CommandLine.h"
@@ -24,6 +25,7 @@
 #include "llvm/Support/LLVMDriver.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/Threading.h"
 #include "llvm/Support/Timer.h"
@@ -795,6 +797,7 @@ getCompilationDatabase(int argc, char **argv, std::string &ErrorMessage) {
 }
 
 int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
+  llvm::InitializeAllTargetInfos();
   std::string ErrorMessage;
   std::unique_ptr<tooling::CompilationDatabase> Compilations =
       getCompilationDatabase(argc, argv, ErrorMessage);
@@ -810,6 +813,8 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
   Compilations = expandResponseFiles(std::move(Compilations),
                                      llvm::vfs::getRealFileSystem());
 
+  Compilations = inferTargetAndDriverMode(std::move(Compilations));
+
   // The command options are rewritten to run Clang in preprocessor only mode.
   auto AdjustingCompilations =
       std::make_unique<tooling::ArgumentsAdjustingCompilations>(

From a0fc8a2b2b85a70c8c523ff2d1fe4ef2e86cda7f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Thu, 12 Sep 2024 23:11:27 +0300
Subject: [PATCH 323/427] [clang-scan-deps] Fix builds with
 BUILD_SHARED_LIBS=ON

This fixes building in this configuration after
87e1104cf0e2de0d04bee2944893fa7897277b2f.

(cherry picked from commit aa3465793a250faa5426ac626989375465256658)
---
 clang/tools/clang-scan-deps/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/tools/clang-scan-deps/CMakeLists.txt b/clang/tools/clang-scan-deps/CMakeLists.txt
index f0be6a546ff88..10bc0ff23c548 100644
--- a/clang/tools/clang-scan-deps/CMakeLists.txt
+++ b/clang/tools/clang-scan-deps/CMakeLists.txt
@@ -1,4 +1,5 @@
 set(LLVM_LINK_COMPONENTS
+  ${LLVM_TARGETS_TO_BUILD}
   Core
   Option
   Support

From 2b6c23303f7c3f6397003cdac4be6e9e6b78d957 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Fri, 13 Sep 2024 23:18:10 +0300
Subject: [PATCH 324/427] [clang-scan-deps] Infer the tool locations from PATH
 (#108539)

This allows the clang driver to know which tool is meant to be executed,
which allows the clang driver to load the right clang config files, and
allows clang to find colocated sysroots.

This makes sure that doing `clang-scan-deps -- <tool> ...` looks up
things in the same way as if one just would execute `<tool> ...`, when
`<tool>` isn't an absolute or relative path.

(cherry picked from commit a26ec542371652e1d774696e90016fd5b0b1c191)
---
 .../clang/Tooling/CompilationDatabase.h       |  6 ++
 clang/lib/Tooling/CMakeLists.txt              |  1 +
 .../Tooling/LocateToolCompilationDatabase.cpp | 71 +++++++++++++++++++
 .../ClangScanDeps/modules-extern-submodule.c  |  3 +-
 .../modules-full-output-tu-order.c            |  6 +-
 .../modules-has-include-umbrella-header.c     |  3 +-
 .../ClangScanDeps/modules-header-sharing.m    |  3 +-
 .../modules-implementation-module-map.c       |  3 +-
 .../modules-implementation-private.m          |  3 +-
 .../ClangScanDeps/modules-priv-fw-from-pub.m  |  3 +-
 .../ClangScanDeps/resolve-executable-path.c   | 32 +++++++++
 clang/tools/clang-scan-deps/ClangScanDeps.cpp |  2 +
 12 files changed, 120 insertions(+), 16 deletions(-)
 create mode 100644 clang/lib/Tooling/LocateToolCompilationDatabase.cpp
 create mode 100644 clang/test/ClangScanDeps/resolve-executable-path.c

diff --git a/clang/include/clang/Tooling/CompilationDatabase.h b/clang/include/clang/Tooling/CompilationDatabase.h
index fee584acb4862..36fe0812ebe97 100644
--- a/clang/include/clang/Tooling/CompilationDatabase.h
+++ b/clang/include/clang/Tooling/CompilationDatabase.h
@@ -234,6 +234,12 @@ std::unique_ptr<CompilationDatabase>
 std::unique_ptr<CompilationDatabase>
 inferTargetAndDriverMode(std::unique_ptr<CompilationDatabase> Base);
 
+/// Returns a wrapped CompilationDatabase that will transform argv[0] to an
+/// absolute path, if it currently is a plain tool name, looking it up in
+/// PATH.
+std::unique_ptr<CompilationDatabase>
+inferToolLocation(std::unique_ptr<CompilationDatabase> Base);
+
 /// Returns a wrapped CompilationDatabase that will expand all rsp(response)
 /// files on commandline returned by underlying database.
 std::unique_ptr<CompilationDatabase>
diff --git a/clang/lib/Tooling/CMakeLists.txt b/clang/lib/Tooling/CMakeLists.txt
index 93a9e707a134c..fc1f1f9f9d367 100644
--- a/clang/lib/Tooling/CMakeLists.txt
+++ b/clang/lib/Tooling/CMakeLists.txt
@@ -25,6 +25,7 @@ add_clang_library(clangTooling
   GuessTargetAndModeCompilationDatabase.cpp
   InterpolatingCompilationDatabase.cpp
   JSONCompilationDatabase.cpp
+  LocateToolCompilationDatabase.cpp
   Refactoring.cpp
   RefactoringCallbacks.cpp
   StandaloneExecution.cpp
diff --git a/clang/lib/Tooling/LocateToolCompilationDatabase.cpp b/clang/lib/Tooling/LocateToolCompilationDatabase.cpp
new file mode 100644
index 0000000000000..033f69f3760c6
--- /dev/null
+++ b/clang/lib/Tooling/LocateToolCompilationDatabase.cpp
@@ -0,0 +1,71 @@
+//===- GuessTargetAndModeCompilationDatabase.cpp --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/CompilationDatabase.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include <memory>
+
+namespace clang {
+namespace tooling {
+
+namespace {
+class LocationAdderDatabase : public CompilationDatabase {
+public:
+  LocationAdderDatabase(std::unique_ptr<CompilationDatabase> Base)
+      : Base(std::move(Base)) {
+    assert(this->Base != nullptr);
+  }
+
+  std::vector<std::string> getAllFiles() const override {
+    return Base->getAllFiles();
+  }
+
+  std::vector<CompileCommand> getAllCompileCommands() const override {
+    return addLocation(Base->getAllCompileCommands());
+  }
+
+  std::vector<CompileCommand>
+  getCompileCommands(StringRef FilePath) const override {
+    return addLocation(Base->getCompileCommands(FilePath));
+  }
+
+private:
+  std::vector<CompileCommand>
+  addLocation(std::vector<CompileCommand> Cmds) const {
+    for (auto &Cmd : Cmds) {
+      if (Cmd.CommandLine.empty())
+        continue;
+      std::string &Driver = Cmd.CommandLine.front();
+      // If the driver name already is absolute, we don't need to do anything.
+      if (llvm::sys::path::is_absolute(Driver))
+        continue;
+      // If the name is a relative path, like bin/clang, we assume it's
+      // possible to resolve it and don't do anything about it either.
+      if (llvm::any_of(Driver,
+                       [](char C) { return llvm::sys::path::is_separator(C); }))
+        continue;
+      auto Absolute = llvm::sys::findProgramByName(Driver);
+      // If we found it in path, update the entry in Cmd.CommandLine
+      if (Absolute && llvm::sys::path::is_absolute(*Absolute))
+        Driver = std::move(*Absolute);
+    }
+    return Cmds;
+  }
+  std::unique_ptr<CompilationDatabase> Base;
+};
+} // namespace
+
+std::unique_ptr<CompilationDatabase>
+inferToolLocation(std::unique_ptr<CompilationDatabase> Base) {
+  return std::make_unique<LocationAdderDatabase>(std::move(Base));
+}
+
+} // namespace tooling
+} // namespace clang
diff --git a/clang/test/ClangScanDeps/modules-extern-submodule.c b/clang/test/ClangScanDeps/modules-extern-submodule.c
index 92f2c749dd2c3..6e9082b0165fd 100644
--- a/clang/test/ClangScanDeps/modules-extern-submodule.c
+++ b/clang/test/ClangScanDeps/modules-extern-submodule.c
@@ -112,8 +112,7 @@ module third {}
 // CHECK:                  "-fmodule-map-file=[[PREFIX]]/first/first/module.modulemap",
 // CHECK:                  "-fmodule-file=first=[[PREFIX]]/cache/{{.*}}/first-{{.*}}.pcm",
 // CHECK:                ],
-// CHECK-NEXT:           "executable": "clang",
-// CHECK-NEXT:           "file-deps": [
+// CHECK:                "file-deps": [
 // CHECK-NEXT:             "[[PREFIX]]/tu.m"
 // CHECK-NEXT:           ],
 // CHECK-NEXT:           "input-file": "[[PREFIX]]/tu.c"
diff --git a/clang/test/ClangScanDeps/modules-full-output-tu-order.c b/clang/test/ClangScanDeps/modules-full-output-tu-order.c
index 04939826817fc..065b8ac8ae07f 100644
--- a/clang/test/ClangScanDeps/modules-full-output-tu-order.c
+++ b/clang/test/ClangScanDeps/modules-full-output-tu-order.c
@@ -35,8 +35,7 @@
 // CHECK:                  "-D"
 // CHECK-NEXT:             "ONE"
 // CHECK:                ],
-// CHECK-NEXT:           "executable": "clang",
-// CHECK-NEXT:           "file-deps": [
+// CHECK:                "file-deps": [
 // CHECK-NEXT:             "[[PREFIX]]/tu.c"
 // CHECK-NEXT:           ],
 // CHECK-NEXT:           "input-file": "[[PREFIX]]/tu.c"
@@ -52,8 +51,7 @@
 // CHECK:                  "-D"
 // CHECK-NEXT:             "TWO"
 // CHECK:                ],
-// CHECK-NEXT:           "executable": "clang",
-// CHECK-NEXT:           "file-deps": [
+// CHECK:                "file-deps": [
 // CHECK-NEXT:             "[[PREFIX]]/tu.c"
 // CHECK-NEXT:           ],
 // CHECK-NEXT:           "input-file": "[[PREFIX]]/tu.c"
diff --git a/clang/test/ClangScanDeps/modules-has-include-umbrella-header.c b/clang/test/ClangScanDeps/modules-has-include-umbrella-header.c
index e9363b2e14b07..022c59ca65db2 100644
--- a/clang/test/ClangScanDeps/modules-has-include-umbrella-header.c
+++ b/clang/test/ClangScanDeps/modules-has-include-umbrella-header.c
@@ -64,8 +64,7 @@ module Dependency { header "dependency.h" }
 // CHECK:                ],
 // CHECK-NEXT:           "command-line": [
 // CHECK:                ],
-// CHECK-NEXT:           "executable": "clang",
-// CHECK-NEXT:           "file-deps": [
+// CHECK:                "file-deps": [
 // CHECK-NEXT:             "[[PREFIX]]/tu.c"
 // CHECK-NEXT:           ],
 // CHECK-NEXT:           "input-file": "[[PREFIX]]/tu.c"
diff --git a/clang/test/ClangScanDeps/modules-header-sharing.m b/clang/test/ClangScanDeps/modules-header-sharing.m
index ec94923ae8eee..31ef351ec38b7 100644
--- a/clang/test/ClangScanDeps/modules-header-sharing.m
+++ b/clang/test/ClangScanDeps/modules-header-sharing.m
@@ -77,8 +77,7 @@
 // CHECK:                  "-fmodule-map-file=[[PREFIX]]/frameworks/A.framework/Modules/module.modulemap",
 // CHECK:                  "-fmodule-name=A",
 // CHECK:                ],
-// CHECK-NEXT:           "executable": "clang",
-// CHECK-NEXT:           "file-deps": [
+// CHECK:                "file-deps": [
 // CHECK-NEXT:             "[[PREFIX]]/tu.m",
 // CHECK-NEXT:             "[[PREFIX]]/shared/H.h"
 // CHECK-NEXT:           ],
diff --git a/clang/test/ClangScanDeps/modules-implementation-module-map.c b/clang/test/ClangScanDeps/modules-implementation-module-map.c
index d76d315700469..b7637d0c9143a 100644
--- a/clang/test/ClangScanDeps/modules-implementation-module-map.c
+++ b/clang/test/ClangScanDeps/modules-implementation-module-map.c
@@ -27,8 +27,7 @@ framework module FWPrivate { header "private.h" }
 // CHECK:                "-fmodule-map-file=[[PREFIX]]/frameworks/FW.framework/Modules/module.private.modulemap",
 // CHECK:                "-fmodule-name=FWPrivate",
 // CHECK:              ],
-// CHECK-NEXT:         "executable": "clang",
-// CHECK-NEXT:         "file-deps": [
+// CHECK:              "file-deps": [
 // CHECK-NEXT:           "[[PREFIX]]/tu.m"
 // CHECK-NEXT:         ],
 // CHECK-NEXT:         "input-file": "[[PREFIX]]/tu.m"
diff --git a/clang/test/ClangScanDeps/modules-implementation-private.m b/clang/test/ClangScanDeps/modules-implementation-private.m
index acc01017d6640..b376073f4b9ee 100644
--- a/clang/test/ClangScanDeps/modules-implementation-private.m
+++ b/clang/test/ClangScanDeps/modules-implementation-private.m
@@ -62,8 +62,7 @@
 // CHECK-NEXT:           ],
 // CHECK-NEXT:           "command-line": [
 // CHECK:                ],
-// CHECK-NEXT:           "executable": "clang",
-// CHECK-NEXT:           "file-deps": [
+// CHECK:                "file-deps": [
 // CHECK-NEXT:             "[[PREFIX]]/tu.m",
 // CHECK-NEXT:             "[[PREFIX]]/frameworks/FW.framework/PrivateHeaders/Missed.h",
 // CHECK-NEXT:             "[[PREFIX]]/frameworks/FW.framework/Headers/FW.h"
diff --git a/clang/test/ClangScanDeps/modules-priv-fw-from-pub.m b/clang/test/ClangScanDeps/modules-priv-fw-from-pub.m
index 4847fedac3bf6..e961308846363 100644
--- a/clang/test/ClangScanDeps/modules-priv-fw-from-pub.m
+++ b/clang/test/ClangScanDeps/modules-priv-fw-from-pub.m
@@ -110,8 +110,7 @@
 // CHECK-NEXT:           ],
 // CHECK-NEXT:           "command-line": [
 // CHECK:                ],
-// CHECK-NEXT:           "executable": "clang",
-// CHECK-NEXT:           "file-deps": [
+// CHECK:                "file-deps": [
 // CHECK-NEXT:             "[[PREFIX]]/tu.m"
 // CHECK-NEXT:           ],
 // CHECK-NEXT:           "input-file": "[[PREFIX]]/tu.m"
diff --git a/clang/test/ClangScanDeps/resolve-executable-path.c b/clang/test/ClangScanDeps/resolve-executable-path.c
new file mode 100644
index 0000000000000..63e34ca256a8b
--- /dev/null
+++ b/clang/test/ClangScanDeps/resolve-executable-path.c
@@ -0,0 +1,32 @@
+// UNSUPPORTED: system-windows
+
+// Check that we expand the executable name to an absolute path, when invoked
+// with a plain executable name, which is implied to be found in PATH.
+// REQUIRES: x86-registered-target
+
+// RUN: rm -rf %t
+// RUN: mkdir -p %t/bin
+// RUN: ln -s %clang %t/bin/x86_64-w64-mingw32-clang
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
+
+// Check that we can deduce this both when using a compilation database, and when using
+// a literal command line.
+
+// RUN: env "PATH=%t/bin:%PATH%" clang-scan-deps -format experimental-full -compilation-database %t/cdb.json | FileCheck %s -DBASE=%/t
+
+// RUN: env "PATH=%t/bin:%PATH%" clang-scan-deps -format experimental-full -- x86_64-w64-mingw32-clang %t/source.c -o %t/source.o | FileCheck %s -DBASE=%/t
+
+// CHECK: "executable": "[[BASE]]/bin/x86_64-w64-mingw32-clang"
+
+//--- cdb.json.in
+[
+  {
+    "directory": "DIR"
+    "command": "x86_64-w64-mingw32-clang -c DIR/source.c -o DIR/source.o"
+    "file": "DIR/source.c"
+  },
+]
+
+//--- source.c
+void func(void) {}
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index cd6dd2620152a..0f581e73cdfe4 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -815,6 +815,8 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
 
   Compilations = inferTargetAndDriverMode(std::move(Compilations));
 
+  Compilations = inferToolLocation(std::move(Compilations));
+
   // The command options are rewritten to run Clang in preprocessor only mode.
   auto AdjustingCompilations =
       std::make_unique<tooling::ArgumentsAdjustingCompilations>(

From 997b66e566886b8a395b852db46e7930f757b818 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Thu, 19 Sep 2024 20:46:09 +0300
Subject: [PATCH 325/427] [clang-scan-deps] Don't inspect Args[0] as an option
 (#109050)

Since a26ec542371652e1d774696e90016fd5b0b1c191, we expand the executable
name to an absolute path, if it isn't already one, if found in path.

This broke a couple tests in some environments; when the clang workdir
resides in a path under e.g. /opt. Tests that only use a tool name like
"clang-cl" would get expanded to the absolute path in the build tree.
The loop for finding the last "-o" like option for clang-cl command
lines would inspect all arguments, including Args[0] which is the
executable name itself. As an /opt path matches Arg.starts_with("/o"),
this would get detected as an object file output name in cases where
there was no other explicit output argument.

Thus, this fixes those tests in workdirs under e.g. /opt.

(cherry picked from commit cead9044a995910306e2e64b426fcc8042d7e0ef)
---
 clang/tools/clang-scan-deps/ClangScanDeps.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 0f581e73cdfe4..867df19c863fe 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -837,7 +837,12 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
 
           // Reverse scan, starting at the end or at the element before "--".
           auto R = std::make_reverse_iterator(FlagsEnd);
-          for (auto I = R, E = Args.rend(); I != E; ++I) {
+          auto E = Args.rend();
+          // Don't include Args[0] in the iteration; that's the executable, not
+          // an option.
+          if (E != R)
+            E--;
+          for (auto I = R; I != E; ++I) {
             StringRef Arg = *I;
             if (ClangCLMode) {
               // Ignore arguments that are preceded by "-Xclang".

From b3734d9f93c1f8d908836a966f77c6792242df99 Mon Sep 17 00:00:00 2001
From: Weining Lu <luweining@loongson.cn>
Date: Mon, 19 Aug 2024 16:51:21 +0800
Subject: [PATCH 326/427] [LoongArch] Fix the assertion for atomic store with
 'ptr' type

(cherry picked from commit 63267ca9016aa334b329aa408716456b4e3799c8)
---
 .../LoongArch/LoongArchISelLowering.cpp       |   5 +-
 .../ir-instruction/load-store-atomic.ll       | 119 ++++++++++++++++++
 2 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 93edafaff553b..082b42398c6a7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -5601,8 +5601,9 @@ bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
 
   // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
   // require fences beacuse we can use amswap_db.[w/d].
-  if (isa<StoreInst>(I)) {
-    unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
+  Type *Ty = I->getOperand(0)->getType();
+  if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
+    unsigned Size = Ty->getIntegerBitWidth();
     return (Size == 8 || Size == 16);
   }
 
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
index c51fded410e83..1af2b38d79943 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
@@ -72,6 +72,22 @@ define i64 @load_acquire_i64(ptr %ptr) {
   ret i64 %val
 }
 
+define ptr @load_acquire_ptr(ptr %ptr) {
+; LA32-LABEL: load_acquire_ptr:
+; LA32:       # %bb.0:
+; LA32-NEXT:    ld.w $a0, $a0, 0
+; LA32-NEXT:    dbar 20
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_acquire_ptr:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    dbar 20
+; LA64-NEXT:    ret
+  %val = load atomic ptr, ptr %ptr acquire, align 8
+  ret ptr %val
+}
+
 define i8 @load_unordered_i8(ptr %ptr) {
 ; LA32-LABEL: load_unordered_i8:
 ; LA32:       # %bb.0:
@@ -135,6 +151,20 @@ define i64 @load_unordered_i64(ptr %ptr) {
   ret i64 %val
 }
 
+define ptr @load_unordered_ptr(ptr %ptr) {
+; LA32-LABEL: load_unordered_ptr:
+; LA32:       # %bb.0:
+; LA32-NEXT:    ld.w $a0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_unordered_ptr:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    ret
+  %val = load atomic ptr, ptr %ptr unordered, align 8
+  ret ptr %val
+}
+
 define i8 @load_monotonic_i8(ptr %ptr) {
 ; LA32-LABEL: load_monotonic_i8:
 ; LA32:       # %bb.0:
@@ -198,6 +228,20 @@ define i64 @load_monotonic_i64(ptr %ptr) {
   ret i64 %val
 }
 
+define ptr @load_monotonic_ptr(ptr %ptr) {
+; LA32-LABEL: load_monotonic_ptr:
+; LA32:       # %bb.0:
+; LA32-NEXT:    ld.w $a0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_monotonic_ptr:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    ret
+  %val = load atomic ptr, ptr %ptr monotonic, align 8
+  ret ptr %val
+}
+
 define i8 @load_seq_cst_i8(ptr %ptr) {
 ; LA32-LABEL: load_seq_cst_i8:
 ; LA32:       # %bb.0:
@@ -268,6 +312,22 @@ define i64 @load_seq_cst_i64(ptr %ptr) {
   ret i64 %val
 }
 
+define ptr @load_seq_cst_ptr(ptr %ptr) {
+; LA32-LABEL: load_seq_cst_ptr:
+; LA32:       # %bb.0:
+; LA32-NEXT:    ld.w $a0, $a0, 0
+; LA32-NEXT:    dbar 16
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_seq_cst_ptr:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    dbar 16
+; LA64-NEXT:    ret
+  %val = load atomic ptr, ptr %ptr seq_cst, align 8
+  ret ptr %val
+}
+
 define void @store_release_i8(ptr %ptr, i8 signext %v) {
 ; LA32-LABEL: store_release_i8:
 ; LA32:       # %bb.0:
@@ -336,6 +396,21 @@ define void @store_release_i64(ptr %ptr, i64 %v) {
   ret void
 }
 
+define void @store_release_ptr(ptr %ptr, ptr %v) {
+; LA32-LABEL: store_release_ptr:
+; LA32:       # %bb.0:
+; LA32-NEXT:    dbar 18
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_release_ptr:
+; LA64:       # %bb.0:
+; LA64-NEXT:    amswap_db.d $zero, $a1, $a0
+; LA64-NEXT:    ret
+  store atomic ptr %v, ptr %ptr release, align 8
+  ret void
+}
+
 define void @store_unordered_i8(ptr %ptr, i8 signext %v) {
 ; LA32-LABEL: store_unordered_i8:
 ; LA32:       # %bb.0:
@@ -399,6 +474,20 @@ define void @store_unordered_i64(ptr %ptr, i64 %v) {
   ret void
 }
 
+define void @store_unordered_ptr(ptr %ptr, ptr %v) {
+; LA32-LABEL: store_unordered_ptr:
+; LA32:       # %bb.0:
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_unordered_ptr:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.d $a1, $a0, 0
+; LA64-NEXT:    ret
+  store atomic ptr %v, ptr %ptr unordered, align 8
+  ret void
+}
+
 define void @store_monotonic_i8(ptr %ptr, i8 signext %v) {
 ; LA32-LABEL: store_monotonic_i8:
 ; LA32:       # %bb.0:
@@ -462,6 +551,20 @@ define void @store_monotonic_i64(ptr %ptr, i64 %v) {
   ret void
 }
 
+define void @store_monotonic_ptr(ptr %ptr, ptr %v) {
+; LA32-LABEL: store_monotonic_ptr:
+; LA32:       # %bb.0:
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_monotonic_ptr:
+; LA64:       # %bb.0:
+; LA64-NEXT:    st.d $a1, $a0, 0
+; LA64-NEXT:    ret
+  store atomic ptr %v, ptr %ptr monotonic, align 8
+  ret void
+}
+
 define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) {
 ; LA32-LABEL: store_seq_cst_i8:
 ; LA32:       # %bb.0:
@@ -534,3 +637,19 @@ define void @store_seq_cst_i64(ptr %ptr, i64 %v) {
   store atomic i64 %v, ptr %ptr seq_cst, align 8
   ret void
 }
+
+define void @store_seq_cst_ptr(ptr %ptr, ptr %v) {
+; LA32-LABEL: store_seq_cst_ptr:
+; LA32:       # %bb.0:
+; LA32-NEXT:    dbar 16
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    dbar 16
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_seq_cst_ptr:
+; LA64:       # %bb.0:
+; LA64-NEXT:    amswap_db.d $zero, $a1, $a0
+; LA64-NEXT:    ret
+  store atomic ptr %v, ptr %ptr seq_cst, align 8
+  ret void
+}

From 8679d1b51bd91d638ac3babba03a404e4031f9ea Mon Sep 17 00:00:00 2001
From: Timothy Pearson <162513562+tpearson-ssc@users.noreply.github.com>
Date: Wed, 25 Sep 2024 02:09:50 -0500
Subject: [PATCH 327/427] [SDAG] Honor signed arguments in floating point
 libcalls (#109134)

In ExpandFPLibCall, an assumption is made that all floating point
libcalls that take integer arguments use unsigned integers. In the case
of ldexp and frexp, this assumption is incorrect, leading to
miscompilation and subsequent target-dependent incorrect operation.

Indicate that ldexp and frexp utilize signed arguments in
ExpandFPLibCall.

Fixes #108904

Signed-off-by: Timothy Pearson <tpearson@solidsilicon.com>
(cherry picked from commit 90c14748638f1e10e31173b145fdbb5c4529c922)
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp |  3 +-
 llvm/test/CodeGen/PowerPC/ldexp-libcall.ll    |  4 +-
 llvm/test/CodeGen/PowerPC/ldexp.ll            | 36 ++++++----
 .../PowerPC/negative-integer-fp-libcall.ll    | 26 +++++++
 .../X86/fold-int-pow2-with-fmul-or-fdiv.ll    | 69 ++++++++++++-------
 5 files changed, 96 insertions(+), 42 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7f5b46af01c62..4b25f553ffae9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2190,7 +2190,8 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
     Results.push_back(Tmp.first);
     Results.push_back(Tmp.second);
   } else {
-    SDValue Tmp = ExpandLibCall(LC, Node, false).first;
+    bool IsSignedArgument = Node->getOpcode() == ISD::FLDEXP;
+    SDValue Tmp = ExpandLibCall(LC, Node, IsSignedArgument).first;
     Results.push_back(Tmp);
   }
 }
diff --git a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
index 6144a9d920365..e531516c37e87 100644
--- a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
@@ -10,7 +10,7 @@ define float @call_ldexpf(float %a, i32 %b) {
 ; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    clrldi r4, r4, 32
+; CHECK-NEXT:    extsw r4, r4
 ; CHECK-NEXT:    bl ldexpf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    addi r1, r1, 32
@@ -29,7 +29,7 @@ define double @call_ldexp(double %a, i32 %b) {
 ; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    clrldi r4, r4, 32
+; CHECK-NEXT:    extsw r4, r4
 ; CHECK-NEXT:    bl ldexp
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    addi r1, r1, 32
diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll b/llvm/test/CodeGen/PowerPC/ldexp.ll
index 151df6096b30b..ffc826cc86de5 100644
--- a/llvm/test/CodeGen/PowerPC/ldexp.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp.ll
@@ -57,22 +57,24 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) {
 ; CHECK-NEXT:    .cfi_offset v29, -48
 ; CHECK-NEXT:    .cfi_offset v30, -32
 ; CHECK-NEXT:    .cfi_offset v31, -16
-; CHECK-NEXT:    xxsldwi vs0, v2, v2, 3
 ; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    xxsldwi vs0, v2, v2, 3
 ; CHECK-NEXT:    stxv v29, 32(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:    xscvspdpn f1, vs0
-; CHECK-NEXT:    vextuwrx r4, r3, v3
+; CHECK-NEXT:    vextuwrx r3, r3, v3
 ; CHECK-NEXT:    stxv v30, 48(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:    stxv v31, 64(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    extsw r4, r3
 ; CHECK-NEXT:    vmr v31, v3
 ; CHECK-NEXT:    vmr v30, v2
 ; CHECK-NEXT:    bl ldexpf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    xxswapd vs0, v30
 ; CHECK-NEXT:    li r3, 4
+; CHECK-NEXT:    xxswapd vs0, v30
 ; CHECK-NEXT:    xscvdpspn v29, f1
 ; CHECK-NEXT:    xscvspdpn f1, vs0
-; CHECK-NEXT:    vextuwrx r4, r3, v31
+; CHECK-NEXT:    vextuwrx r3, r3, v31
+; CHECK-NEXT:    extsw r4, r3
 ; CHECK-NEXT:    bl ldexpf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    xscvdpspn vs0, f1
@@ -100,35 +102,39 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) {
 ; CHECK-NEXT:    .cfi_offset v29, -48
 ; CHECK-NEXT:    .cfi_offset v30, -32
 ; CHECK-NEXT:    .cfi_offset v31, -16
-; CHECK-NEXT:    li r3, 12
-; CHECK-NEXT:    xscvspdpn f1, v2
+; CHECK-NEXT:    li r3, 4
+; CHECK-NEXT:    xxswapd vs0, v2
 ; CHECK-NEXT:    stxv v28, 32(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    xscvspdpn f1, vs0
+; CHECK-NEXT:    vextuwrx r3, r3, v3
 ; CHECK-NEXT:    stxv v29, 48(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:    stxv v30, 64(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:    stxv v31, 80(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v3
+; CHECK-NEXT:    extsw r4, r3
 ; CHECK-NEXT:    vmr v30, v2
-; CHECK-NEXT:    vextuwrx r4, r3, v3
 ; CHECK-NEXT:    bl ldexpf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    xxswapd vs0, v30
-; CHECK-NEXT:    li r3, 4
+; CHECK-NEXT:    li r3, 12
 ; CHECK-NEXT:    xscpsgndp v29, f1, f1
-; CHECK-NEXT:    xscvspdpn f1, vs0
-; CHECK-NEXT:    vextuwrx r4, r3, v31
+; CHECK-NEXT:    xscvspdpn f1, v30
+; CHECK-NEXT:    vextuwrx r3, r3, v31
+; CHECK-NEXT:    extsw r4, r3
 ; CHECK-NEXT:    bl ldexpf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    xxmrghd vs0, v29, vs1
+; CHECK-NEXT:    xxmrghd vs0, vs1, v29
 ; CHECK-NEXT:    li r3, 0
-; CHECK-NEXT:    vextuwrx r4, r3, v31
+; CHECK-NEXT:    vextuwrx r3, r3, v31
 ; CHECK-NEXT:    xvcvdpsp v28, vs0
 ; CHECK-NEXT:    xxsldwi vs0, v30, v30, 3
+; CHECK-NEXT:    extsw r4, r3
 ; CHECK-NEXT:    xscvspdpn f1, vs0
 ; CHECK-NEXT:    bl ldexpf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    xxsldwi vs0, v30, v30, 1
+; CHECK-NEXT:    mfvsrwz r3, v31
 ; CHECK-NEXT:    xscpsgndp v29, f1, f1
-; CHECK-NEXT:    mfvsrwz r4, v31
+; CHECK-NEXT:    extsw r4, r3
 ; CHECK-NEXT:    xscvspdpn f1, vs0
 ; CHECK-NEXT:    bl ldexpf
 ; CHECK-NEXT:    nop
@@ -156,7 +162,7 @@ define half @ldexp_f16(half %arg0, i32 %arg1) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    xscvdphp f0, f1
-; CHECK-NEXT:    clrldi r4, r4, 32
+; CHECK-NEXT:    extsw r4, r4
 ; CHECK-NEXT:    mffprwz r3, f0
 ; CHECK-NEXT:    clrlwi r3, r3, 16
 ; CHECK-NEXT:    mtfprwz f0, r3
diff --git a/llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll b/llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll
new file mode 100644
index 0000000000000..010ee6ef043e7
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -O1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+
+; Test that a negative parameter smaller than 64 bits (e.g., int)
+; is correctly implemented with sign-extension when passed to
+; a floating point libcall.
+
+define double @ldexp_test(ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: ldexp_test:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    stdu 1, -112(1)
+; CHECK-NEXT:    std 0, 128(1)
+; CHECK-NEXT:    lfd 1, 0(3)
+; CHECK-NEXT:    lwa 4, 0(4)
+; CHECK-NEXT:    bl ldexp
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi 1, 1, 112
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+  %base = load double, ptr %a
+  %exp = load i32, ptr %b
+  %call = call double @llvm.ldexp.f64.i32(double %base, i32 signext %exp)
+  ret double %call
+}
diff --git a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
index 5a051a9c499e4..332fbf7188af8 100644
--- a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -406,13 +406,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
 ; CHECK-SSE-NEXT:    subq $72, %rsp
 ; CHECK-SSE-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-SSE-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
-; CHECK-SSE-NEXT:    pextrw $7, %xmm0, %edi
+; CHECK-SSE-NEXT:    pextrw $7, %xmm0, %eax
+; CHECK-SSE-NEXT:    movswl %ax, %edi
 ; CHECK-SSE-NEXT:    movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-SSE-NEXT:    callq ldexpf@PLT
 ; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
 ; CHECK-SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-SSE-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-SSE-NEXT:    pextrw $6, %xmm0, %edi
+; CHECK-SSE-NEXT:    pextrw $6, %xmm0, %eax
+; CHECK-SSE-NEXT:    movswl %ax, %edi
 ; CHECK-SSE-NEXT:    movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-SSE-NEXT:    callq ldexpf@PLT
 ; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
@@ -420,13 +422,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
 ; CHECK-SSE-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
 ; CHECK-SSE-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-SSE-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-SSE-NEXT:    pextrw $5, %xmm0, %edi
+; CHECK-SSE-NEXT:    pextrw $5, %xmm0, %eax
+; CHECK-SSE-NEXT:    movswl %ax, %edi
 ; CHECK-SSE-NEXT:    movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-SSE-NEXT:    callq ldexpf@PLT
 ; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
 ; CHECK-SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-SSE-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-SSE-NEXT:    pextrw $4, %xmm0, %edi
+; CHECK-SSE-NEXT:    pextrw $4, %xmm0, %eax
+; CHECK-SSE-NEXT:    movswl %ax, %edi
 ; CHECK-SSE-NEXT:    movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-SSE-NEXT:    callq ldexpf@PLT
 ; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
@@ -436,13 +440,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
 ; CHECK-SSE-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
 ; CHECK-SSE-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-SSE-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-SSE-NEXT:    pextrw $3, %xmm0, %edi
+; CHECK-SSE-NEXT:    pextrw $3, %xmm0, %eax
+; CHECK-SSE-NEXT:    movswl %ax, %edi
 ; CHECK-SSE-NEXT:    movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-SSE-NEXT:    callq ldexpf@PLT
 ; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
 ; CHECK-SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-SSE-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-SSE-NEXT:    pextrw $2, %xmm0, %edi
+; CHECK-SSE-NEXT:    pextrw $2, %xmm0, %eax
+; CHECK-SSE-NEXT:    movswl %ax, %edi
 ; CHECK-SSE-NEXT:    movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-SSE-NEXT:    callq ldexpf@PLT
 ; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
@@ -450,14 +456,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
 ; CHECK-SSE-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
 ; CHECK-SSE-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-SSE-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-SSE-NEXT:    pextrw $1, %xmm0, %edi
+; CHECK-SSE-NEXT:    pextrw $1, %xmm0, %eax
+; CHECK-SSE-NEXT:    movswl %ax, %edi
 ; CHECK-SSE-NEXT:    movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-SSE-NEXT:    callq ldexpf@PLT
 ; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
 ; CHECK-SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-SSE-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
 ; CHECK-SSE-NEXT:    movd %xmm0, %eax
-; CHECK-SSE-NEXT:    movzwl %ax, %edi
+; CHECK-SSE-NEXT:    movswl %ax, %edi
 ; CHECK-SSE-NEXT:    movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-SSE-NEXT:    callq ldexpf@PLT
 ; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
@@ -476,13 +483,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
 ; CHECK-AVX2-NEXT:    subq $72, %rsp
 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-AVX2-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
-; CHECK-AVX2-NEXT:    vpextrw $7, %xmm0, %edi
+; CHECK-AVX2-NEXT:    vpextrw $7, %xmm0, %eax
+; CHECK-AVX2-NEXT:    movswl %ax, %edi
 ; CHECK-AVX2-NEXT:    vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-AVX2-NEXT:    callq ldexpf@PLT
 ; CHECK-AVX2-NEXT:    callq __truncsfhf2@PLT
 ; CHECK-AVX2-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-AVX2-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX2-NEXT:    vpextrw $6, %xmm0, %edi
+; CHECK-AVX2-NEXT:    vpextrw $6, %xmm0, %eax
+; CHECK-AVX2-NEXT:    movswl %ax, %edi
 ; CHECK-AVX2-NEXT:    vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-AVX2-NEXT:    callq ldexpf@PLT
 ; CHECK-AVX2-NEXT:    callq __truncsfhf2@PLT
@@ -490,13 +499,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
 ; CHECK-AVX2-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
 ; CHECK-AVX2-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-AVX2-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX2-NEXT:    vpextrw $5, %xmm0, %edi
+; CHECK-AVX2-NEXT:    vpextrw $5, %xmm0, %eax
+; CHECK-AVX2-NEXT:    movswl %ax, %edi
 ; CHECK-AVX2-NEXT:    vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-AVX2-NEXT:    callq ldexpf@PLT
 ; CHECK-AVX2-NEXT:    callq __truncsfhf2@PLT
 ; CHECK-AVX2-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-AVX2-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX2-NEXT:    vpextrw $4, %xmm0, %edi
+; CHECK-AVX2-NEXT:    vpextrw $4, %xmm0, %eax
+; CHECK-AVX2-NEXT:    movswl %ax, %edi
 ; CHECK-AVX2-NEXT:    vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-AVX2-NEXT:    callq ldexpf@PLT
 ; CHECK-AVX2-NEXT:    callq __truncsfhf2@PLT
@@ -506,13 +517,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
 ; CHECK-AVX2-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
 ; CHECK-AVX2-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-AVX2-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX2-NEXT:    vpextrw $3, %xmm0, %edi
+; CHECK-AVX2-NEXT:    vpextrw $3, %xmm0, %eax
+; CHECK-AVX2-NEXT:    movswl %ax, %edi
 ; CHECK-AVX2-NEXT:    vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-AVX2-NEXT:    callq ldexpf@PLT
 ; CHECK-AVX2-NEXT:    callq __truncsfhf2@PLT
 ; CHECK-AVX2-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-AVX2-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX2-NEXT:    vpextrw $2, %xmm0, %edi
+; CHECK-AVX2-NEXT:    vpextrw $2, %xmm0, %eax
+; CHECK-AVX2-NEXT:    movswl %ax, %edi
 ; CHECK-AVX2-NEXT:    vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-AVX2-NEXT:    callq ldexpf@PLT
 ; CHECK-AVX2-NEXT:    callq __truncsfhf2@PLT
@@ -520,14 +533,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
 ; CHECK-AVX2-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
 ; CHECK-AVX2-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-AVX2-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX2-NEXT:    vpextrw $1, %xmm0, %edi
+; CHECK-AVX2-NEXT:    vpextrw $1, %xmm0, %eax
+; CHECK-AVX2-NEXT:    movswl %ax, %edi
 ; CHECK-AVX2-NEXT:    vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-AVX2-NEXT:    callq ldexpf@PLT
 ; CHECK-AVX2-NEXT:    callq __truncsfhf2@PLT
 ; CHECK-AVX2-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-AVX2-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
 ; CHECK-AVX2-NEXT:    vmovd %xmm0, %eax
-; CHECK-AVX2-NEXT:    movzwl %ax, %edi
+; CHECK-AVX2-NEXT:    movswl %ax, %edi
 ; CHECK-AVX2-NEXT:    vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-AVX2-NEXT:    callq ldexpf@PLT
 ; CHECK-AVX2-NEXT:    callq __truncsfhf2@PLT
@@ -546,7 +560,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
 ; CHECK-AVX512F-NEXT:    subq $72, %rsp
 ; CHECK-AVX512F-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-AVX512F-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
-; CHECK-AVX512F-NEXT:    vpextrw $7, %xmm0, %edi
+; CHECK-AVX512F-NEXT:    vpextrw $7, %xmm0, %eax
+; CHECK-AVX512F-NEXT:    movswl %ax, %edi
 ; CHECK-AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-AVX512F-NEXT:    callq ldexpf@PLT
 ; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
@@ -554,7 +569,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
 ; CHECK-AVX512F-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
 ; CHECK-AVX512F-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-AVX512F-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX512F-NEXT:    vpextrw $6, %xmm0, %edi
+; CHECK-AVX512F-NEXT:    vpextrw $6, %xmm0, %eax
+; CHECK-AVX512F-NEXT:    movswl %ax, %edi
 ; CHECK-AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-AVX512F-NEXT:    callq ldexpf@PLT
 ; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
@@ -564,7 +580,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
 ; CHECK-AVX512F-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
 ; CHECK-AVX512F-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-AVX512F-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX512F-NEXT:    vpextrw $5, %xmm0, %edi
+; CHECK-AVX512F-NEXT:    vpextrw $5, %xmm0, %eax
+; CHECK-AVX512F-NEXT:    movswl %ax, %edi
 ; CHECK-AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-AVX512F-NEXT:    callq ldexpf@PLT
 ; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
@@ -572,7 +589,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
 ; CHECK-AVX512F-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
 ; CHECK-AVX512F-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-AVX512F-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX512F-NEXT:    vpextrw $4, %xmm0, %edi
+; CHECK-AVX512F-NEXT:    vpextrw $4, %xmm0, %eax
+; CHECK-AVX512F-NEXT:    movswl %ax, %edi
 ; CHECK-AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-AVX512F-NEXT:    callq ldexpf@PLT
 ; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
@@ -584,7 +602,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
 ; CHECK-AVX512F-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
 ; CHECK-AVX512F-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-AVX512F-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX512F-NEXT:    vpextrw $3, %xmm0, %edi
+; CHECK-AVX512F-NEXT:    vpextrw $3, %xmm0, %eax
+; CHECK-AVX512F-NEXT:    movswl %ax, %edi
 ; CHECK-AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-AVX512F-NEXT:    callq ldexpf@PLT
 ; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
@@ -592,7 +611,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
 ; CHECK-AVX512F-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
 ; CHECK-AVX512F-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-AVX512F-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX512F-NEXT:    vpextrw $2, %xmm0, %edi
+; CHECK-AVX512F-NEXT:    vpextrw $2, %xmm0, %eax
+; CHECK-AVX512F-NEXT:    movswl %ax, %edi
 ; CHECK-AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-AVX512F-NEXT:    callq ldexpf@PLT
 ; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
@@ -602,7 +622,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
 ; CHECK-AVX512F-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
 ; CHECK-AVX512F-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-AVX512F-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX512F-NEXT:    vpextrw $1, %xmm0, %edi
+; CHECK-AVX512F-NEXT:    vpextrw $1, %xmm0, %eax
+; CHECK-AVX512F-NEXT:    movswl %ax, %edi
 ; CHECK-AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-AVX512F-NEXT:    callq ldexpf@PLT
 ; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
@@ -611,7 +632,7 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
 ; CHECK-AVX512F-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-AVX512F-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
 ; CHECK-AVX512F-NEXT:    vmovd %xmm0, %eax
-; CHECK-AVX512F-NEXT:    movzwl %ax, %edi
+; CHECK-AVX512F-NEXT:    movswl %ax, %edi
 ; CHECK-AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-AVX512F-NEXT:    callq ldexpf@PLT
 ; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0

From 53010fcf66b5a84153bce6b7e866edb596e59cf4 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Fri, 27 Sep 2024 08:53:02 -0400
Subject: [PATCH 328/427] [libc++] Fix AppleClang version number when checking
 for __builtin_verbose_trap support (#110161)

We should have been checking against 1700, not 17000, which was a typo.

(cherry picked from commit 1eba87904b0cbaaee82cfdb835528b85d99320ef)
---
 libcxx/vendor/llvm/default_assertion_handler.in | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libcxx/vendor/llvm/default_assertion_handler.in b/libcxx/vendor/llvm/default_assertion_handler.in
index 3b6d6b2cca53c..e12ccccdaff37 100644
--- a/libcxx/vendor/llvm/default_assertion_handler.in
+++ b/libcxx/vendor/llvm/default_assertion_handler.in
@@ -26,7 +26,8 @@
 #  if __has_builtin(__builtin_verbose_trap)
 // AppleClang shipped a slightly different version of __builtin_verbose_trap from the upstream
 // version before upstream Clang actually got the builtin.
-#    if defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 17000
+// TODO: Remove once AppleClang supports the two-arguments version of the builtin.
+#    if defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 1700
 #      define _LIBCPP_ASSERTION_HANDLER(message) __builtin_verbose_trap(message)
 #    else
 #      define _LIBCPP_ASSERTION_HANDLER(message) __builtin_verbose_trap("libc++", message)

From 03d133728ae14704b262c55bbb72ecd9d048add5 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic@amd.com>
Date: Mon, 30 Sep 2024 10:39:17 +0200
Subject: [PATCH 329/427] AMDGPU: Add test for 16 bit unsigned scratch offsets
 (#110255)

Large scratch offset with one on highest bit selected as negative,
negative offset has same binary representation in 16 bits as large
unsigned offset.

(cherry picked from commit e9d12a6b451bd403d95105aa976a011dc821f126)
---
 .../CodeGen/AMDGPU/GlobalISel/flat-scratch.ll | 239 ++++++++++
 llvm/test/CodeGen/AMDGPU/flat-scratch.ll      | 444 ++++++++++++++++++
 2 files changed, 683 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index a5e4151bf3695..47ca6f416b02b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -1513,4 +1513,243 @@ bb:
   ret void
 }
 
+define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %sgpr_base) {
+; GFX9-LABEL: sgpr_base_large_offset:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-NEXT:    s_add_u32 s0, s2, 0xffe8
+; GFX9-NEXT:    scratch_load_dword v2, off, s0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    global_store_dword v[0:1], v2, off
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: sgpr_base_large_offset:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_add_u32 s0, s0, s5
+; GFX10-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-NEXT:    s_add_u32 s0, s2, 0xffe8
+; GFX10-NEXT:    scratch_load_dword v2, off, s0
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    global_store_dword v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX940-LABEL: sgpr_base_large_offset:
+; GFX940:       ; %bb.0: ; %entry
+; GFX940-NEXT:    s_add_u32 s0, s0, 0xffe8
+; GFX940-NEXT:    scratch_load_dword v2, off, s0
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
+; GFX940-NEXT:    s_endpgm
+;
+; GFX11-LABEL: sgpr_base_large_offset:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_add_u32 s0, s0, 0xffe8
+; GFX11-NEXT:    scratch_load_b32 v2, off, s0
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX11-NEXT:    s_nop 0
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sgpr_base_large_offset:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    scratch_load_b32 v2, off, s0 offset:65512
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX12-NEXT:    s_nop 0
+; GFX12-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT:    s_endpgm
+entry:
+  %large_offset = getelementptr i8, ptr addrspace(5) %sgpr_base, i32 65512
+  %load = load i32, ptr addrspace(5) %large_offset, align 4
+  store i32 %load, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr addrspace(5) inreg %sgpr_base) {
+; GFX9-LABEL: sgpr_base_large_offset_split:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-NEXT:    s_and_b32 s0, s2, -4
+; GFX9-NEXT:    s_add_u32 s0, s0, 0x100ffe8
+; GFX9-NEXT:    scratch_load_dword v2, off, s0 glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    global_store_dword v[0:1], v2, off
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: sgpr_base_large_offset_split:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_add_u32 s0, s0, s5
+; GFX10-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-NEXT:    s_and_b32 s0, s2, -4
+; GFX10-NEXT:    s_add_u32 s0, s0, 0x100ffe8
+; GFX10-NEXT:    scratch_load_dword v2, off, s0 glc dlc
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    global_store_dword v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX940-LABEL: sgpr_base_large_offset_split:
+; GFX940:       ; %bb.0: ; %entry
+; GFX940-NEXT:    s_and_b32 s0, s0, -4
+; GFX940-NEXT:    s_add_u32 s0, s0, 0x100ffe8
+; GFX940-NEXT:    scratch_load_dword v2, off, s0 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
+; GFX940-NEXT:    s_endpgm
+;
+; GFX11-LABEL: sgpr_base_large_offset_split:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_and_b32 s0, s0, -4
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    s_add_u32 s0, s0, 0x100ffe8
+; GFX11-NEXT:    scratch_load_b32 v2, off, s0 glc dlc
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX11-NEXT:    s_nop 0
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sgpr_base_large_offset_split:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_and_b32 s0, s0, -4
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT:    s_add_co_u32 s0, s0, 0x100ffe8
+; GFX12-NEXT:    scratch_load_b32 v2, off, s0 scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX12-NEXT:    s_nop 0
+; GFX12-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT:    s_endpgm
+entry:
+  ;%allignedBase = alloca [33554432 x i8], align 4, addrspace(5)
+  %sgpr_base_i32 = ptrtoint ptr addrspace(5) %sgpr_base to i32
+  %sgpr_base_i32_align4 = and i32 %sgpr_base_i32, 4294967292
+  %sgpr_base_align4 = inttoptr i32 %sgpr_base_i32_align4 to ptr addrspace(5)
+  %split_offset = getelementptr inbounds [33554432 x i8], ptr addrspace(5) %sgpr_base_align4, i32 0, i32 16842728
+  %load = load volatile i32, ptr addrspace(5) %split_offset, align 4
+  store i32 %load, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr addrspace(5) inreg %sgpr_base, i32 inreg %sidx, i32 %vidx) {
+; GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
+; GFX9:       ; %bb.0: ; %bb
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-NEXT:    v_add_u32_e32 v0, s3, v0
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffe8
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-NEXT:    v_add3_u32 v0, s2, v0, v1
+; GFX9-NEXT:    v_mov_b32_e32 v1, 15
+; GFX9-NEXT:    scratch_store_dword v0, v1, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
+; GFX10:       ; %bb.0: ; %bb
+; GFX10-NEXT:    s_add_u32 s0, s0, s5
+; GFX10-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-NEXT:    v_add_nc_u32_e32 v0, s3, v0
+; GFX10-NEXT:    v_mov_b32_e32 v1, 15
+; GFX10-NEXT:    v_add3_u32 v0, s2, v0, 0xffe8
+; GFX10-NEXT:    scratch_store_dword v0, v1, off
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    s_endpgm
+;
+; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    v_add_u32_e32 v0, s1, v0
+; GFX940-NEXT:    v_mov_b32_e32 v1, 0xffe8
+; GFX940-NEXT:    v_add3_u32 v0, s0, v0, v1
+; GFX940-NEXT:    v_mov_b32_e32 v1, 15
+; GFX940-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
+;
+; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
+; GFX11:       ; %bb.0: ; %bb
+; GFX11-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_add3_u32 v0, s0, v0, 0xffe8
+; GFX11-NEXT:    scratch_store_b32 v0, v1, off dlc
+; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
+; GFX12:       ; %bb.0: ; %bb
+; GFX12-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT:    v_add_nc_u32_e32 v0, s0, v0
+; GFX12-NEXT:    scratch_store_b32 v0, v1, off offset:65512 scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    s_endpgm
+bb:
+  %add1 = add nsw i32 %sidx, %vidx
+  %add2 = add nsw i32 %add1, 65512
+  %gep = getelementptr inbounds [33554432 x i8], ptr addrspace(5) %sgpr_base, i32 0, i32 %add2
+  store volatile i32 15, ptr addrspace(5) %gep, align 4
+  ret void
+}
+
+define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %scevgep) {
+; GFX9-LABEL: sgpr_base_negative_offset:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-NEXT:    s_add_u32 s0, s2, 0xffffffe8
+; GFX9-NEXT:    scratch_load_dword v2, off, s0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    global_store_dword v[0:1], v2, off
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: sgpr_base_negative_offset:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_add_u32 s0, s0, s5
+; GFX10-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-NEXT:    scratch_load_dword v2, off, s2 offset:-24
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    global_store_dword v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX940-LABEL: sgpr_base_negative_offset:
+; GFX940:       ; %bb.0: ; %entry
+; GFX940-NEXT:    s_add_u32 s0, s0, 0xffffffe8
+; GFX940-NEXT:    scratch_load_dword v2, off, s0
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
+; GFX940-NEXT:    s_endpgm
+;
+; GFX11-LABEL: sgpr_base_negative_offset:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    scratch_load_b32 v2, off, s0 offset:-24
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX11-NEXT:    s_nop 0
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sgpr_base_negative_offset:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    scratch_load_b32 v2, off, s0 offset:-24
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX12-NEXT:    s_nop 0
+; GFX12-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT:    s_endpgm
+entry:
+  %scevgep28 = getelementptr i8, ptr addrspace(5) %scevgep, i32 -24
+  %0 = load i32, ptr addrspace(5) %scevgep28, align 4
+  store i32 %0, ptr addrspace(1) %out
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index 14d8b71c5167a..f040b47428640 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -4921,5 +4921,449 @@ bb:
   ret void
 }
 
+define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %sgpr_base) {
+; GFX9-LABEL: sgpr_base_large_offset:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-NEXT:    s_add_i32 s2, s2, 0xffe8
+; GFX9-NEXT:    scratch_load_dword v2, off, s2
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    global_store_dword v[0:1], v2, off
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: sgpr_base_large_offset:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_add_u32 s0, s0, s5
+; GFX10-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-NEXT:    s_add_i32 s2, s2, 0xffe8
+; GFX10-NEXT:    scratch_load_dword v2, off, s2
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    global_store_dword v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: sgpr_base_large_offset:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_add_i32 s0, s0, 0xffe8
+; GFX11-NEXT:    scratch_load_b32 v2, off, s0
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX11-NEXT:    s_nop 0
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sgpr_base_large_offset:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    scratch_load_b32 v2, off, s0 offset:-24
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX12-NEXT:    s_nop 0
+; GFX12-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT:    s_endpgm
+;
+; GFX9-PAL-LABEL: sgpr_base_large_offset:
+; GFX9-PAL:       ; %bb.0: ; %entry
+; GFX9-PAL-NEXT:    s_getpc_b64 s[2:3]
+; GFX9-PAL-NEXT:    s_mov_b32 s2, s8
+; GFX9-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
+; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s5
+; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
+; GFX9-PAL-NEXT:    s_add_i32 s0, s0, 0xffe8
+; GFX9-PAL-NEXT:    scratch_load_dword v2, off, s0
+; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-PAL-NEXT:    global_store_dword v[0:1], v2, off
+; GFX9-PAL-NEXT:    s_endpgm
+;
+; GFX940-LABEL: sgpr_base_large_offset:
+; GFX940:       ; %bb.0: ; %entry
+; GFX940-NEXT:    s_add_i32 s0, s0, 0xffe8
+; GFX940-NEXT:    scratch_load_dword v2, off, s0
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
+; GFX940-NEXT:    s_endpgm
+;
+; GFX10-PAL-LABEL: sgpr_base_large_offset:
+; GFX10-PAL:       ; %bb.0: ; %entry
+; GFX10-PAL-NEXT:    s_getpc_b64 s[2:3]
+; GFX10-PAL-NEXT:    s_mov_b32 s2, s8
+; GFX10-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX10-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
+; GFX10-PAL-NEXT:    s_add_u32 s2, s2, s5
+; GFX10-PAL-NEXT:    s_addc_u32 s3, s3, 0
+; GFX10-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
+; GFX10-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
+; GFX10-PAL-NEXT:    s_add_i32 s0, s0, 0xffe8
+; GFX10-PAL-NEXT:    scratch_load_dword v2, off, s0
+; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-PAL-NEXT:    global_store_dword v[0:1], v2, off
+; GFX10-PAL-NEXT:    s_endpgm
+;
+; GFX11-PAL-LABEL: sgpr_base_large_offset:
+; GFX11-PAL:       ; %bb.0: ; %entry
+; GFX11-PAL-NEXT:    s_add_i32 s0, s0, 0xffe8
+; GFX11-PAL-NEXT:    scratch_load_b32 v2, off, s0
+; GFX11-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-PAL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX11-PAL-NEXT:    s_nop 0
+; GFX11-PAL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-PAL-NEXT:    s_endpgm
+;
+; GFX12-PAL-LABEL: sgpr_base_large_offset:
+; GFX12-PAL:       ; %bb.0: ; %entry
+; GFX12-PAL-NEXT:    scratch_load_b32 v2, off, s0 offset:-24
+; GFX12-PAL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-PAL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX12-PAL-NEXT:    s_nop 0
+; GFX12-PAL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-PAL-NEXT:    s_endpgm
+entry:
+  %large_offset = getelementptr i8, ptr addrspace(5) %sgpr_base, i32 65512
+  %load = load i32, ptr addrspace(5) %large_offset, align 4
+  store i32 %load, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr addrspace(5) inreg %sgpr_base) {
+; GFX9-LABEL: sgpr_base_large_offset_split:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-NEXT:    s_and_b32 s0, s2, -4
+; GFX9-NEXT:    s_add_i32 s0, s0, 0x100f000
+; GFX9-NEXT:    scratch_load_dword v2, off, s0 offset:4072 glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    global_store_dword v[0:1], v2, off
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: sgpr_base_large_offset_split:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_add_u32 s0, s0, s5
+; GFX10-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-NEXT:    s_and_b32 s0, s2, -4
+; GFX10-NEXT:    s_add_i32 s0, s0, 0x100f800
+; GFX10-NEXT:    scratch_load_dword v2, off, s0 offset:2024 glc dlc
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    global_store_dword v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: sgpr_base_large_offset_split:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0x100f000
+; GFX11-NEXT:    s_and_b32 s0, s0, -4
+; GFX11-NEXT:    scratch_load_b32 v2, v2, s0 offset:4072 glc dlc
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX11-NEXT:    s_nop 0
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sgpr_base_large_offset_split:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    v_mov_b32_e32 v2, 0x1000000
+; GFX12-NEXT:    s_and_b32 s0, s0, -4
+; GFX12-NEXT:    scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX12-NEXT:    s_nop 0
+; GFX12-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT:    s_endpgm
+;
+; GFX9-PAL-LABEL: sgpr_base_large_offset_split:
+; GFX9-PAL:       ; %bb.0: ; %entry
+; GFX9-PAL-NEXT:    s_getpc_b64 s[2:3]
+; GFX9-PAL-NEXT:    s_mov_b32 s2, s8
+; GFX9-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
+; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s5
+; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
+; GFX9-PAL-NEXT:    s_and_b32 s0, s0, -4
+; GFX9-PAL-NEXT:    s_add_i32 s0, s0, 0x100f000
+; GFX9-PAL-NEXT:    scratch_load_dword v2, off, s0 offset:4072 glc
+; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-PAL-NEXT:    global_store_dword v[0:1], v2, off
+; GFX9-PAL-NEXT:    s_endpgm
+;
+; GFX940-LABEL: sgpr_base_large_offset_split:
+; GFX940:       ; %bb.0: ; %entry
+; GFX940-NEXT:    s_and_b32 s0, s0, -4
+; GFX940-NEXT:    v_mov_b32_e32 v2, 0x100f000
+; GFX940-NEXT:    scratch_load_dword v2, v2, s0 offset:4072 sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
+; GFX940-NEXT:    s_endpgm
+;
+; GFX10-PAL-LABEL: sgpr_base_large_offset_split:
+; GFX10-PAL:       ; %bb.0: ; %entry
+; GFX10-PAL-NEXT:    s_getpc_b64 s[2:3]
+; GFX10-PAL-NEXT:    s_mov_b32 s2, s8
+; GFX10-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX10-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
+; GFX10-PAL-NEXT:    s_add_u32 s2, s2, s5
+; GFX10-PAL-NEXT:    s_addc_u32 s3, s3, 0
+; GFX10-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
+; GFX10-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
+; GFX10-PAL-NEXT:    s_and_b32 s0, s0, -4
+; GFX10-PAL-NEXT:    s_add_i32 s0, s0, 0x100f800
+; GFX10-PAL-NEXT:    scratch_load_dword v2, off, s0 offset:2024 glc dlc
+; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-PAL-NEXT:    global_store_dword v[0:1], v2, off
+; GFX10-PAL-NEXT:    s_endpgm
+;
+; GFX11-PAL-LABEL: sgpr_base_large_offset_split:
+; GFX11-PAL:       ; %bb.0: ; %entry
+; GFX11-PAL-NEXT:    v_mov_b32_e32 v2, 0x100f000
+; GFX11-PAL-NEXT:    s_and_b32 s0, s0, -4
+; GFX11-PAL-NEXT:    scratch_load_b32 v2, v2, s0 offset:4072 glc dlc
+; GFX11-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-PAL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX11-PAL-NEXT:    s_nop 0
+; GFX11-PAL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-PAL-NEXT:    s_endpgm
+;
+; GFX12-PAL-LABEL: sgpr_base_large_offset_split:
+; GFX12-PAL:       ; %bb.0: ; %entry
+; GFX12-PAL-NEXT:    v_mov_b32_e32 v2, 0x1000000
+; GFX12-PAL-NEXT:    s_and_b32 s0, s0, -4
+; GFX12-PAL-NEXT:    scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-PAL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-PAL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX12-PAL-NEXT:    s_nop 0
+; GFX12-PAL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-PAL-NEXT:    s_endpgm
+entry:
+  ;%allignedBase = alloca [33554432 x i8], align 4, addrspace(5)
+  %sgpr_base_i32 = ptrtoint ptr addrspace(5) %sgpr_base to i32
+  %sgpr_base_i32_align4 = and i32 %sgpr_base_i32, 4294967292
+  %sgpr_base_align4 = inttoptr i32 %sgpr_base_i32_align4 to ptr addrspace(5)
+  %split_offset = getelementptr inbounds [33554432 x i8], ptr addrspace(5) %sgpr_base_align4, i32 0, i32 16842728
+  %load = load volatile i32, ptr addrspace(5) %split_offset, align 4
+  store i32 %load, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr addrspace(5) inreg %sgpr_base, i32 inreg %sidx, i32 %vidx) {
+; GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
+; GFX9:       ; %bb.0: ; %bb
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-NEXT:    s_add_i32 s2, s2, s3
+; GFX9-NEXT:    v_add_u32_e32 v0, s2, v0
+; GFX9-NEXT:    v_add_u32_e32 v0, 0xffe8, v0
+; GFX9-NEXT:    v_mov_b32_e32 v1, 15
+; GFX9-NEXT:    scratch_store_dword v0, v1, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
+; GFX10:       ; %bb.0: ; %bb
+; GFX10-NEXT:    s_add_u32 s0, s0, s5
+; GFX10-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-NEXT:    s_add_i32 s2, s2, s3
+; GFX10-NEXT:    v_mov_b32_e32 v1, 15
+; GFX10-NEXT:    v_add3_u32 v0, s2, v0, 0xffe8
+; GFX10-NEXT:    scratch_store_dword v0, v1, off
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
+; GFX11:       ; %bb.0: ; %bb
+; GFX11-NEXT:    s_add_i32 s0, s0, s1
+; GFX11-NEXT:    v_mov_b32_e32 v1, 15
+; GFX11-NEXT:    v_add3_u32 v0, s0, v0, 0xffe8
+; GFX11-NEXT:    scratch_store_b32 v0, v1, off dlc
+; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
+; GFX12:       ; %bb.0: ; %bb
+; GFX12-NEXT:    v_mov_b32_e32 v1, 15
+; GFX12-NEXT:    s_add_co_i32 s0, s0, s1
+; GFX12-NEXT:    scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    s_endpgm
+;
+; GFX9-PAL-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
+; GFX9-PAL:       ; %bb.0: ; %bb
+; GFX9-PAL-NEXT:    s_getpc_b64 s[2:3]
+; GFX9-PAL-NEXT:    s_mov_b32 s2, s8
+; GFX9-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, 15
+; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
+; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s5
+; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
+; GFX9-PAL-NEXT:    s_add_i32 s0, s0, s1
+; GFX9-PAL-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX9-PAL-NEXT:    v_add_u32_e32 v0, 0xffe8, v0
+; GFX9-PAL-NEXT:    scratch_store_dword v0, v1, off
+; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-PAL-NEXT:    s_endpgm
+;
+; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
+; GFX940:       ; %bb.0: ; %bb
+; GFX940-NEXT:    s_add_i32 s0, s0, s1
+; GFX940-NEXT:    v_add_u32_e32 v0, s0, v0
+; GFX940-NEXT:    v_add_u32_e32 v0, 0xffe8, v0
+; GFX940-NEXT:    v_mov_b32_e32 v1, 15
+; GFX940-NEXT:    scratch_store_dword v0, v1, off sc0 sc1
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    s_endpgm
+;
+; GFX10-PAL-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
+; GFX10-PAL:       ; %bb.0: ; %bb
+; GFX10-PAL-NEXT:    s_getpc_b64 s[2:3]
+; GFX10-PAL-NEXT:    s_mov_b32 s2, s8
+; GFX10-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX10-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
+; GFX10-PAL-NEXT:    s_add_u32 s2, s2, s5
+; GFX10-PAL-NEXT:    s_addc_u32 s3, s3, 0
+; GFX10-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
+; GFX10-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
+; GFX10-PAL-NEXT:    s_add_i32 s0, s0, s1
+; GFX10-PAL-NEXT:    v_mov_b32_e32 v1, 15
+; GFX10-PAL-NEXT:    v_add3_u32 v0, s0, v0, 0xffe8
+; GFX10-PAL-NEXT:    scratch_store_dword v0, v1, off
+; GFX10-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-PAL-NEXT:    s_endpgm
+;
+; GFX11-PAL-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
+; GFX11-PAL:       ; %bb.0: ; %bb
+; GFX11-PAL-NEXT:    s_add_i32 s0, s0, s1
+; GFX11-PAL-NEXT:    v_mov_b32_e32 v1, 15
+; GFX11-PAL-NEXT:    v_add3_u32 v0, s0, v0, 0xffe8
+; GFX11-PAL-NEXT:    scratch_store_b32 v0, v1, off dlc
+; GFX11-PAL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-PAL-NEXT:    s_endpgm
+;
+; GFX12-PAL-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset:
+; GFX12-PAL:       ; %bb.0: ; %bb
+; GFX12-PAL-NEXT:    v_mov_b32_e32 v1, 15
+; GFX12-PAL-NEXT:    s_add_co_i32 s0, s0, s1
+; GFX12-PAL-NEXT:    scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-PAL-NEXT:    s_wait_storecnt 0x0
+; GFX12-PAL-NEXT:    s_endpgm
+bb:
+  %add1 = add nsw i32 %sidx, %vidx
+  %add2 = add nsw i32 %add1, 65512
+  %gep = getelementptr inbounds [33554432 x i8], ptr addrspace(5) %sgpr_base, i32 0, i32 %add2
+  store volatile i32 15, ptr addrspace(5) %gep, align 4
+  ret void
+}
+
+define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %scevgep) {
+; GFX9-LABEL: sgpr_base_negative_offset:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-NEXT:    s_addk_i32 s2, 0xffe8
+; GFX9-NEXT:    scratch_load_dword v2, off, s2
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    global_store_dword v[0:1], v2, off
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: sgpr_base_negative_offset:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_add_u32 s0, s0, s5
+; GFX10-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-NEXT:    scratch_load_dword v2, off, s2 offset:-24
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    global_store_dword v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: sgpr_base_negative_offset:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    scratch_load_b32 v2, off, s0 offset:-24
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX11-NEXT:    s_nop 0
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: sgpr_base_negative_offset:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    scratch_load_b32 v2, off, s0 offset:-24
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX12-NEXT:    s_nop 0
+; GFX12-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT:    s_endpgm
+;
+; GFX9-PAL-LABEL: sgpr_base_negative_offset:
+; GFX9-PAL:       ; %bb.0: ; %entry
+; GFX9-PAL-NEXT:    s_getpc_b64 s[2:3]
+; GFX9-PAL-NEXT:    s_mov_b32 s2, s8
+; GFX9-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
+; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s5
+; GFX9-PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
+; GFX9-PAL-NEXT:    s_addk_i32 s0, 0xffe8
+; GFX9-PAL-NEXT:    scratch_load_dword v2, off, s0
+; GFX9-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-PAL-NEXT:    global_store_dword v[0:1], v2, off
+; GFX9-PAL-NEXT:    s_endpgm
+;
+; GFX940-LABEL: sgpr_base_negative_offset:
+; GFX940:       ; %bb.0: ; %entry
+; GFX940-NEXT:    s_addk_i32 s0, 0xffe8
+; GFX940-NEXT:    scratch_load_dword v2, off, s0
+; GFX940-NEXT:    s_waitcnt vmcnt(0)
+; GFX940-NEXT:    global_store_dword v[0:1], v2, off sc0 sc1
+; GFX940-NEXT:    s_endpgm
+;
+; GFX10-PAL-LABEL: sgpr_base_negative_offset:
+; GFX10-PAL:       ; %bb.0: ; %entry
+; GFX10-PAL-NEXT:    s_getpc_b64 s[2:3]
+; GFX10-PAL-NEXT:    s_mov_b32 s2, s8
+; GFX10-PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX10-PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-PAL-NEXT:    s_and_b32 s3, s3, 0xffff
+; GFX10-PAL-NEXT:    s_add_u32 s2, s2, s5
+; GFX10-PAL-NEXT:    s_addc_u32 s3, s3, 0
+; GFX10-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
+; GFX10-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
+; GFX10-PAL-NEXT:    scratch_load_dword v2, off, s0 offset:-24
+; GFX10-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-PAL-NEXT:    global_store_dword v[0:1], v2, off
+; GFX10-PAL-NEXT:    s_endpgm
+;
+; GFX11-PAL-LABEL: sgpr_base_negative_offset:
+; GFX11-PAL:       ; %bb.0: ; %entry
+; GFX11-PAL-NEXT:    scratch_load_b32 v2, off, s0 offset:-24
+; GFX11-PAL-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-PAL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX11-PAL-NEXT:    s_nop 0
+; GFX11-PAL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-PAL-NEXT:    s_endpgm
+;
+; GFX12-PAL-LABEL: sgpr_base_negative_offset:
+; GFX12-PAL:       ; %bb.0: ; %entry
+; GFX12-PAL-NEXT:    scratch_load_b32 v2, off, s0 offset:-24
+; GFX12-PAL-NEXT:    s_wait_loadcnt 0x0
+; GFX12-PAL-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX12-PAL-NEXT:    s_nop 0
+; GFX12-PAL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-PAL-NEXT:    s_endpgm
+entry:
+  %scevgep28 = getelementptr i8, ptr addrspace(5) %scevgep, i32 -24
+  %0 = load i32, ptr addrspace(5) %scevgep28, align 4
+  store i32 %0, ptr addrspace(1) %out
+  ret void
+}
+
 declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture writeonly, i8, i64, i1 immarg)
 declare i32 @llvm.amdgcn.workitem.id.x()

From 962edd3f71eebdcd781222cdd97a561979894003 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic@amd.com>
Date: Mon, 30 Sep 2024 10:44:59 +0200
Subject: [PATCH 330/427] AMDGPU: Fix inst-selection of large scratch offsets
 with sgpr base (#110256)

Use i32 for offset instead of i16, this way it does not get interpreted
as negative 16 bit offset.

(cherry picked from commit 83fe85115da9dc25fa270d2ea8140113c8d49670)
---
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  6 +++---
 llvm/test/CodeGen/AMDGPU/flat-scratch.ll      | 12 ++++++------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b7471bab12850..7b786ee264172 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1911,7 +1911,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
                     0);
   }
 
-  Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
+  Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i32);
 
   return true;
 }
@@ -1967,7 +1967,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
           return false;
         if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
           return false;
-        Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
+        Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
         return true;
       }
     }
@@ -2000,7 +2000,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
   if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
     return false;
   SAddr = SelectSAddrFI(CurDAG, SAddr);
-  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
+  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
   return true;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index f040b47428640..284f174614522 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -4956,7 +4956,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
 ;
 ; GFX12-LABEL: sgpr_base_large_offset:
 ; GFX12:       ; %bb.0: ; %entry
-; GFX12-NEXT:    scratch_load_b32 v2, off, s0 offset:-24
+; GFX12-NEXT:    scratch_load_b32 v2, off, s0 offset:65512
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
 ; GFX12-NEXT:    s_nop 0
@@ -5015,7 +5015,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
 ;
 ; GFX12-PAL-LABEL: sgpr_base_large_offset:
 ; GFX12-PAL:       ; %bb.0: ; %entry
-; GFX12-PAL-NEXT:    scratch_load_b32 v2, off, s0 offset:-24
+; GFX12-PAL-NEXT:    scratch_load_b32 v2, off, s0 offset:65512
 ; GFX12-PAL-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-PAL-NEXT:    global_store_b32 v[0:1], v2, off
 ; GFX12-PAL-NEXT:    s_nop 0
@@ -5068,7 +5068,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
 ; GFX12:       ; %bb.0: ; %entry
 ; GFX12-NEXT:    v_mov_b32_e32 v2, 0x1000000
 ; GFX12-NEXT:    s_and_b32 s0, s0, -4
-; GFX12-NEXT:    scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-NEXT:    scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
 ; GFX12-NEXT:    s_nop 0
@@ -5133,7 +5133,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
 ; GFX12-PAL:       ; %bb.0: ; %entry
 ; GFX12-PAL-NEXT:    v_mov_b32_e32 v2, 0x1000000
 ; GFX12-PAL-NEXT:    s_and_b32 s0, s0, -4
-; GFX12-PAL-NEXT:    scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-PAL-NEXT:    scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS
 ; GFX12-PAL-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-PAL-NEXT:    global_store_b32 v[0:1], v2, off
 ; GFX12-PAL-NEXT:    s_nop 0
@@ -5189,7 +5189,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
 ; GFX12:       ; %bb.0: ; %bb
 ; GFX12-NEXT:    v_mov_b32_e32 v1, 15
 ; GFX12-NEXT:    s_add_co_i32 s0, s0, s1
-; GFX12-NEXT:    scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-NEXT:    scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
 ; GFX12-NEXT:    s_wait_storecnt 0x0
 ; GFX12-NEXT:    s_endpgm
 ;
@@ -5251,7 +5251,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
 ; GFX12-PAL:       ; %bb.0: ; %bb
 ; GFX12-PAL-NEXT:    v_mov_b32_e32 v1, 15
 ; GFX12-PAL-NEXT:    s_add_co_i32 s0, s0, s1
-; GFX12-PAL-NEXT:    scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-PAL-NEXT:    scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
 ; GFX12-PAL-NEXT:    s_wait_storecnt 0x0
 ; GFX12-PAL-NEXT:    s_endpgm
 bb:

From fae11d419de1ab34c7e999eda0b2292b85d61c67 Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@hieta.se>
Date: Tue, 1 Oct 2024 09:01:21 +0200
Subject: [PATCH 331/427] Bump version to 19.1.1

---
 cmake/Modules/LLVMVersion.cmake          | 2 +-
 libcxx/include/__config                  | 2 +-
 llvm/utils/gn/secondary/llvm/version.gni | 2 +-
 llvm/utils/lit/lit/__init__.py           | 2 +-
 llvm/utils/mlgo-utils/mlgo/__init__.py   | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake
index 928c6c439bd1f..8bdcd828eb247 100644
--- a/cmake/Modules/LLVMVersion.cmake
+++ b/cmake/Modules/LLVMVersion.cmake
@@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR)
   set(LLVM_VERSION_MINOR 1)
 endif()
 if(NOT DEFINED LLVM_VERSION_PATCH)
-  set(LLVM_VERSION_PATCH 0)
+  set(LLVM_VERSION_PATCH 1)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
   set(LLVM_VERSION_SUFFIX)
diff --git a/libcxx/include/__config b/libcxx/include/__config
index 661af5be3c225..b2abd372929d1 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -27,7 +27,7 @@
 // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM.
 // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is
 // defined to XXYYZZ.
-#  define _LIBCPP_VERSION 190100
+#  define _LIBCPP_VERSION 190101
 
 #  define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
 #  define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni
index 3f44a4645acf6..5e0a56f80bfff 100644
--- a/llvm/utils/gn/secondary/llvm/version.gni
+++ b/llvm/utils/gn/secondary/llvm/version.gni
@@ -1,4 +1,4 @@
 llvm_version_major = 19
 llvm_version_minor = 1
-llvm_version_patch = 0
+llvm_version_patch = 1
 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch"
diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py
index 03edfc3360972..b0cac053a938e 100644
--- a/llvm/utils/lit/lit/__init__.py
+++ b/llvm/utils/lit/lit/__init__.py
@@ -2,7 +2,7 @@
 
 __author__ = "Daniel Dunbar"
 __email__ = "daniel@minormatter.com"
-__versioninfo__ = (19, 1, 0)
+__versioninfo__ = (19, 1, 1)
 __version__ = ".".join(str(v) for v in __versioninfo__) + "dev"
 
 __all__ = []
diff --git a/llvm/utils/mlgo-utils/mlgo/__init__.py b/llvm/utils/mlgo-utils/mlgo/__init__.py
index 09e95f3d62058..26e0241539429 100644
--- a/llvm/utils/mlgo-utils/mlgo/__init__.py
+++ b/llvm/utils/mlgo-utils/mlgo/__init__.py
@@ -4,7 +4,7 @@
 
 from datetime import timezone, datetime
 
-__versioninfo__ = (19, 1, 0)
+__versioninfo__ = (19, 1, 1)
 __version__ = (
     ".".join(str(v) for v in __versioninfo__)
     + "dev"

From 4dbe72f91a9601e66db2bee59c5bebcdc63f9472 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett@linaro.org>
Date: Thu, 15 Aug 2024 08:14:13 +0000
Subject: [PATCH 332/427] [lldb][test] Mark sys_info zdump test unsupported on
 32 bit Arm Linux

Until https://github.com/llvm/llvm-project/pull/103056 lands
or another more appropriate check can be found.

This test fails on Ubuntu Focal where zdump is built with 32 bit time_t
but passes on Ubuntu Jammy where zdump is built with 64 bit time_t.

Marking it unsupported means Linaro can upgrade its bots to Ubuntu
Jammy without getting an unexpected pass.

(cherry picked from commit 6f6422f4a2b8647a59936c131e50a79906d89510)
---
 .../time.zone.members/sys_info.zdump.pass.cpp                   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
index 207f8e4df4541..2b97d9a5bc745 100644
--- a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
+++ b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
@@ -14,7 +14,7 @@
 // XFAIL: availability-tzdb-missing
 
 // TODO TZDB Investigate
-// XFAIL: target={{armv(7|8)l-linux-gnueabihf}}
+// UNSUPPORTED: target={{armv(7|8)l-linux-gnueabihf}}
 
 #include <chrono>
 #include <format>

From 23eadbda36887257fcc197fef6f71ddc1830b8ab Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett@linaro.org>
Date: Fri, 13 Sep 2024 09:14:53 +0100
Subject: [PATCH 333/427] [libcxx][test] Use smaller time range for 32 bit
 time_t (#104762)

This fixes the test on Arm 32 bit Ubuntu Jammy where time_t is 32 bit.

(cherry picked from commit cdd608b8f0ce090b3568238387df368751bdbb5d)
---
 .../time.zone.members/sys_info.zdump.pass.cpp                | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
index 2b97d9a5bc745..b474fe50083b1 100644
--- a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
+++ b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
@@ -13,9 +13,6 @@
 // XFAIL: libcpp-has-no-experimental-tzdb
 // XFAIL: availability-tzdb-missing
 
-// TODO TZDB Investigate
-// UNSUPPORTED: target={{armv(7|8)l-linux-gnueabihf}}
-
 #include <chrono>
 #include <format>
 #include <fstream>
@@ -28,7 +25,7 @@
 // The year range to validate. The dates used in practice are expected to be
 // inside the tested range.
 constexpr std::chrono::year first{1800};
-constexpr std::chrono::year last{2100};
+constexpr std::chrono::year last{sizeof(time_t) == 8 ? 2100 : 2037};
 
 // A custom sys_info class that also stores the name of the time zone.
 // Its formatter matches the output of zdump.

From 1f681b5913f9169332e4126ac0aafeedc5ca6cef Mon Sep 17 00:00:00 2001
From: Joe Faulls <67795994+joe-img@users.noreply.github.com>
Date: Thu, 12 Sep 2024 14:01:55 +0100
Subject: [PATCH 334/427] [CodeGen] Clear InitUndef pass new register cache
 between pass runs (#90967)

Multiple invocations of the pass could interfere with eachother,
preventing some undefs being initialised.

I found it very difficult to create a unit test for this due to it being
dependent on particular allocations of a previous function. However, the
bug can be observed here: https://godbolt.org/z/7xnMo41Gv with the
creation of the illegal instruction `vnsrl.wi v9, v8, 0`
---
 llvm/lib/CodeGen/InitUndef.cpp                |  1 +
 .../rvv/fixed-vectors-buildvec-of-binop.ll    | 13 ++-
 .../RISCV/rvv/fixed-vectors-masked-gather.ll  | 96 +++++++++----------
 .../test/CodeGen/RISCV/rvv/shuffle-reverse.ll | 60 ++++++------
 4 files changed, 85 insertions(+), 85 deletions(-)

diff --git a/llvm/lib/CodeGen/InitUndef.cpp b/llvm/lib/CodeGen/InitUndef.cpp
index 51c50ff872ef2..8f25ede0eb2b7 100644
--- a/llvm/lib/CodeGen/InitUndef.cpp
+++ b/llvm/lib/CodeGen/InitUndef.cpp
@@ -272,6 +272,7 @@ bool InitUndef::runOnMachineFunction(MachineFunction &MF) {
   for (auto *DeadMI : DeadInsts)
     DeadMI->eraseFromParent();
   DeadInsts.clear();
+  NewRegs.clear();
 
   return Changed;
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
index 65a1035fd815c..5d75efe681af7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
@@ -567,16 +567,15 @@ define <8 x i32> @add_constant_rhs_8xi32_partial(<8 x i32> %vin, i32 %a, i32 %b,
 ; CHECK-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
 ; CHECK-NEXT:    vslideup.vi v8, v10, 5
 ; CHECK-NEXT:    vmv.s.x v10, a2
-; CHECK-NEXT:    lui a0, %hi(.LCPI19_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI19_0)
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v12, (a0)
 ; CHECK-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
 ; CHECK-NEXT:    vslideup.vi v8, v10, 6
-; CHECK-NEXT:    vmv.s.x v10, a3
+; CHECK-NEXT:    lui a0, %hi(.LCPI19_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI19_0)
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vslideup.vi v8, v10, 7
-; CHECK-NEXT:    vadd.vv v8, v8, v12
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vmv.s.x v12, a3
+; CHECK-NEXT:    vslideup.vi v8, v12, 7
+; CHECK-NEXT:    vadd.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vadd = add <8 x i32> %vin, <i32 1, i32 2, i32 3, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
   %e0 = add i32 %a, 23
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 14f4f44049c53..24a5bd154c64f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -2501,9 +2501,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
+; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT:  .LBB35_9: # %else14
 ; RV64ZVE32F-NEXT:    andi a2, a1, 64
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
@@ -2546,9 +2546,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
+; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
 ; RV64ZVE32F-NEXT:    bnez a2, .LBB35_8
 ; RV64ZVE32F-NEXT:    j .LBB35_9
@@ -2652,9 +2652,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
+; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT:  .LBB36_9: # %else14
 ; RV64ZVE32F-NEXT:    andi a2, a1, 64
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
@@ -2697,9 +2697,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
+; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
 ; RV64ZVE32F-NEXT:    bnez a2, .LBB36_8
 ; RV64ZVE32F-NEXT:    j .LBB36_9
@@ -2808,9 +2808,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
+; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT:  .LBB37_9: # %else14
 ; RV64ZVE32F-NEXT:    andi a2, a1, 64
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
@@ -2856,9 +2856,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
+; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
 ; RV64ZVE32F-NEXT:    bnez a2, .LBB37_8
 ; RV64ZVE32F-NEXT:    j .LBB37_9
@@ -2966,9 +2966,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
+; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT:  .LBB38_9: # %else14
 ; RV64ZVE32F-NEXT:    andi a2, a1, 64
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
@@ -3011,9 +3011,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
+; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
 ; RV64ZVE32F-NEXT:    bnez a2, .LBB38_8
 ; RV64ZVE32F-NEXT:    j .LBB38_9
@@ -3118,9 +3118,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
+; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT:  .LBB39_9: # %else14
 ; RV64ZVE32F-NEXT:    andi a2, a1, 64
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
@@ -3163,9 +3163,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    lw a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT:    vmv.s.x v8, a2
+; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
 ; RV64ZVE32F-NEXT:    bnez a2, .LBB39_8
 ; RV64ZVE32F-NEXT:    j .LBB39_9
@@ -3275,9 +3275,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
 ; RV64ZVE32F-NEXT:    add a3, a0, a3
 ; RV64ZVE32F-NEXT:    lw a3, 0(a3)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vmv.s.x v8, a3
+; RV64ZVE32F-NEXT:    vmv.s.x v12, a3
 ; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT:  .LBB40_9: # %else14
 ; RV64ZVE32F-NEXT:    andi a3, a2, 64
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
@@ -3323,9 +3323,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
 ; RV64ZVE32F-NEXT:    add a3, a0, a3
 ; RV64ZVE32F-NEXT:    lw a3, 0(a3)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT:    vmv.s.x v8, a3
+; RV64ZVE32F-NEXT:    vmv.s.x v12, a3
 ; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT:    andi a3, a2, 32
 ; RV64ZVE32F-NEXT:    bnez a3, .LBB40_8
 ; RV64ZVE32F-NEXT:    j .LBB40_9
@@ -8200,9 +8200,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT:  .LBB74_9: # %else14
 ; RV64ZVE32F-NEXT:    andi a2, a1, 64
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
@@ -8245,9 +8245,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
 ; RV64ZVE32F-NEXT:    bnez a2, .LBB74_8
 ; RV64ZVE32F-NEXT:    j .LBB74_9
@@ -8351,9 +8351,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT:  .LBB75_9: # %else14
 ; RV64ZVE32F-NEXT:    andi a2, a1, 64
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
@@ -8396,9 +8396,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
 ; RV64ZVE32F-NEXT:    bnez a2, .LBB75_8
 ; RV64ZVE32F-NEXT:    j .LBB75_9
@@ -8507,9 +8507,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT:  .LBB76_9: # %else14
 ; RV64ZVE32F-NEXT:    andi a2, a1, 64
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
@@ -8555,9 +8555,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
 ; RV64ZVE32F-NEXT:    bnez a2, .LBB76_8
 ; RV64ZVE32F-NEXT:    j .LBB76_9
@@ -8665,9 +8665,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT:  .LBB77_9: # %else14
 ; RV64ZVE32F-NEXT:    andi a2, a1, 64
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
@@ -8710,9 +8710,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
 ; RV64ZVE32F-NEXT:    bnez a2, .LBB77_8
 ; RV64ZVE32F-NEXT:    j .LBB77_9
@@ -8817,9 +8817,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT:  .LBB78_9: # %else14
 ; RV64ZVE32F-NEXT:    andi a2, a1, 64
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
@@ -8862,9 +8862,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
 ; RV64ZVE32F-NEXT:    flw fa5, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
 ; RV64ZVE32F-NEXT:    bnez a2, .LBB78_8
 ; RV64ZVE32F-NEXT:    j .LBB78_9
@@ -8974,9 +8974,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
 ; RV64ZVE32F-NEXT:    add a3, a0, a3
 ; RV64ZVE32F-NEXT:    flw fa5, 0(a3)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 5
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT:  .LBB79_9: # %else14
 ; RV64ZVE32F-NEXT:    andi a3, a2, 64
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
@@ -9022,9 +9022,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
 ; RV64ZVE32F-NEXT:    add a3, a0, a3
 ; RV64ZVE32F-NEXT:    flw fa5, 0(a3)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT:    vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT:    vfmv.s.f v12, fa5
 ; RV64ZVE32F-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
-; RV64ZVE32F-NEXT:    vslideup.vi v10, v8, 4
+; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 4
 ; RV64ZVE32F-NEXT:    andi a3, a2, 32
 ; RV64ZVE32F-NEXT:    bnez a3, .LBB79_8
 ; RV64ZVE32F-NEXT:    j .LBB79_9
diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
index 368f454fa5fda..d6d57eef47e38 100644
--- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
@@ -234,10 +234,10 @@ define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-NEXT:    lui a1, %hi(.LCPI15_0)
 ; CHECK-NEXT:    addi a1, a1, %lo(.LCPI15_0)
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a1)
-; CHECK-NEXT:    vmv2r.v v20, v10
+; CHECK-NEXT:    vle16.v v20, (a1)
+; CHECK-NEXT:    vmv2r.v v16, v10
 ; CHECK-NEXT:    vmv2r.v v12, v8
-; CHECK-NEXT:    vrgather.vv v8, v12, v16
+; CHECK-NEXT:    vrgather.vv v8, v12, v20
 ; CHECK-NEXT:    vid.v v12
 ; CHECK-NEXT:    vrsub.vi v12, v12, 15
 ; CHECK-NEXT:    lui a0, 16
@@ -245,7 +245,7 @@ define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vrgather.vv v8, v20, v12, v0.t
+; CHECK-NEXT:    vrgather.vv v8, v16, v12, v0.t
 ; CHECK-NEXT:    ret
   %v32i16 = shufflevector <16 x i16> %a, <16 x i16> %b,  <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <32 x i16> %v32i16
@@ -296,12 +296,12 @@ define <8 x i32> @v4i32_2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v12, v9
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    vrsub.vi v13, v9, 7
+; CHECK-NEXT:    vid.v v13
+; CHECK-NEXT:    vrsub.vi v14, v13, 7
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vrgatherei16.vv v10, v8, v13
+; CHECK-NEXT:    vrgatherei16.vv v10, v8, v14
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vrsub.vi v8, v9, 3
+; CHECK-NEXT:    vrsub.vi v8, v13, 3
 ; CHECK-NEXT:    vmv.v.i v0, 15
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vrgatherei16.vv v10, v12, v8, v0.t
@@ -330,12 +330,12 @@ define <16 x i32> @v8i32_2(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv2r.v v16, v10
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    vrsub.vi v18, v10, 15
+; CHECK-NEXT:    vid.v v18
+; CHECK-NEXT:    vrsub.vi v20, v18, 15
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vrgatherei16.vv v12, v8, v18
+; CHECK-NEXT:    vrgatherei16.vv v12, v8, v20
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vrsub.vi v8, v10, 7
+; CHECK-NEXT:    vrsub.vi v8, v18, 7
 ; CHECK-NEXT:    li a0, 255
 ; CHECK-NEXT:    vmv.s.x v0, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
@@ -367,10 +367,10 @@ define <32 x i32> @v16i32_2(<16 x i32> %a, <16 x i32> %b) {
 ; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
 ; CHECK-NEXT:    addi a1, a1, %lo(.LCPI23_0)
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vle16.v v20, (a1)
+; CHECK-NEXT:    vle16.v v28, (a1)
 ; CHECK-NEXT:    vmv4r.v v24, v12
 ; CHECK-NEXT:    vmv4r.v v16, v8
-; CHECK-NEXT:    vrgatherei16.vv v8, v16, v20
+; CHECK-NEXT:    vrgatherei16.vv v8, v16, v28
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vid.v v16
 ; CHECK-NEXT:    vrsub.vi v16, v16, 15
@@ -430,12 +430,12 @@ define <8 x i64> @v4i64_2(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv2r.v v16, v10
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    vrsub.vi v11, v10, 7
+; CHECK-NEXT:    vid.v v18
+; CHECK-NEXT:    vrsub.vi v19, v18, 7
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vrgatherei16.vv v12, v8, v11
+; CHECK-NEXT:    vrgatherei16.vv v12, v8, v19
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vrsub.vi v8, v10, 3
+; CHECK-NEXT:    vrsub.vi v8, v18, 3
 ; CHECK-NEXT:    vmv.v.i v0, 15
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vrgatherei16.vv v12, v16, v8, v0.t
@@ -605,12 +605,12 @@ define <8 x float> @v4f32_2(<4 x float> %a, <4 x float> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv1r.v v12, v9
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    vrsub.vi v13, v9, 7
+; CHECK-NEXT:    vid.v v13
+; CHECK-NEXT:    vrsub.vi v14, v13, 7
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vrgatherei16.vv v10, v8, v13
+; CHECK-NEXT:    vrgatherei16.vv v10, v8, v14
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vrsub.vi v8, v9, 3
+; CHECK-NEXT:    vrsub.vi v8, v13, 3
 ; CHECK-NEXT:    vmv.v.i v0, 15
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vrgatherei16.vv v10, v12, v8, v0.t
@@ -639,12 +639,12 @@ define <16 x float> @v8f32_2(<8 x float> %a, <8 x float> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv2r.v v16, v10
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    vrsub.vi v18, v10, 15
+; CHECK-NEXT:    vid.v v18
+; CHECK-NEXT:    vrsub.vi v20, v18, 15
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vrgatherei16.vv v12, v8, v18
+; CHECK-NEXT:    vrgatherei16.vv v12, v8, v20
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vrsub.vi v8, v10, 7
+; CHECK-NEXT:    vrsub.vi v8, v18, 7
 ; CHECK-NEXT:    li a0, 255
 ; CHECK-NEXT:    vmv.s.x v0, a0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
@@ -701,12 +701,12 @@ define <8 x double> @v4f64_2(<4 x double> %a, <4 x double> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmv2r.v v16, v10
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    vrsub.vi v11, v10, 7
+; CHECK-NEXT:    vid.v v18
+; CHECK-NEXT:    vrsub.vi v19, v18, 7
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vrgatherei16.vv v12, v8, v11
+; CHECK-NEXT:    vrgatherei16.vv v12, v8, v19
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vrsub.vi v8, v10, 3
+; CHECK-NEXT:    vrsub.vi v8, v18, 3
 ; CHECK-NEXT:    vmv.v.i v0, 15
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; CHECK-NEXT:    vrgatherei16.vv v12, v16, v8, v0.t

From edd018ead705c6012a62a5d83ccc5d3341bcb20a Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang@intel.com>
Date: Mon, 23 Sep 2024 09:41:43 +0800
Subject: [PATCH 335/427] [X86][APX] Fix wrong encoding of promoted KMOV
 instructions due to missing NoCD8 (#109579)

Promoted KMOV* was encoded with CD8 incorrectly, see
https://godbolt.org/z/cax513hG1

(cherry picked from commit 0d334d83a4c7ce16fa1bc0e5e56bbdeaf01c2b2d)
---
 llvm/lib/Target/X86/X86InstrAVX512.td      | 27 +++++++++++-----------
 llvm/test/MC/Disassembler/X86/apx/kmov.txt | 16 +++++++++++++
 llvm/test/MC/X86/apx/kmov-att.s            | 14 ++++++++++-
 llvm/test/MC/X86/apx/kmov-intel.s          | 12 ++++++++++
 4 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index da690aea43f5c..1bf201f2bb87e 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2617,19 +2617,20 @@ defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, E
 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC, ValueType vvt,
                           X86MemOperand x86memop, string Suffix = ""> {
-  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove],
-      explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in
-  def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
-                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
-                  Sched<[WriteMove]>;
-  def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
-                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                    [(set KRC:$dst, (vvt (load addr:$src)))]>,
-                  Sched<[WriteLoad]>;
-  def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
-                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                    [(store KRC:$src, addr:$dst)]>,
-                  Sched<[WriteStore]>;
+  let explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in {
+    let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
+    def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
+                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+                    Sched<[WriteMove]>;
+    def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
+                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                      [(set KRC:$dst, (vvt (load addr:$src)))]>,
+                    Sched<[WriteLoad]>, NoCD8;
+    def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
+                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                      [(store KRC:$src, addr:$dst)]>,
+                    Sched<[WriteStore]>, NoCD8;
+  }
 }
 
 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
diff --git a/llvm/test/MC/Disassembler/X86/apx/kmov.txt b/llvm/test/MC/Disassembler/X86/apx/kmov.txt
index 5d947ff39f231..45fedbd0da587 100644
--- a/llvm/test/MC/Disassembler/X86/apx/kmov.txt
+++ b/llvm/test/MC/Disassembler/X86/apx/kmov.txt
@@ -17,6 +17,22 @@
 # INTEL: {evex} kmovq	k2, k1
 0x62,0xf1,0xfc,0x08,0x90,0xd1
 
+# ATT:   {evex} kmovb   -16(%rax), %k0
+# INTEL: {evex} kmovb   k0, byte ptr [rax - 16]
+0x62,0xf1,0x7d,0x08,0x90,0x40,0xf0
+
+# ATT:   {evex} kmovw   -16(%rax), %k0
+# INTEL: {evex} kmovw   k0, word ptr [rax - 16]
+0x62,0xf1,0x7c,0x08,0x90,0x40,0xf0
+
+# ATT:   {evex} kmovd   -16(%rax), %k0
+# INTEL: {evex} kmovd   k0, dword ptr [rax - 16]
+0x62,0xf1,0xfd,0x08,0x90,0x40,0xf0
+
+# ATT:   {evex} kmovq   -16(%rax), %k0
+# INTEL: {evex} kmovq   k0, qword ptr [rax - 16]
+0x62,0xf1,0xfc,0x08,0x90,0x40,0xf0
+
 # ATT-NOT: {evex}
 # INTEL-NOT: {evex}
 
diff --git a/llvm/test/MC/X86/apx/kmov-att.s b/llvm/test/MC/X86/apx/kmov-att.s
index 949ef65be98d4..5f59e0a505b23 100644
--- a/llvm/test/MC/X86/apx/kmov-att.s
+++ b/llvm/test/MC/X86/apx/kmov-att.s
@@ -1,7 +1,7 @@
 # RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s
 # RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR
 
-# ERROR-COUNT-20: error:
+# ERROR-COUNT-24: error:
 # ERROR-NOT: error:
 # CHECK: {evex}	kmovb	%k1, %k2
 # CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0xd1]
@@ -15,6 +15,18 @@
 # CHECK: {evex}	kmovq	%k1, %k2
 # CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1]
          {evex}	kmovq	%k1, %k2
+# CHECK: {evex} kmovb   -16(%rax), %k0
+# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0x40,0xf0]
+         {evex} kmovb   -0x10(%rax), %k0
+# CHECK: {evex} kmovw   -16(%rax), %k0
+# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0x40,0xf0]
+         {evex} kmovw   -0x10(%rax), %k0
+# CHECK: {evex} kmovd   -16(%rax), %k0
+# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0x40,0xf0]
+         {evex} kmovd   -0x10(%rax), %k0
+# CHECK: {evex} kmovq   -16(%rax), %k0
+# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0x40,0xf0]
+         {evex} kmovq   -0x10(%rax), %k0
 
 # CHECK-NOT: {evex}
 
diff --git a/llvm/test/MC/X86/apx/kmov-intel.s b/llvm/test/MC/X86/apx/kmov-intel.s
index 0cdbd310062eb..51cec67caf9a0 100644
--- a/llvm/test/MC/X86/apx/kmov-intel.s
+++ b/llvm/test/MC/X86/apx/kmov-intel.s
@@ -12,6 +12,18 @@
 # CHECK: {evex}	kmovq	k2, k1
 # CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1]
          {evex}	kmovq	k2, k1
+# CHECK: {evex} kmovb   k0, byte ptr [rax - 16]
+# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0x40,0xf0]
+         {evex} kmovb   k0, byte ptr [rax - 0x10]
+# CHECK: {evex} kmovw   k0, word ptr [rax - 16]
+# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0x40,0xf0]
+         {evex} kmovw   k0, word ptr [rax - 0x10]
+# CHECK: {evex} kmovd   k0, dword ptr [rax - 16]
+# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0x40,0xf0]
+         {evex} kmovd   k0, dword ptr [rax - 0x10]
+# CHECK: {evex} kmovq   k0, qword ptr [rax - 16]
+# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0x40,0xf0]
+         {evex} kmovq   k0, qword ptr [rax - 0x10]
 
 # CHECK-NOT: {evex}
 

From d401987fe349a87c53fe25829215b080b70c0c1a Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang@intel.com>
Date: Tue, 24 Sep 2024 16:46:56 +0800
Subject: [PATCH 336/427] [X86][APX] Do not emit {evex} prefix for memory
 variant (#109759)

This was mistakely changed by #109579, which doesn't match with other
EVEX decoding.

(cherry picked from commit 70529b24a30943d46e361d2990268499921e28a2)
---
 llvm/lib/Target/X86/X86InstrAVX512.td      | 27 +++++++++++-----------
 llvm/test/MC/Disassembler/X86/apx/kmov.txt | 16 ++++++-------
 2 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 1bf201f2bb87e..cc1f9090c11ac 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2617,20 +2617,19 @@ defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, E
 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC, ValueType vvt,
                           X86MemOperand x86memop, string Suffix = ""> {
-  let explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in {
-    let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
-    def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
-                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
-                    Sched<[WriteMove]>;
-    def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
-                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                      [(set KRC:$dst, (vvt (load addr:$src)))]>,
-                    Sched<[WriteLoad]>, NoCD8;
-    def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
-                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                      [(store KRC:$src, addr:$dst)]>,
-                    Sched<[WriteStore]>, NoCD8;
-  }
+  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove],
+      explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in
+  def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
+                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+                  Sched<[WriteMove]>;
+  def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
+                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                    [(set KRC:$dst, (vvt (load addr:$src)))]>,
+                  Sched<[WriteLoad]>, NoCD8;
+  def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
+                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                    [(store KRC:$src, addr:$dst)]>,
+                  Sched<[WriteStore]>, NoCD8;
 }
 
 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
diff --git a/llvm/test/MC/Disassembler/X86/apx/kmov.txt b/llvm/test/MC/Disassembler/X86/apx/kmov.txt
index 45fedbd0da587..ba77dda64e59f 100644
--- a/llvm/test/MC/Disassembler/X86/apx/kmov.txt
+++ b/llvm/test/MC/Disassembler/X86/apx/kmov.txt
@@ -17,20 +17,20 @@
 # INTEL: {evex} kmovq	k2, k1
 0x62,0xf1,0xfc,0x08,0x90,0xd1
 
-# ATT:   {evex} kmovb   -16(%rax), %k0
-# INTEL: {evex} kmovb   k0, byte ptr [rax - 16]
+# ATT:   kmovb	-16(%rax), %k0
+# INTEL: kmovb	k0, byte ptr [rax - 16]
 0x62,0xf1,0x7d,0x08,0x90,0x40,0xf0
 
-# ATT:   {evex} kmovw   -16(%rax), %k0
-# INTEL: {evex} kmovw   k0, word ptr [rax - 16]
+# ATT:   kmovw	-16(%rax), %k0
+# INTEL: kmovw	k0, word ptr [rax - 16]
 0x62,0xf1,0x7c,0x08,0x90,0x40,0xf0
 
-# ATT:   {evex} kmovd   -16(%rax), %k0
-# INTEL: {evex} kmovd   k0, dword ptr [rax - 16]
+# ATT:   kmovd	-16(%rax), %k0
+# INTEL: kmovd	k0, dword ptr [rax - 16]
 0x62,0xf1,0xfd,0x08,0x90,0x40,0xf0
 
-# ATT:   {evex} kmovq   -16(%rax), %k0
-# INTEL: {evex} kmovq   k0, qword ptr [rax - 16]
+# ATT:   kmovq	-16(%rax), %k0
+# INTEL: kmovq	k0, qword ptr [rax - 16]
 0x62,0xf1,0xfc,0x08,0x90,0x40,0xf0
 
 # ATT-NOT: {evex}

From d5498c39fe6a17e271ad5a02917103445eb89373 Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@hieta.se>
Date: Tue, 1 Oct 2024 16:04:00 +0200
Subject: [PATCH 337/427] Bump version to 19.1.2

---
 cmake/Modules/LLVMVersion.cmake          | 2 +-
 libcxx/include/__config                  | 2 +-
 llvm/utils/gn/secondary/llvm/version.gni | 2 +-
 llvm/utils/lit/lit/__init__.py           | 2 +-
 llvm/utils/mlgo-utils/mlgo/__init__.py   | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake
index 8bdcd828eb247..e521cf77fd58a 100644
--- a/cmake/Modules/LLVMVersion.cmake
+++ b/cmake/Modules/LLVMVersion.cmake
@@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR)
   set(LLVM_VERSION_MINOR 1)
 endif()
 if(NOT DEFINED LLVM_VERSION_PATCH)
-  set(LLVM_VERSION_PATCH 1)
+  set(LLVM_VERSION_PATCH 2)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
   set(LLVM_VERSION_SUFFIX)
diff --git a/libcxx/include/__config b/libcxx/include/__config
index b2abd372929d1..272375907eb50 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -27,7 +27,7 @@
 // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM.
 // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is
 // defined to XXYYZZ.
-#  define _LIBCPP_VERSION 190101
+#  define _LIBCPP_VERSION 190102
 
 #  define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
 #  define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni
index 5e0a56f80bfff..1e041c6b506bb 100644
--- a/llvm/utils/gn/secondary/llvm/version.gni
+++ b/llvm/utils/gn/secondary/llvm/version.gni
@@ -1,4 +1,4 @@
 llvm_version_major = 19
 llvm_version_minor = 1
-llvm_version_patch = 1
+llvm_version_patch = 2
 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch"
diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py
index b0cac053a938e..cebdf15cefc55 100644
--- a/llvm/utils/lit/lit/__init__.py
+++ b/llvm/utils/lit/lit/__init__.py
@@ -2,7 +2,7 @@
 
 __author__ = "Daniel Dunbar"
 __email__ = "daniel@minormatter.com"
-__versioninfo__ = (19, 1, 1)
+__versioninfo__ = (19, 1, 2)
 __version__ = ".".join(str(v) for v in __versioninfo__) + "dev"
 
 __all__ = []
diff --git a/llvm/utils/mlgo-utils/mlgo/__init__.py b/llvm/utils/mlgo-utils/mlgo/__init__.py
index 26e0241539429..99fd714184410 100644
--- a/llvm/utils/mlgo-utils/mlgo/__init__.py
+++ b/llvm/utils/mlgo-utils/mlgo/__init__.py
@@ -4,7 +4,7 @@
 
 from datetime import timezone, datetime
 
-__versioninfo__ = (19, 1, 1)
+__versioninfo__ = (19, 1, 2)
 __version__ = (
     ".".join(str(v) for v in __versioninfo__)
     + "dev"

From 4f844b265d8b023983df99138b5bb355a027794c Mon Sep 17 00:00:00 2001
From: Jakub Mazurkiewicz <mazkuba3@gmail.com>
Date: Wed, 10 Apr 2024 23:12:22 +0200
Subject: [PATCH 338/427] [libc++] Follow-up to "Poison Pills are Too Toxic"

* Update release notes and `Cxx23.html`
* Update `__cpp_lib_ranges` feature test macro
---
 libcxx/docs/FeatureTestMacroTable.rst                    | 2 ++
 libcxx/docs/ReleaseNotes/19.rst                          | 2 +-
 libcxx/docs/Status/Cxx23.rst                             | 1 +
 libcxx/docs/Status/Cxx23Papers.csv                       | 2 +-
 libcxx/include/version                                   | 5 ++++-
 .../algorithm.version.compile.pass.cpp                   | 9 +++++----
 .../functional.version.compile.pass.cpp                  | 9 +++++----
 .../iterator.version.compile.pass.cpp                    | 9 +++++----
 .../memory.version.compile.pass.cpp                      | 9 +++++----
 .../ranges.version.compile.pass.cpp                      | 9 +++++----
 .../version.version.compile.pass.cpp                     | 9 +++++----
 libcxx/utils/generate_feature_test_macro_components.py   | 1 +
 12 files changed, 40 insertions(+), 27 deletions(-)

diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst
index a1506e115fe70..7f95f0f4e1c17 100644
--- a/libcxx/docs/FeatureTestMacroTable.rst
+++ b/libcxx/docs/FeatureTestMacroTable.rst
@@ -350,6 +350,8 @@ Status
     ---------------------------------------------------------- -----------------
     ``__cpp_lib_print``                                        ``202207L``
     ---------------------------------------------------------- -----------------
+    ``__cpp_lib_ranges``                                       ``202211L``
+    ---------------------------------------------------------- -----------------
     ``__cpp_lib_ranges_as_const``                              *unimplemented*
     ---------------------------------------------------------- -----------------
     ``__cpp_lib_ranges_as_rvalue``                             ``202207L``
diff --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst
index 92896f6b0d11e..63439f0e11850 100644
--- a/libcxx/docs/ReleaseNotes/19.rst
+++ b/libcxx/docs/ReleaseNotes/19.rst
@@ -74,7 +74,7 @@ Implemented Papers
 - P0019R8 - ``std::atomic_ref``
 - P2389R2 - Alias template ``dims`` for the ``extents`` of ``mdspan``
 - P1223R5 - ``ranges::find_last()``, ``ranges::find_last_if()``, and ``ranges::find_last_if_not()``
-- P2602R2 - Poison Pills are Too Toxic
+- P2602R2 - Poison Pills are Too Toxic (as DR against C++20)
 - P1981R0 - Rename ``leap`` to ``leap_second``
 - P1982R0 - Rename ``link`` to ``time_zone_link``
 
diff --git a/libcxx/docs/Status/Cxx23.rst b/libcxx/docs/Status/Cxx23.rst
index 23d30c8128d71..8c1cae8b3e3b2 100644
--- a/libcxx/docs/Status/Cxx23.rst
+++ b/libcxx/docs/Status/Cxx23.rst
@@ -44,6 +44,7 @@ Paper Status
    .. [#note-P1413R3] P1413R3: ``std::aligned_storage_t`` and ``std::aligned_union_t`` are marked deprecated, but
       clang doesn't issue a diagnostic for deprecated using template declarations.
    .. [#note-P2520R0] P2520R0: Libc++ implemented this paper as a DR in C++20 as well.
+   .. [#note-P2602R2] P2602R2: Libc++ implemented this paper as a DR in C++20 as well.
    .. [#note-P2711R1] P2711R1: ``join_with_view`` hasn't been done yet since this type isn't implemented yet.
    .. [#note-P2770R0] P2770R0: ``join_with_view`` hasn't been done yet since this type isn't implemented yet.
    .. [#note-P2693R1] P2693R1: The formatter for ``std::thread::id`` is implemented.
diff --git a/libcxx/docs/Status/Cxx23Papers.csv b/libcxx/docs/Status/Cxx23Papers.csv
index 92f4908487ae7..f46bb84453202 100644
--- a/libcxx/docs/Status/Cxx23Papers.csv
+++ b/libcxx/docs/Status/Cxx23Papers.csv
@@ -100,7 +100,7 @@
 "`P2396R1 <https://wg21.link/P2396R1>`__","LWG", "Concurrency TS 2 fixes ", "November 2022","","","|concurrency TS|"
 "`P2505R5 <https://wg21.link/P2505R5>`__","LWG", "Monadic Functions for ``std::expected``", "November 2022","|Complete|","17.0",""
 "`P2539R4 <https://wg21.link/P2539R4>`__","LWG", "Should the output of ``std::print`` to a terminal be synchronized with the underlying stream?", "November 2022","|Complete|","18.0","|format|"
-"`P2602R2 <https://wg21.link/P2602R2>`__","LWG", "Poison Pills are Too Toxic", "November 2022","|Complete|","19.0","|ranges|"
+"`P2602R2 <https://wg21.link/P2602R2>`__","LWG", "Poison Pills are Too Toxic", "November 2022","|Complete| [#note-P2602R2]_","19.0","|ranges| |DR|"
 "`P2708R1 <https://wg21.link/P2708R1>`__","LWG", "No Further Fundamentals TSes", "November 2022","|Nothing to do|","",""
 "","","","","","",""
 "`P0290R4 <https://wg21.link/P0290R4>`__","LWG", "``apply()`` for ``synchronized_value<T>``","February 2023","","","|concurrency TS|"
diff --git a/libcxx/include/version b/libcxx/include/version
index fe64343eafbc9..c8a31f77a915e 100644
--- a/libcxx/include/version
+++ b/libcxx/include/version
@@ -182,8 +182,9 @@ __cpp_lib_philox_engine                                 202406L <random>
 __cpp_lib_polymorphic_allocator                         201902L <memory_resource>
 __cpp_lib_print                                         202207L <ostream> <print>
 __cpp_lib_quoted_string_io                              201304L <iomanip>
-__cpp_lib_ranges                                        202207L <algorithm> <functional> <iterator>
+__cpp_lib_ranges                                        202211L <algorithm> <functional> <iterator>
                                                                 <memory> <ranges>
+                                                        202207L // C++20
 __cpp_lib_ranges_as_const                               202207L <ranges>
 __cpp_lib_ranges_as_rvalue                              202207L <ranges>
 __cpp_lib_ranges_chunk                                  202202L <ranges>
@@ -480,6 +481,8 @@ __cpp_lib_void_t                                        201411L <type_traits>
 # define __cpp_lib_optional                             202110L
 # define __cpp_lib_out_ptr                              202106L
 # define __cpp_lib_print                                202207L
+# undef  __cpp_lib_ranges
+# define __cpp_lib_ranges                               202211L
 // # define __cpp_lib_ranges_as_const                      202207L
 # define __cpp_lib_ranges_as_rvalue                     202207L
 // # define __cpp_lib_ranges_chunk                         202202L
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.compile.pass.cpp
index ded8006063241..6b756535569f6 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.compile.pass.cpp
@@ -22,6 +22,7 @@
     __cpp_lib_freestanding_algorithm                        202311L [C++26]
     __cpp_lib_parallel_algorithm                            201603L [C++17]
     __cpp_lib_ranges                                        202207L [C++20]
+                                                            202211L [C++23]
     __cpp_lib_ranges_contains                               202207L [C++23]
     __cpp_lib_ranges_find_last                              202207L [C++23]
     __cpp_lib_ranges_starts_ends_with                       202106L [C++23]
@@ -321,8 +322,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++23"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++23"
+# if __cpp_lib_ranges != 202211L
+#   error "__cpp_lib_ranges should have the value 202211L in c++23"
 # endif
 
 # ifndef __cpp_lib_ranges_contains
@@ -425,8 +426,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++26"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++26"
+# if __cpp_lib_ranges != 202211L
+#   error "__cpp_lib_ranges should have the value 202211L in c++26"
 # endif
 
 # ifndef __cpp_lib_ranges_contains
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.compile.pass.cpp
index 27e76e5b2b05a..3c6a23aadc449 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.compile.pass.cpp
@@ -28,6 +28,7 @@
     __cpp_lib_move_only_function       202110L [C++23]
     __cpp_lib_not_fn                   201603L [C++17]
     __cpp_lib_ranges                   202207L [C++20]
+                                       202211L [C++23]
     __cpp_lib_reference_wrapper        202403L [C++26]
     __cpp_lib_result_of_sfinae         201210L [C++14]
     __cpp_lib_transparent_operators    201210L [C++14]
@@ -409,8 +410,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++23"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++23"
+# if __cpp_lib_ranges != 202211L
+#   error "__cpp_lib_ranges should have the value 202211L in c++23"
 # endif
 
 # ifdef __cpp_lib_reference_wrapper
@@ -531,8 +532,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++26"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++26"
+# if __cpp_lib_ranges != 202211L
+#   error "__cpp_lib_ranges should have the value 202211L in c++26"
 # endif
 
 # ifndef __cpp_lib_reference_wrapper
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/iterator.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/iterator.version.compile.pass.cpp
index 700907ce9bb07..0ea31289e3261 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/iterator.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/iterator.version.compile.pass.cpp
@@ -24,6 +24,7 @@
     __cpp_lib_nonmember_container_access    201411L [C++17]
     __cpp_lib_null_iterators                201304L [C++14]
     __cpp_lib_ranges                        202207L [C++20]
+                                            202211L [C++23]
     __cpp_lib_ssize                         201902L [C++20]
 */
 
@@ -255,8 +256,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++23"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++23"
+# if __cpp_lib_ranges != 202211L
+#   error "__cpp_lib_ranges should have the value 202211L in c++23"
 # endif
 
 # ifndef __cpp_lib_ssize
@@ -313,8 +314,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++26"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++26"
+# if __cpp_lib_ranges != 202211L
+#   error "__cpp_lib_ranges should have the value 202211L in c++26"
 # endif
 
 # ifndef __cpp_lib_ssize
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.compile.pass.cpp
index aa1170658b103..92e1b862fef9d 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.compile.pass.cpp
@@ -29,6 +29,7 @@
     __cpp_lib_out_ptr                             202106L [C++23]
                                                   202311L [C++26]
     __cpp_lib_ranges                              202207L [C++20]
+                                                  202211L [C++23]
     __cpp_lib_raw_memory_algorithms               201606L [C++17]
     __cpp_lib_shared_ptr_arrays                   201611L [C++17]
                                                   201707L [C++20]
@@ -495,8 +496,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++23"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++23"
+# if __cpp_lib_ranges != 202211L
+#   error "__cpp_lib_ranges should have the value 202211L in c++23"
 # endif
 
 # ifndef __cpp_lib_raw_memory_algorithms
@@ -626,8 +627,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++26"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++26"
+# if __cpp_lib_ranges != 202211L
+#   error "__cpp_lib_ranges should have the value 202211L in c++26"
 # endif
 
 # ifndef __cpp_lib_raw_memory_algorithms
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp
index 30feacd796d8e..75b2ef57143fc 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp
@@ -18,6 +18,7 @@
 /*  Constant                                                Value
     __cpp_lib_default_template_type_for_algorithm_values    202403L [C++26]
     __cpp_lib_ranges                                        202207L [C++20]
+                                                            202211L [C++23]
     __cpp_lib_ranges_as_const                               202207L [C++23]
     __cpp_lib_ranges_as_rvalue                              202207L [C++23]
     __cpp_lib_ranges_chunk                                  202202L [C++23]
@@ -245,8 +246,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++23"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++23"
+# if __cpp_lib_ranges != 202211L
+#   error "__cpp_lib_ranges should have the value 202211L in c++23"
 # endif
 
 # if !defined(_LIBCPP_VERSION)
@@ -364,8 +365,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++26"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++26"
+# if __cpp_lib_ranges != 202211L
+#   error "__cpp_lib_ranges should have the value 202211L in c++26"
 # endif
 
 # if !defined(_LIBCPP_VERSION)
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
index b8bad696f1bae..5545298c23670 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
@@ -167,6 +167,7 @@
     __cpp_lib_print                                         202207L [C++23]
     __cpp_lib_quoted_string_io                              201304L [C++14]
     __cpp_lib_ranges                                        202207L [C++20]
+                                                            202211L [C++23]
     __cpp_lib_ranges_as_const                               202207L [C++23]
     __cpp_lib_ranges_as_rvalue                              202207L [C++23]
     __cpp_lib_ranges_chunk                                  202202L [C++23]
@@ -5619,8 +5620,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++23"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++23"
+# if __cpp_lib_ranges != 202211L
+#   error "__cpp_lib_ranges should have the value 202211L in c++23"
 # endif
 
 # if !defined(_LIBCPP_VERSION)
@@ -7470,8 +7471,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++26"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++26"
+# if __cpp_lib_ranges != 202211L
+#   error "__cpp_lib_ranges should have the value 202211L in c++26"
 # endif
 
 # if !defined(_LIBCPP_VERSION)
diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py
index 6c42748002aee..ab32d29e6f36e 100755
--- a/libcxx/utils/generate_feature_test_macro_components.py
+++ b/libcxx/utils/generate_feature_test_macro_components.py
@@ -992,6 +992,7 @@ def add_version_header(tc):
             "name": "__cpp_lib_ranges",
             "values": {
                 "c++20": 202207,
+                "c++23": 202211,  # P2602R2 Poison Pills are Too Toxic
                 # "c++23": 202302,  # Relaxing Ranges Just A Smidge
                 # "c++26": 202406,  # P2997R1 Removing the common reference requirement from the indirectly invocable concepts (already implemented as a DR)
             },

From bfa99ebbc3092324e80fa1cab5045bdd2f16d48d Mon Sep 17 00:00:00 2001
From: Zentrik <llvm.zentrik@gmail.com>
Date: Tue, 24 Sep 2024 21:33:57 +0100
Subject: [PATCH 339/427] Fix libFuzzer not building with pthreads on Windows
 (#109525)

Fixes https://github.com/llvm/llvm-project/issues/106871

(cherry picked from commit b4130bee6bfd34d8045f02fc9f951bcb5db9d85c)
---
 compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp
index db80eb383885e..73eea07cf869f 100644
--- a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp
@@ -239,6 +239,10 @@ size_t PageSize() {
 }
 
 void SetThreadName(std::thread &thread, const std::string &name) {
+#if defined(_LIBCPP_HAS_THREAD_API_PTHREAD) ||                                 \
+    defined(_GLIBCXX_GCC_GTHR_POSIX_H)
+  (void)pthread_setname_np(thread.native_handle(), name.c_str());
+#else
   typedef HRESULT(WINAPI * proc)(HANDLE, PCWSTR);
   HMODULE kbase = GetModuleHandleA("KernelBase.dll");
   proc ThreadNameProc =
@@ -253,6 +257,7 @@ void SetThreadName(std::thread &thread, const std::string &name) {
       }
     }
   }
+#endif
 }
 
 } // namespace fuzzer

From 7b2756484bc5ccabcaac11d8e48c93ec4026941f Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Tue, 1 Oct 2024 06:11:15 -0700
Subject: [PATCH 340/427] workflow/release-binaries: Checkout sources before
 downloading artifacts (#109349)

The actions/checkout step will clear the current directory, so we need
to checkout the sources first so that the downloaded artifacts won't be
deleted.

(cherry picked from commit 8f2aa9dbad7c1400f66e1ee1c43b071a1905f3e6)
---
 .github/workflows/release-binaries.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml
index 925912df6843e..f24e25879b96b 100644
--- a/.github/workflows/release-binaries.yml
+++ b/.github/workflows/release-binaries.yml
@@ -420,6 +420,14 @@ jobs:
       attestations: write # For artifact attestations
 
     steps:
+    - name: Checkout Release Scripts
+      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+      with:
+        sparse-checkout: |
+          llvm/utils/release/github-upload-release.py
+          llvm/utils/git/requirements.txt
+        sparse-checkout-cone-mode: false
+
     - name: 'Download artifact'
       uses: actions/download-artifact@6b208ae046db98c579e8a3aa621ab581ff575935 # v4.1.1
       with:
@@ -442,14 +450,6 @@ jobs:
         name: ${{ needs.prepare.outputs.release-binary-filename }}-attestation
         path: ${{ needs.prepare.outputs.release-binary-filename }}.jsonl
 
-    - name: Checkout Release Scripts
-      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
-      with:
-        sparse-checkout: |
-          llvm/utils/release/github-upload-release.py
-          llvm/utils/git/requirements.txt
-        sparse-checkout-cone-mode: false
-
     - name: Install Python Requirements
       run: |
         pip install --require-hashes -r ./llvm/utils/git/requirements.txt

From c3c1b047ed3cfe663f6ffde91223ef73516b5fb3 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Tue, 1 Oct 2024 06:12:03 -0700
Subject: [PATCH 341/427] workflows/release-documentation: Submit a pull
 request with changes (#108247)

This is instead of pushing directly. Creating a pull request is slightly
more work for the release manager, but it is more secure as we no longer
need a secret with write access to the www-releases repo.

(cherry picked from commit 9cd289fa4a7355e1bfd3129ba9c755f979fd0a72)
---
 .github/workflows/release-documentation.yml | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/release-documentation.yml b/.github/workflows/release-documentation.yml
index 70e5f08b6f72e..922c5093f1357 100644
--- a/.github/workflows/release-documentation.yml
+++ b/.github/workflows/release-documentation.yml
@@ -72,17 +72,20 @@ jobs:
           ref: main
           fetch-depth: 0
           path: www-releases
+          persist-credentials: false
 
       - name: Upload Release Notes
         if: env.upload
         env:
-          WWW_RELEASES_TOKEN: ${{ secrets.WWW_RELEASES_TOKEN }}
+          GH_TOKEN: ${{ secrets.WWW_RELEASES_TOKEN }}
         run: |
-          mkdir -p ../www-releases/${{ inputs.release-version }}
-          mv ./docs-build/html-export/* ../www-releases/${{ inputs.release-version }}
-          cd ../www-releases
+          mkdir -p www-releases/${{ inputs.release-version }}
+          mv ./docs-build/html-export/* www-releases/${{ inputs.release-version }}
+          cd www-releases
+          git checkout -b ${{ inputs.release-version }}
           git add ${{ inputs.release-version }}
           git config user.email "llvmbot@llvm.org"
           git config user.name "llvmbot"
           git commit -a -m "Add ${{ inputs.release-version }} documentation"
-          git push "https://$WWW_RELEASES_TOKEN@github.com/${{ github.repository_owner }}/www-releases" main:main
+          git push --force  "https://$GH_TOKEN@github.com/llvmbot/www-releases.git" HEAD:refs/heads/${{ inputs.release-version }}
+          gh pr create -f -B main -H ${{ inputs.release-version }} -R llvmbot/www-releases

From b0b36c00f50fe88d95fe16b29b0fba501c1fb533 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Wed, 2 Oct 2024 08:54:25 -0400
Subject: [PATCH 342/427] [libc++] Fix name of is_always_lock_free test which
 was never being run (#106077)

(cherry picked from commit b45661953e6974782b0ccada6f0784db04bc693f)
---
 ...s_lock_free.cpp => is_always_lock_free.pass.cpp} | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)
 rename libcxx/test/std/atomics/atomics.lockfree/{is_always_lock_free.cpp => is_always_lock_free.pass.cpp} (94%)

diff --git a/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp b/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp
similarity index 94%
rename from libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp
rename to libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp
index 2dc7f5c765419..723e7b36f5031 100644
--- a/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.cpp
+++ b/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp
@@ -5,8 +5,9 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-//
+
 // UNSUPPORTED: c++03, c++11, c++14
+// XFAIL: LIBCXX-PICOLIBC-FIXME
 
 // <atomic>
 //
@@ -15,6 +16,10 @@
 //
 // static constexpr bool is_always_lock_free;
 
+// Ignore diagnostic about vector types changing the ABI on some targets, since
+// that is irrelevant for this test.
+// ADDITIONAL_COMPILE_FLAGS: -Wno-psabi
+
 #include <atomic>
 #include <cassert>
 #include <cstddef>
@@ -26,7 +31,8 @@ template <typename T>
 void check_always_lock_free(std::atomic<T> const& a) {
   using InfoT = LockFreeStatusInfo<T>;
 
-  constexpr std::same_as<const bool> decltype(auto) is_always_lock_free = std::atomic<T>::is_always_lock_free;
+  constexpr auto is_always_lock_free = std::atomic<T>::is_always_lock_free;
+  ASSERT_SAME_TYPE(decltype(is_always_lock_free), bool const);
 
   // If we know the status of T for sure, validate the exact result of the function.
   if constexpr (InfoT::status_known) {
@@ -44,7 +50,8 @@ void check_always_lock_free(std::atomic<T> const& a) {
 
   // In all cases, also sanity-check it based on the implication always-lock-free => lock-free.
   if (is_always_lock_free) {
-    std::same_as<bool> decltype(auto) is_lock_free = a.is_lock_free();
+    auto is_lock_free = a.is_lock_free();
+    ASSERT_SAME_TYPE(decltype(is_always_lock_free), bool const);
     assert(is_lock_free);
   }
   ASSERT_NOEXCEPT(a.is_lock_free());

From 7b591d7fce6d9958fec08b01f348b73a307b29f1 Mon Sep 17 00:00:00 2001
From: Keith Smiley <keithbsmiley@gmail.com>
Date: Tue, 1 Oct 2024 10:02:20 -0700
Subject: [PATCH 343/427] workflows/release-binaries: Use static ZSTD on macOS

On macOS the shared zstd library points to a homebrew install that isn't
very stable for users.

(cherry picked from commit 748f5404ccdf28d4beef37efdaeba7a1701ab425)
---
 clang/cmake/caches/Release.cmake | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clang/cmake/caches/Release.cmake b/clang/cmake/caches/Release.cmake
index 6d5f75ca0074e..c4947bf453872 100644
--- a/clang/cmake/caches/Release.cmake
+++ b/clang/cmake/caches/Release.cmake
@@ -101,3 +101,6 @@ set_final_stage_var(LLVM_ENABLE_PROJECTS "${LLVM_RELEASE_ENABLE_PROJECTS}" STRIN
 set_final_stage_var(CPACK_GENERATOR "TXZ" STRING)
 set_final_stage_var(CPACK_ARCHIVE_THREADS "0" STRING)
 
+if(${CMAKE_HOST_SYSTEM_NAME} MATCHES "Darwin")
+  set_final_stage_var(LLVM_USE_STATIC_ZSTD "ON" BOOL)
+endif()

From 3e6207eb818a33e1adb2083b646fa1cc3145ab34 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi@microsoft.com>
Date: Tue, 30 Jul 2024 19:53:07 -0400
Subject: [PATCH 344/427] [x86][Windows] Fix chromium build break Windows does
 not support float C89 math functions like: - acosf - asinf - atanf - coshf -
 sinhf - tanhf These 6 libfuncs need to be type promoted.

This PR fixes the bug introduced by https://github.com/llvm/llvm-project/pull/98949
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  10 +-
 .../CodeGen/X86/fp-strict-libcalls-msvc32.ll  | 107 ++++++++++++++++++
 2 files changed, 115 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 45989bcd07d37..10f269f803778 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2475,8 +2475,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
     // clang-format off
    for (ISD::NodeType Op :
-         {ISD::FCEIL,  ISD::STRICT_FCEIL,
+         {ISD::FACOS,  ISD::STRICT_FACOS,
+          ISD::FASIN,  ISD::STRICT_FASIN,
+          ISD::FATAN,  ISD::STRICT_FATAN,
+          ISD::FCEIL,  ISD::STRICT_FCEIL,
           ISD::FCOS,   ISD::STRICT_FCOS,
+          ISD::FCOSH,  ISD::STRICT_FCOSH,
           ISD::FEXP,   ISD::STRICT_FEXP,
           ISD::FFLOOR, ISD::STRICT_FFLOOR,
           ISD::FREM,   ISD::STRICT_FREM,
@@ -2484,7 +2488,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
           ISD::FLOG10, ISD::STRICT_FLOG10,
           ISD::FPOW,   ISD::STRICT_FPOW,
           ISD::FSIN,   ISD::STRICT_FSIN,
-          ISD::FTAN,   ISD::STRICT_FTAN})
+          ISD::FSINH,  ISD::STRICT_FSINH,
+          ISD::FTAN,   ISD::STRICT_FTAN,
+          ISD::FTANH,  ISD::STRICT_FTANH})
       if (isOperationExpand(Op, MVT::f32))
         setOperationAction(Op, MVT::f32, Promote);
   // clang-format on
diff --git a/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll b/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll
index cfec52c0e6886..5d4e86afc8ace 100644
--- a/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll
@@ -177,6 +177,107 @@ define float @tan(float %x) #0 {
   ret float %result
 }
 
+define float @acos(float %x) #0 {
+; CHECK-LABEL: acos:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subl $12, %esp
+; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    wait
+; CHECK-NEXT:    calll _acos
+; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-NEXT:    wait
+; CHECK-NEXT:    addl $12, %esp
+; CHECK-NEXT:    retl
+  %result = call float @llvm.experimental.constrained.acos.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define float @asin(float %x) #0 {
+; CHECK-LABEL: asin:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subl $12, %esp
+; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    wait
+; CHECK-NEXT:    calll _asin
+; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-NEXT:    wait
+; CHECK-NEXT:    addl $12, %esp
+; CHECK-NEXT:    retl
+  %result = call float @llvm.experimental.constrained.asin.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define float @atan(float %x) #0 {
+; CHECK-LABEL: atan:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subl $12, %esp
+; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    wait
+; CHECK-NEXT:    calll _atan
+; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-NEXT:    wait
+; CHECK-NEXT:    addl $12, %esp
+; CHECK-NEXT:    retl
+  %result = call float @llvm.experimental.constrained.atan.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define float @cosh(float %x) #0 {
+; CHECK-LABEL: cosh:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subl $12, %esp
+; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    wait
+; CHECK-NEXT:    calll _cosh
+; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-NEXT:    wait
+; CHECK-NEXT:    addl $12, %esp
+; CHECK-NEXT:    retl
+  %result = call float @llvm.experimental.constrained.cosh.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define float @sinh(float %x) #0 {
+; CHECK-LABEL: sinh:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subl $12, %esp
+; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    wait
+; CHECK-NEXT:    calll _sinh
+; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-NEXT:    wait
+; CHECK-NEXT:    addl $12, %esp
+; CHECK-NEXT:    retl
+  %result = call float @llvm.experimental.constrained.sinh.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define float @tanh(float %x) #0 {
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subl $12, %esp
+; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    wait
+; CHECK-NEXT:    calll _tanh
+; CHECK-NEXT:    fstps {{[0-9]+}}(%esp)
+; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-NEXT:    wait
+; CHECK-NEXT:    addl $12, %esp
+; CHECK-NEXT:    retl
+  %result = call float @llvm.experimental.constrained.tanh.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
 attributes #0 = { strictfp }
 
 declare float @llvm.experimental.constrained.ceil.f32(float, metadata)
@@ -189,3 +290,9 @@ declare float @llvm.experimental.constrained.log10.f32(float, metadata, metadata
 declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata)
 declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.acos.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.asin.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.atan.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.cosh.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.sinh.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.tanh.f32(float, metadata, metadata)

From 1515e63864ec781c7d751fe66144923a923df1a9 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Wed, 2 Oct 2024 19:06:56 -0700
Subject: [PATCH 345/427] [clang-format] Handle template closer followed by
 empty paretheses (#110408)

Fixes #109925.
---
 clang/lib/Format/TokenAnnotator.cpp           | 42 ++++++++++---------
 clang/lib/Format/UnwrappedLineParser.cpp      | 12 ++++--
 clang/unittests/Format/TokenAnnotatorTest.cpp |  7 ++++
 3 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 4512e539cc794..ad9ed7b47d002 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -189,25 +189,29 @@ class AnnotatingParser {
       next();
     }
 
-    for (bool SeenTernaryOperator = false; CurrentToken;) {
+    for (bool SeenTernaryOperator = false, MaybeAngles = true; CurrentToken;) {
       const bool InExpr = Contexts[Contexts.size() - 2].IsExpression;
       if (CurrentToken->is(tok::greater)) {
         const auto *Next = CurrentToken->Next;
-        // Try to do a better job at looking for ">>" within the condition of
-        // a statement. Conservatively insert spaces between consecutive ">"
-        // tokens to prevent splitting right bitshift operators and potentially
-        // altering program semantics. This check is overly conservative and
-        // will prevent spaces from being inserted in select nested template
-        // parameter cases, but should not alter program semantics.
-        if (Next && Next->is(tok::greater) &&
-            Left->ParentBracket != tok::less &&
-            CurrentToken->getStartOfNonWhitespace() ==
-                Next->getStartOfNonWhitespace().getLocWithOffset(-1)) {
-          return false;
-        }
-        if (InExpr && SeenTernaryOperator &&
-            (!Next || !Next->isOneOf(tok::l_paren, tok::l_brace))) {
-          return false;
+        if (CurrentToken->isNot(TT_TemplateCloser)) {
+          // Try to do a better job at looking for ">>" within the condition of
+          // a statement. Conservatively insert spaces between consecutive ">"
+          // tokens to prevent splitting right shift operators and potentially
+          // altering program semantics. This check is overly conservative and
+          // will prevent spaces from being inserted in select nested template
+          // parameter cases, but should not alter program semantics.
+          if (Next && Next->is(tok::greater) &&
+              Left->ParentBracket != tok::less &&
+              CurrentToken->getStartOfNonWhitespace() ==
+                  Next->getStartOfNonWhitespace().getLocWithOffset(-1)) {
+            return false;
+          }
+          if (InExpr && SeenTernaryOperator &&
+              (!Next || !Next->isOneOf(tok::l_paren, tok::l_brace))) {
+            return false;
+          }
+          if (!MaybeAngles)
+            return false;
         }
         Left->MatchingParen = CurrentToken;
         CurrentToken->MatchingParen = Left;
@@ -243,11 +247,11 @@ class AnnotatingParser {
       // operator that was misinterpreted because we are parsing template
       // parameters.
       // FIXME: This is getting out of hand, write a decent parser.
-      if (InExpr && !Line.startsWith(tok::kw_template) &&
+      if (MaybeAngles && InExpr && !Line.startsWith(tok::kw_template) &&
           Prev.is(TT_BinaryOperator)) {
         const auto Precedence = Prev.getPrecedence();
         if (Precedence > prec::Conditional && Precedence < prec::Relational)
-          return false;
+          MaybeAngles = false;
       }
       if (Prev.isOneOf(tok::question, tok::colon) && !Style.isProto())
         SeenTernaryOperator = true;
@@ -1615,7 +1619,7 @@ class AnnotatingParser {
         return false;
       break;
     case tok::greater:
-      if (Style.Language != FormatStyle::LK_TextProto)
+      if (Style.Language != FormatStyle::LK_TextProto && Tok->is(TT_Unknown))
         Tok->setType(TT_BinaryOperator);
       if (Tok->Previous && Tok->Previous->is(TT_TemplateCloser))
         Tok->SpacesRequiredBefore = 1;
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 7813d86ff0ea1..f7b3561f6e033 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -2551,7 +2551,7 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
         parseChildBlock();
       break;
     case tok::r_paren: {
-      const auto *Prev = LeftParen->Previous;
+      auto *Prev = LeftParen->Previous;
       if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody &&
           Style.RemoveParentheses > FormatStyle::RPS_Leave) {
         const auto *Next = Tokens->peekNextToken();
@@ -2575,9 +2575,13 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
           FormatTok->Optional = true;
         }
       }
-      if (Prev && Prev->is(TT_TypenameMacro)) {
-        LeftParen->setFinalizedType(TT_TypeDeclarationParen);
-        FormatTok->setFinalizedType(TT_TypeDeclarationParen);
+      if (Prev) {
+        if (Prev->is(TT_TypenameMacro)) {
+          LeftParen->setFinalizedType(TT_TypeDeclarationParen);
+          FormatTok->setFinalizedType(TT_TypeDeclarationParen);
+        } else if (Prev->is(tok::greater) && FormatTok->Previous == LeftParen) {
+          Prev->setFinalizedType(TT_TemplateCloser);
+        }
       }
       nextToken();
       return SeenEqual;
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index dd58fbc70cb91..2f09b380a8713 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -3394,6 +3394,13 @@ TEST_F(TokenAnnotatorTest, SplitPenalty) {
   EXPECT_SPLIT_PENALTY(Tokens[7], 23u);
 }
 
+TEST_F(TokenAnnotatorTest, TemplateInstantiation) {
+  auto Tokens = annotate("return FixedInt<N | M>();");
+  ASSERT_EQ(Tokens.size(), 11u) << Tokens;
+  EXPECT_TOKEN(Tokens[2], tok::less, TT_TemplateOpener);
+  EXPECT_TOKEN(Tokens[6], tok::greater, TT_TemplateCloser);
+}
+
 } // namespace
 } // namespace format
 } // namespace clang

From 33a5c88572875d2470aa0ad459493264974cdc2e Mon Sep 17 00:00:00 2001
From: Nathan Ridge <zeratul976@hotmail.com>
Date: Sun, 6 Oct 2024 18:13:36 -0400
Subject: [PATCH 346/427] [clang-tidy] Avoid capturing a local variable in a
 static lambda in UseRangesCheck (#111282)

Fixes https://github.com/llvm/llvm-project/issues/109367

(cherry picked from commit acf92a47c0ece8562fd745215c478fe2d4ab5896)
---
 .../clang-tidy/boost/UseRangesCheck.cpp        | 18 +++++++++---------
 .../clangd/unittests/ClangdLSPServerTests.cpp  | 16 ++++++++++++++++
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp b/clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp
index 4022ea0cdaf5e..e45687fde6d9f 100644
--- a/clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp
+++ b/clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp
@@ -204,7 +204,7 @@ utils::UseRangesCheck::ReplacerMap UseRangesCheck::getReplacerMap() const {
   ReplacerMap Results;
   static const Signature SingleSig = {{0}};
   static const Signature TwoSig = {{0}, {2}};
-  static const auto AddFrom =
+  const auto AddFrom =
       [&Results](llvm::IntrusiveRefCntPtr<UseRangesCheck::Replacer> Replacer,
                  std::initializer_list<StringRef> Names, StringRef Prefix) {
         llvm::SmallString<64> Buffer;
@@ -214,17 +214,17 @@ utils::UseRangesCheck::ReplacerMap UseRangesCheck::getReplacerMap() const {
         }
       };
 
-  static const auto AddFromStd =
-      [](llvm::IntrusiveRefCntPtr<UseRangesCheck::Replacer> Replacer,
-         std::initializer_list<StringRef> Names) {
+  const auto AddFromStd =
+      [&](llvm::IntrusiveRefCntPtr<UseRangesCheck::Replacer> Replacer,
+          std::initializer_list<StringRef> Names) {
         AddFrom(Replacer, Names, "std");
       };
 
-  static const auto AddFromBoost =
-      [](llvm::IntrusiveRefCntPtr<UseRangesCheck::Replacer> Replacer,
-         std::initializer_list<
-             std::pair<StringRef, std::initializer_list<StringRef>>>
-             NamespaceAndNames) {
+  const auto AddFromBoost =
+      [&](llvm::IntrusiveRefCntPtr<UseRangesCheck::Replacer> Replacer,
+          std::initializer_list<
+              std::pair<StringRef, std::initializer_list<StringRef>>>
+              NamespaceAndNames) {
         for (auto [Namespace, Names] : NamespaceAndNames)
           AddFrom(Replacer, Names,
                   SmallString<64>{"boost", (Namespace.empty() ? "" : "::"),
diff --git a/clang-tools-extra/clangd/unittests/ClangdLSPServerTests.cpp b/clang-tools-extra/clangd/unittests/ClangdLSPServerTests.cpp
index 75a140767035b..49a94045ea487 100644
--- a/clang-tools-extra/clangd/unittests/ClangdLSPServerTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ClangdLSPServerTests.cpp
@@ -262,6 +262,22 @@ TEST_F(LSPTest, ClangTidyRename) {
   EXPECT_EQ(Params, std::vector{llvm::json::Value(std::move(ExpectedEdit))});
 }
 
+TEST_F(LSPTest, ClangTidyCrash_Issue109367) {
+  // This test requires clang-tidy checks to be linked in.
+  if (!CLANGD_TIDY_CHECKS)
+    return;
+  Opts.ClangTidyProvider = [](tidy::ClangTidyOptions &ClangTidyOpts,
+                              llvm::StringRef) {
+    ClangTidyOpts.Checks = {"-*,boost-use-ranges"};
+  };
+  // Check that registering the boost-use-ranges checker's matchers
+  // on two different threads does not cause a crash.
+  auto &Client = start();
+  Client.didOpen("a.cpp", "");
+  Client.didOpen("b.cpp", "");
+  Client.sync();
+}
+
 TEST_F(LSPTest, IncomingCalls) {
   Annotations Code(R"cpp(
     void calle^e(int);

From f3f49528c419e4d7be9df8eeafa57dea2701ac47 Mon Sep 17 00:00:00 2001
From: Younan Zhang <zyn7109@gmail.com>
Date: Mon, 7 Oct 2024 09:38:19 +0800
Subject: [PATCH 347/427] [Clang] Remove the special-casing for
 RequiresExprBodyDecl in BuildResolvedCallExpr() after fd87d765c0 (#111277)

The special-casing for RequiresExprBodyDecl caused a regression, as
reported in #110785.

The original fix for #84020 has been superseded by fd87d765c0, which
establishes a `DependentScopeDeclRefExpr` instead of a
`CXXDependentScopeMemberExpr` for the case in issue. So the spurious
diagnostic in #84020 would no longer occur.

This also merges the test for #84020 together with that for #110785 into
clang/test/SemaTemplate/instantiate-requires-expr.cpp.

No release note because I think this merits a backport.

Fixes #110785

(cherry picked from commit 8c1547055eaf65003f3e6fd024195f4926ff2356)
---
 clang/lib/Sema/SemaExpr.cpp                   |  3 +-
 clang/lib/Sema/TreeTransform.h                |  2 +-
 clang/test/SemaCXX/PR84020.cpp                | 23 --------------
 .../instantiate-requires-expr.cpp             | 31 +++++++++++++++++++
 4 files changed, 33 insertions(+), 26 deletions(-)
 delete mode 100644 clang/test/SemaCXX/PR84020.cpp

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index f56ca398cda81..687b1be945921 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -6922,8 +6922,7 @@ ExprResult Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl,
   }
 
   if (CXXMethodDecl *Method = dyn_cast_or_null<CXXMethodDecl>(FDecl))
-    if (!isa<RequiresExprBodyDecl>(CurContext) &&
-        Method->isImplicitObjectMemberFunction())
+    if (Method->isImplicitObjectMemberFunction())
       return ExprError(Diag(LParenLoc, diag::err_member_call_without_object)
                        << Fn->getSourceRange() << 0);
 
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 51e6a4845bf6f..0ae393524fe03 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -13608,7 +13608,7 @@ bool TreeTransform<Derived>::TransformOverloadExprDecls(OverloadExpr *Old,
     }
 
     AllEmptyPacks &= Decls.empty();
-  };
+  }
 
   // C++ [temp.res]/8.4.2:
   //   The program is ill-formed, no diagnostic required, if [...] lookup for
diff --git a/clang/test/SemaCXX/PR84020.cpp b/clang/test/SemaCXX/PR84020.cpp
deleted file mode 100644
index 8ea5dcc4527ae..0000000000000
--- a/clang/test/SemaCXX/PR84020.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-// RUN: %clang_cc1 -std=c++20 -verify %s
-// RUN: %clang_cc1 -std=c++23 -verify %s
-// expected-no-diagnostics
-
-struct B {
-    template <typename S>
-    void foo();
-
-    void bar();
-};
-
-template <typename T, typename S>
-struct A : T {
-    auto foo() {
-        static_assert(requires { T::template foo<S>(); });
-        static_assert(requires { T::bar(); });
-    }
-};
-
-int main() {
-    A<B, double> a;
-    a.foo();
-}
diff --git a/clang/test/SemaTemplate/instantiate-requires-expr.cpp b/clang/test/SemaTemplate/instantiate-requires-expr.cpp
index 20a19d731ae16..a1f5456156a06 100644
--- a/clang/test/SemaTemplate/instantiate-requires-expr.cpp
+++ b/clang/test/SemaTemplate/instantiate-requires-expr.cpp
@@ -237,3 +237,34 @@ constexpr bool e_v = true;
 static_assert(e_v<bool>);
 
 } // namespace GH73885
+
+namespace GH84020 {
+
+struct B {
+  template <typename S> void foo();
+  void bar();
+};
+
+template <typename T, typename S> struct A : T {
+  void foo() {
+    static_assert(requires { T::template foo<S>(); });
+    static_assert(requires { T::bar(); });
+  }
+};
+
+template class A<B, double>;
+
+} // namespace GH84020
+
+namespace GH110785 {
+
+struct Foo {
+  static void f(auto) requires(false) {}
+  void f(int) {}
+
+  static_assert([](auto v) {
+    return requires { f(v); };
+  } (0) == false);
+};
+
+} // namespace GH110785

From 149884a1469d83720ea524763fdf461a74af8ecb Mon Sep 17 00:00:00 2001
From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Date: Thu, 19 Sep 2024 13:18:43 +0200
Subject: [PATCH 348/427] [SystemZ] Fix codegen for _[u]128 intrinsics

PR #74625 introduced a regression in the code generated for the
following set of intrinsic:
  vec_add_u128, vec_addc_u128, vec_adde_u128, vec_addec_u128
  vec_sub_u128, vec_subc_u128, vec_sube_u128, vec_subec_u128
  vec_sum_u128, vec_msum_u128
  vec_gfmsum_128, vec_gfmsum_accum_128

This is because the new code incorrectly assumed that a cast
from "unsigned __int128" to "vector unsigned char" would simply
be a bitcast re-interpretation; instead, this cast actually
truncates the __int128 to char and splats the result.

Fixed by adding an intermediate cast via a single-element
128-bit integer vector.

Fixes: https://github.com/llvm/llvm-project/issues/109113
(cherry picked from commit baf9b7da81025c1e3b0704d7ecf667e06f95642b)
---
 clang/lib/Headers/vecintrin.h                 |  28 ++-
 .../CodeGen/SystemZ/builtins-systemz-i128.c   | 165 ++++++++++++++++++
 2 files changed, 188 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/CodeGen/SystemZ/builtins-systemz-i128.c

diff --git a/clang/lib/Headers/vecintrin.h b/clang/lib/Headers/vecintrin.h
index 1f51e32c0d136..609c7cf0b7a6f 100644
--- a/clang/lib/Headers/vecintrin.h
+++ b/clang/lib/Headers/vecintrin.h
@@ -8359,7 +8359,9 @@ vec_min(__vector double __a, __vector double __b) {
 
 static inline __ATTRS_ai __vector unsigned char
 vec_add_u128(__vector unsigned char __a, __vector unsigned char __b) {
-  return (__vector unsigned char)((__int128)__a + (__int128)__b);
+  return (__vector unsigned char)
+         (unsigned __int128 __attribute__((__vector_size__(16))))
+         ((__int128)__a + (__int128)__b);
 }
 
 /*-- vec_addc ---------------------------------------------------------------*/
@@ -8389,6 +8391,7 @@ vec_addc(__vector unsigned long long __a, __vector unsigned long long __b) {
 static inline __ATTRS_ai __vector unsigned char
 vec_addc_u128(__vector unsigned char __a, __vector unsigned char __b) {
   return (__vector unsigned char)
+         (unsigned __int128 __attribute__((__vector_size__(16))))
          __builtin_s390_vaccq((unsigned __int128)__a, (unsigned __int128)__b);
 }
 
@@ -8398,6 +8401,7 @@ static inline __ATTRS_ai __vector unsigned char
 vec_adde_u128(__vector unsigned char __a, __vector unsigned char __b,
               __vector unsigned char __c) {
   return (__vector unsigned char)
+         (unsigned __int128 __attribute__((__vector_size__(16))))
          __builtin_s390_vacq((unsigned __int128)__a, (unsigned __int128)__b,
                              (unsigned __int128)__c);
 }
@@ -8408,6 +8412,7 @@ static inline __ATTRS_ai __vector unsigned char
 vec_addec_u128(__vector unsigned char __a, __vector unsigned char __b,
                __vector unsigned char __c) {
   return (__vector unsigned char)
+         (unsigned __int128 __attribute__((__vector_size__(16))))
          __builtin_s390_vacccq((unsigned __int128)__a, (unsigned __int128)__b,
                                (unsigned __int128)__c);
 }
@@ -8483,7 +8488,9 @@ vec_gfmsum(__vector unsigned int __a, __vector unsigned int __b) {
 static inline __ATTRS_o_ai __vector unsigned char
 vec_gfmsum_128(__vector unsigned long long __a,
                __vector unsigned long long __b) {
-  return (__vector unsigned char)__builtin_s390_vgfmg(__a, __b);
+  return (__vector unsigned char)
+         (unsigned __int128 __attribute__((__vector_size__(16))))
+         __builtin_s390_vgfmg(__a, __b);
 }
 
 /*-- vec_gfmsum_accum -------------------------------------------------------*/
@@ -8513,6 +8520,7 @@ vec_gfmsum_accum_128(__vector unsigned long long __a,
                      __vector unsigned long long __b,
                      __vector unsigned char __c) {
   return (__vector unsigned char)
+         (unsigned __int128 __attribute__((__vector_size__(16))))
          __builtin_s390_vgfmag(__a, __b, (unsigned __int128)__c);
 }
 
@@ -8810,6 +8818,7 @@ vec_msum_u128(__vector unsigned long long __a, __vector unsigned long long __b,
 
 #define vec_msum_u128(X, Y, Z, W) \
   ((__typeof__((vec_msum_u128)((X), (Y), (Z), (W)))) \
+   (unsigned __int128 __attribute__((__vector_size__(16)))) \
    __builtin_s390_vmslg((X), (Y), (unsigned __int128)(Z), (W)))
 #endif
 
@@ -8817,7 +8826,9 @@ vec_msum_u128(__vector unsigned long long __a, __vector unsigned long long __b,
 
 static inline __ATTRS_ai __vector unsigned char
 vec_sub_u128(__vector unsigned char __a, __vector unsigned char __b) {
-  return (__vector unsigned char)((__int128)__a - (__int128)__b);
+  return (__vector unsigned char)
+         (unsigned __int128 __attribute__((__vector_size__(16))))
+         ((__int128)__a - (__int128)__b);
 }
 
 /*-- vec_subc ---------------------------------------------------------------*/
@@ -8847,6 +8858,7 @@ vec_subc(__vector unsigned long long __a, __vector unsigned long long __b) {
 static inline __ATTRS_ai __vector unsigned char
 vec_subc_u128(__vector unsigned char __a, __vector unsigned char __b) {
   return (__vector unsigned char)
+         (unsigned __int128 __attribute__((__vector_size__(16))))
          __builtin_s390_vscbiq((unsigned __int128)__a, (unsigned __int128)__b);
 }
 
@@ -8856,6 +8868,7 @@ static inline __ATTRS_ai __vector unsigned char
 vec_sube_u128(__vector unsigned char __a, __vector unsigned char __b,
               __vector unsigned char __c) {
   return (__vector unsigned char)
+         (unsigned __int128 __attribute__((__vector_size__(16))))
          __builtin_s390_vsbiq((unsigned __int128)__a, (unsigned __int128)__b,
                               (unsigned __int128)__c);
 }
@@ -8866,6 +8879,7 @@ static inline __ATTRS_ai __vector unsigned char
 vec_subec_u128(__vector unsigned char __a, __vector unsigned char __b,
                __vector unsigned char __c) {
   return (__vector unsigned char)
+         (unsigned __int128 __attribute__((__vector_size__(16))))
          __builtin_s390_vsbcbiq((unsigned __int128)__a, (unsigned __int128)__b,
                                 (unsigned __int128)__c);
 }
@@ -8886,12 +8900,16 @@ vec_sum2(__vector unsigned int __a, __vector unsigned int __b) {
 
 static inline __ATTRS_o_ai __vector unsigned char
 vec_sum_u128(__vector unsigned int __a, __vector unsigned int __b) {
-  return (__vector unsigned char)__builtin_s390_vsumqf(__a, __b);
+  return (__vector unsigned char)
+         (unsigned __int128 __attribute__((__vector_size__(16))))
+         __builtin_s390_vsumqf(__a, __b);
 }
 
 static inline __ATTRS_o_ai __vector unsigned char
 vec_sum_u128(__vector unsigned long long __a, __vector unsigned long long __b) {
-  return (__vector unsigned char)__builtin_s390_vsumqg(__a, __b);
+  return (__vector unsigned char)
+         (unsigned __int128 __attribute__((__vector_size__(16))))
+         __builtin_s390_vsumqg(__a, __b);
 }
 
 /*-- vec_sum4 ---------------------------------------------------------------*/
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-i128.c b/clang/test/CodeGen/SystemZ/builtins-systemz-i128.c
new file mode 100644
index 0000000000000..896cef515743c
--- /dev/null
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-i128.c
@@ -0,0 +1,165 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: systemz-registered-target
+// RUN: %clang_cc1 -target-cpu z14 -triple s390x-linux-gnu \
+// RUN: -O2 -fzvector -flax-vector-conversions=none \
+// RUN: -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s
+
+#include <vecintrin.h>
+
+volatile vector unsigned char vuc;
+volatile vector unsigned short vus;
+volatile vector unsigned int vui;
+volatile vector unsigned long long vul;
+
+// CHECK-LABEL: define dso_local void @test(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3:![0-9]+]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    [[ADD_I:%.*]] = add nsw i128 [[TMP3]], [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast i128 [[ADD_I]] to <16 x i8>
+// CHECK-NEXT:    store volatile <16 x i8> [[TMP4]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP5:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP6:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i8> [[TMP5]] to i128
+// CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to i128
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call i128 @llvm.s390.vaccq(i128 [[TMP7]], i128 [[TMP8]])
+// CHECK-NEXT:    [[TMP10:%.*]] = bitcast i128 [[TMP9]] to <16 x i8>
+// CHECK-NEXT:    store volatile <16 x i8> [[TMP10]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP11:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP12:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP13:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x i8> [[TMP11]] to i128
+// CHECK-NEXT:    [[TMP15:%.*]] = bitcast <16 x i8> [[TMP12]] to i128
+// CHECK-NEXT:    [[TMP16:%.*]] = bitcast <16 x i8> [[TMP13]] to i128
+// CHECK-NEXT:    [[TMP17:%.*]] = tail call i128 @llvm.s390.vacq(i128 [[TMP14]], i128 [[TMP15]], i128 [[TMP16]])
+// CHECK-NEXT:    [[TMP18:%.*]] = bitcast i128 [[TMP17]] to <16 x i8>
+// CHECK-NEXT:    store volatile <16 x i8> [[TMP18]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP19:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP20:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP21:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP22:%.*]] = bitcast <16 x i8> [[TMP19]] to i128
+// CHECK-NEXT:    [[TMP23:%.*]] = bitcast <16 x i8> [[TMP20]] to i128
+// CHECK-NEXT:    [[TMP24:%.*]] = bitcast <16 x i8> [[TMP21]] to i128
+// CHECK-NEXT:    [[TMP25:%.*]] = tail call i128 @llvm.s390.vacccq(i128 [[TMP22]], i128 [[TMP23]], i128 [[TMP24]])
+// CHECK-NEXT:    [[TMP26:%.*]] = bitcast i128 [[TMP25]] to <16 x i8>
+// CHECK-NEXT:    store volatile <16 x i8> [[TMP26]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP27:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP28:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP29:%.*]] = bitcast <16 x i8> [[TMP27]] to i128
+// CHECK-NEXT:    [[TMP30:%.*]] = bitcast <16 x i8> [[TMP28]] to i128
+// CHECK-NEXT:    [[SUB_I:%.*]] = sub nsw i128 [[TMP29]], [[TMP30]]
+// CHECK-NEXT:    [[TMP31:%.*]] = bitcast i128 [[SUB_I]] to <16 x i8>
+// CHECK-NEXT:    store volatile <16 x i8> [[TMP31]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP32:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP33:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP34:%.*]] = bitcast <16 x i8> [[TMP32]] to i128
+// CHECK-NEXT:    [[TMP35:%.*]] = bitcast <16 x i8> [[TMP33]] to i128
+// CHECK-NEXT:    [[TMP36:%.*]] = tail call i128 @llvm.s390.vscbiq(i128 [[TMP34]], i128 [[TMP35]])
+// CHECK-NEXT:    [[TMP37:%.*]] = bitcast i128 [[TMP36]] to <16 x i8>
+// CHECK-NEXT:    store volatile <16 x i8> [[TMP37]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP38:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP39:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP40:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP41:%.*]] = bitcast <16 x i8> [[TMP38]] to i128
+// CHECK-NEXT:    [[TMP42:%.*]] = bitcast <16 x i8> [[TMP39]] to i128
+// CHECK-NEXT:    [[TMP43:%.*]] = bitcast <16 x i8> [[TMP40]] to i128
+// CHECK-NEXT:    [[TMP44:%.*]] = tail call i128 @llvm.s390.vsbiq(i128 [[TMP41]], i128 [[TMP42]], i128 [[TMP43]])
+// CHECK-NEXT:    [[TMP45:%.*]] = bitcast i128 [[TMP44]] to <16 x i8>
+// CHECK-NEXT:    store volatile <16 x i8> [[TMP45]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP46:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP47:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP48:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP49:%.*]] = bitcast <16 x i8> [[TMP46]] to i128
+// CHECK-NEXT:    [[TMP50:%.*]] = bitcast <16 x i8> [[TMP47]] to i128
+// CHECK-NEXT:    [[TMP51:%.*]] = bitcast <16 x i8> [[TMP48]] to i128
+// CHECK-NEXT:    [[TMP52:%.*]] = tail call i128 @llvm.s390.vsbcbiq(i128 [[TMP49]], i128 [[TMP50]], i128 [[TMP51]])
+// CHECK-NEXT:    [[TMP53:%.*]] = bitcast i128 [[TMP52]] to <16 x i8>
+// CHECK-NEXT:    store volatile <16 x i8> [[TMP53]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP54:%.*]] = load volatile <4 x i32>, ptr @vui, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP55:%.*]] = load volatile <4 x i32>, ptr @vui, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP56:%.*]] = tail call i128 @llvm.s390.vsumqf(<4 x i32> [[TMP54]], <4 x i32> [[TMP55]])
+// CHECK-NEXT:    [[TMP57:%.*]] = bitcast i128 [[TMP56]] to <16 x i8>
+// CHECK-NEXT:    store volatile <16 x i8> [[TMP57]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP58:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP59:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP60:%.*]] = tail call i128 @llvm.s390.vsumqg(<2 x i64> [[TMP58]], <2 x i64> [[TMP59]])
+// CHECK-NEXT:    [[TMP61:%.*]] = bitcast i128 [[TMP60]] to <16 x i8>
+// CHECK-NEXT:    store volatile <16 x i8> [[TMP61]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP62:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP63:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP64:%.*]] = tail call i128 @llvm.s390.vgfmg(<2 x i64> [[TMP62]], <2 x i64> [[TMP63]])
+// CHECK-NEXT:    [[TMP65:%.*]] = bitcast i128 [[TMP64]] to <16 x i8>
+// CHECK-NEXT:    store volatile <16 x i8> [[TMP65]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP66:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP67:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP68:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP69:%.*]] = bitcast <16 x i8> [[TMP68]] to i128
+// CHECK-NEXT:    [[TMP70:%.*]] = tail call i128 @llvm.s390.vgfmag(<2 x i64> [[TMP66]], <2 x i64> [[TMP67]], i128 [[TMP69]])
+// CHECK-NEXT:    [[TMP71:%.*]] = bitcast i128 [[TMP70]] to <16 x i8>
+// CHECK-NEXT:    store volatile <16 x i8> [[TMP71]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP72:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP73:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP74:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP75:%.*]] = bitcast <16 x i8> [[TMP74]] to i128
+// CHECK-NEXT:    [[TMP76:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP72]], <2 x i64> [[TMP73]], i128 [[TMP75]], i32 0)
+// CHECK-NEXT:    [[TMP77:%.*]] = bitcast i128 [[TMP76]] to <16 x i8>
+// CHECK-NEXT:    store volatile <16 x i8> [[TMP77]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP78:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP79:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP80:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP81:%.*]] = bitcast <16 x i8> [[TMP80]] to i128
+// CHECK-NEXT:    [[TMP82:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP78]], <2 x i64> [[TMP79]], i128 [[TMP81]], i32 4)
+// CHECK-NEXT:    [[TMP83:%.*]] = bitcast i128 [[TMP82]] to <16 x i8>
+// CHECK-NEXT:    store volatile <16 x i8> [[TMP83]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP84:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP85:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP86:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP87:%.*]] = bitcast <16 x i8> [[TMP86]] to i128
+// CHECK-NEXT:    [[TMP88:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP84]], <2 x i64> [[TMP85]], i128 [[TMP87]], i32 8)
+// CHECK-NEXT:    [[TMP89:%.*]] = bitcast i128 [[TMP88]] to <16 x i8>
+// CHECK-NEXT:    store volatile <16 x i8> [[TMP89]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP90:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP91:%.*]] = load volatile <2 x i64>, ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP92:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP93:%.*]] = bitcast <16 x i8> [[TMP92]] to i128
+// CHECK-NEXT:    [[TMP94:%.*]] = tail call i128 @llvm.s390.vmslg(<2 x i64> [[TMP90]], <2 x i64> [[TMP91]], i128 [[TMP93]], i32 12)
+// CHECK-NEXT:    [[TMP95:%.*]] = bitcast i128 [[TMP94]] to <16 x i8>
+// CHECK-NEXT:    store volatile <16 x i8> [[TMP95]], ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP96:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP97:%.*]] = load volatile <16 x i8>, ptr @vuc, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP98:%.*]] = tail call <2 x i64> @llvm.s390.vbperm(<16 x i8> [[TMP96]], <16 x i8> [[TMP97]])
+// CHECK-NEXT:    store volatile <2 x i64> [[TMP98]], ptr @vul, align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    ret void
+//
+void test(void) {
+  vuc = vec_add_u128(vuc, vuc);
+  vuc = vec_addc_u128(vuc, vuc);
+  vuc = vec_adde_u128(vuc, vuc, vuc);
+  vuc = vec_addec_u128(vuc, vuc, vuc);
+
+  vuc = vec_sub_u128(vuc, vuc);
+  vuc = vec_subc_u128(vuc, vuc);
+  vuc = vec_sube_u128(vuc, vuc, vuc);
+  vuc = vec_subec_u128(vuc, vuc, vuc);
+
+  vuc = vec_sum_u128(vui, vui);
+  vuc = vec_sum_u128(vul, vul);
+
+  vuc = vec_gfmsum_128(vul, vul);
+  vuc = vec_gfmsum_accum_128(vul, vul, vuc);
+
+  vuc = vec_msum_u128(vul, vul, vuc, 0);
+  vuc = vec_msum_u128(vul, vul, vuc, 4);
+  vuc = vec_msum_u128(vul, vul, vuc, 8);
+  vuc = vec_msum_u128(vul, vul, vuc, 12);
+
+  vul = vec_bperm_u128(vuc, vuc);
+}
+//.
+// CHECK: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+//.

From b42a74febeb6f2486403f4fa37e3169fce9eb091 Mon Sep 17 00:00:00 2001
From: Jan Hendrik Farr <Cydox@users.noreply.github.com>
Date: Thu, 3 Oct 2024 07:16:21 +0200
Subject: [PATCH 349/427] [Clang] Fix 'counted_by' for nested struct pointers
 (#110497)

Fix counted_by attribute for cases where the flexible array member is
accessed through struct pointer inside another struct:

```
struct variable {
        int a;
        int b;
        int length;
        short array[] __attribute__((counted_by(length)));
};

struct bucket {
        int a;
        struct variable *growable;
        int b;
};
```

__builtin_dynamic_object_size(p->growable->array, 0);

This commit makes sure that if the StructBase is both a MemberExpr and a
pointer, it is treated as a pointer. Otherwise clang will generate to
code to access the address of p->growable intead of loading the value of
p->growable->length.

Fixes #110385
---
 clang/lib/CodeGen/CGExpr.cpp                  | 16 ++--
 clang/test/CodeGen/attr-counted-by-pr110385.c | 70 ++++++++++++++++
 clang/test/CodeGen/attr-counted-by.c          | 80 +++++++++----------
 3 files changed, 117 insertions(+), 49 deletions(-)
 create mode 100644 clang/test/CodeGen/attr-counted-by-pr110385.c

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 5f58a64d8386c..8d6f535bba896 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1052,6 +1052,8 @@ class StructAccessBase
     return Visit(E->getBase());
   }
   const Expr *VisitCastExpr(const CastExpr *E) {
+    if (E->getCastKind() == CK_LValueToRValue)
+      return E;
     return Visit(E->getSubExpr());
   }
   const Expr *VisitParenExpr(const ParenExpr *E) {
@@ -1119,19 +1121,15 @@ llvm::Value *CodeGenFunction::EmitCountedByFieldExpr(
     return nullptr;
 
   llvm::Value *Res = nullptr;
-  if (const auto *DRE = dyn_cast<DeclRefExpr>(StructBase)) {
-    Res = EmitDeclRefLValue(DRE).getPointer(*this);
-    Res = Builder.CreateAlignedLoad(ConvertType(DRE->getType()), Res,
-                                    getPointerAlign(), "dre.load");
-  } else if (const MemberExpr *ME = dyn_cast<MemberExpr>(StructBase)) {
-    LValue LV = EmitMemberExpr(ME);
-    Address Addr = LV.getAddress();
-    Res = Addr.emitRawPointer(*this);
-  } else if (StructBase->getType()->isPointerType()) {
+  if (StructBase->getType()->isPointerType()) {
     LValueBaseInfo BaseInfo;
     TBAAAccessInfo TBAAInfo;
     Address Addr = EmitPointerWithAlignment(StructBase, &BaseInfo, &TBAAInfo);
     Res = Addr.emitRawPointer(*this);
+  } else if (StructBase->isLValue()) {
+    LValue LV = EmitLValue(StructBase);
+    Address Addr = LV.getAddress();
+    Res = Addr.emitRawPointer(*this);
   } else {
     return nullptr;
   }
diff --git a/clang/test/CodeGen/attr-counted-by-pr110385.c b/clang/test/CodeGen/attr-counted-by-pr110385.c
new file mode 100644
index 0000000000000..6891d5abe7d5c
--- /dev/null
+++ b/clang/test/CodeGen/attr-counted-by-pr110385.c
@@ -0,0 +1,70 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -Wno-missing-declarations -emit-llvm -o - %s | FileCheck %s
+
+// See #110385
+// Based on reproducer from Kees Cook:
+// https://lore.kernel.org/all/202409170436.C3C6E7F7A@keescook/
+
+struct variable {
+        int a;
+        int b;
+        int length;
+        short array[] __attribute__((counted_by(length)));
+};
+
+struct bucket {
+        int a;
+        struct variable *growable;
+        int b;
+};
+
+struct bucket2 {
+        int a;
+        struct variable growable;
+};
+
+void init(void * __attribute__((pass_dynamic_object_size(0))));
+
+// CHECK-LABEL: define dso_local void @test1(
+// CHECK-SAME: ptr nocapture noundef readonly [[FOO:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[GROWABLE:%.*]] = getelementptr inbounds i8, ptr [[FOO]], i64 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[GROWABLE]], align 8, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12
+// CHECK-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
+// CHECK-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
+// CHECK-NEXT:    [[TMP2:%.*]] = shl nsw i64 [[TMP1]], 1
+// CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD]], -1
+// CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i64 [[TMP2]], i64 0
+// CHECK-NEXT:    tail call void @init(ptr noundef nonnull [[ARRAY]], i64 noundef [[TMP4]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT:    ret void
+//
+void test1(struct bucket *foo) {
+        init(foo->growable->array);
+}
+
+// CHECK-LABEL: define dso_local void @test2(
+// CHECK-SAME: ptr noundef [[FOO:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[FOO]], i64 16
+// CHECK-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[FOO]], i64 12
+// CHECK-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
+// CHECK-NEXT:    [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD]], -1
+// CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 0
+// CHECK-NEXT:    tail call void @init(ptr noundef nonnull [[ARRAY]], i64 noundef [[TMP3]]) #[[ATTR2]]
+// CHECK-NEXT:    ret void
+//
+void test2(struct bucket2 *foo) {
+        init(foo->growable.array);
+}
+//.
+// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META7:![0-9]+]], i64 8}
+// CHECK: [[META3]] = !{!"bucket", [[META4:![0-9]+]], i64 0, [[META7]], i64 8, [[META4]], i64 16}
+// CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
+// CHECK: [[META6]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[META7]] = !{!"any pointer", [[META5]], i64 0}
+//.
diff --git a/clang/test/CodeGen/attr-counted-by.c b/clang/test/CodeGen/attr-counted-by.c
index 46a6c2b492dbe..322ef2d0f7db1 100644
--- a/clang/test/CodeGen/attr-counted-by.c
+++ b/clang/test/CodeGen/attr-counted-by.c
@@ -318,36 +318,36 @@ size_t test3_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[CONV1:%.*]] = select i1 [[TMP2]], i32 [[TMP5]], i32 0
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM]]
 // SANITIZE-WITH-ATTR-NEXT:    store i32 [[CONV1]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]]
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD7:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
+// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD6:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[ADD:%.*]] = add nsw i32 [[INDEX]], 1
-// SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM13:%.*]] = sext i32 [[ADD]] to i64
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP6:%.*]] = zext i32 [[DOT_COUNTED_BY_LOAD7]] to i64, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP7:%.*]] = icmp ult i64 [[IDXPROM13]], [[TMP6]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP7]], label [[CONT20:%.*]], label [[HANDLER_OUT_OF_BOUNDS16:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR:       handler.out_of_bounds16:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM13]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM12:%.*]] = sext i32 [[ADD]] to i64
+// SANITIZE-WITH-ATTR-NEXT:    [[TMP6:%.*]] = zext i32 [[DOT_COUNTED_BY_LOAD6]] to i64, !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    [[TMP7:%.*]] = icmp ult i64 [[IDXPROM12]], [[TMP6]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP7]], label [[CONT19:%.*]], label [[HANDLER_OUT_OF_BOUNDS15:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR:       handler.out_of_bounds15:
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM12]]) #[[ATTR10]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR:       cont20:
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD7]], 3
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP9:%.*]] = shl i32 [[DOT_COUNTED_BY_LOAD7]], 2
+// SANITIZE-WITH-ATTR:       cont19:
+// SANITIZE-WITH-ATTR-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD6]], 3
+// SANITIZE-WITH-ATTR-NEXT:    [[TMP9:%.*]] = shl i32 [[DOT_COUNTED_BY_LOAD6]], 2
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP10:%.*]] = add i32 [[TMP9]], 240
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP11:%.*]] = and i32 [[TMP10]], 252
-// SANITIZE-WITH-ATTR-NEXT:    [[CONV9:%.*]] = select i1 [[TMP8]], i32 [[TMP11]], i32 0
-// SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM13]]
-// SANITIZE-WITH-ATTR-NEXT:    store i32 [[CONV9]], ptr [[ARRAYIDX18]], align 4, !tbaa [[TBAA4]]
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD23:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
-// SANITIZE-WITH-ATTR-NEXT:    [[ADD29:%.*]] = add nsw i32 [[INDEX]], 2
-// SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM30:%.*]] = sext i32 [[ADD29]] to i64
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP12:%.*]] = zext i32 [[DOT_COUNTED_BY_LOAD23]] to i64, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP13:%.*]] = icmp ult i64 [[IDXPROM30]], [[TMP12]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP13]], label [[CONT37:%.*]], label [[HANDLER_OUT_OF_BOUNDS33:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR:       handler.out_of_bounds33:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 [[IDXPROM30]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    [[CONV8:%.*]] = select i1 [[TMP8]], i32 [[TMP11]], i32 0
+// SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM12]]
+// SANITIZE-WITH-ATTR-NEXT:    store i32 [[CONV8]], ptr [[ARRAYIDX17]], align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD21:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
+// SANITIZE-WITH-ATTR-NEXT:    [[ADD27:%.*]] = add nsw i32 [[INDEX]], 2
+// SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM28:%.*]] = sext i32 [[ADD27]] to i64
+// SANITIZE-WITH-ATTR-NEXT:    [[TMP12:%.*]] = zext i32 [[DOT_COUNTED_BY_LOAD21]] to i64, !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    [[TMP13:%.*]] = icmp ult i64 [[IDXPROM28]], [[TMP12]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP13]], label [[CONT35:%.*]], label [[HANDLER_OUT_OF_BOUNDS31:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR:       handler.out_of_bounds31:
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 [[IDXPROM28]]) #[[ATTR10]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR:       cont37:
-// SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM30]]
+// SANITIZE-WITH-ATTR:       cont35:
+// SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM28]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[FAM_IDX]], -1
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP15:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD23]] to i64
+// SANITIZE-WITH-ATTR-NEXT:    [[TMP15:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD21]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP16:%.*]] = sext i32 [[FAM_IDX]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP17:%.*]] = sub nsw i64 [[TMP15]], [[TMP16]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], -1
@@ -355,8 +355,8 @@ size_t test3_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTTR:%.*]] = trunc i64 [[TMP17]] to i32
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP20:%.*]] = shl i32 [[DOTTR]], 2
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP21:%.*]] = and i32 [[TMP20]], 252
-// SANITIZE-WITH-ATTR-NEXT:    [[CONV25:%.*]] = select i1 [[TMP19]], i32 [[TMP21]], i32 0
-// SANITIZE-WITH-ATTR-NEXT:    store i32 [[CONV25]], ptr [[ARRAYIDX35]], align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT:    [[CONV23:%.*]] = select i1 [[TMP19]], i32 [[TMP21]], i32 0
+// SANITIZE-WITH-ATTR-NEXT:    store i32 [[CONV23]], ptr [[ARRAYIDX33]], align 4, !tbaa [[TBAA4]]
 // SANITIZE-WITH-ATTR-NEXT:    ret void
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test4(
@@ -373,18 +373,18 @@ size_t test3_bdos(struct annotated *p) {
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    store i32 [[CONV1]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD4:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP4:%.*]] = shl i32 [[DOT_COUNTED_BY_LOAD4]], 2
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD3:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP4:%.*]] = shl i32 [[DOT_COUNTED_BY_LOAD3]], 2
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], 240
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP6:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD4]], 3
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP6:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD3]], 3
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP7:%.*]] = and i32 [[TMP5]], 252
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[CONV6:%.*]] = select i1 [[TMP6]], i32 [[TMP7]], i32 0
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[CONV5:%.*]] = select i1 [[TMP6]], i32 [[TMP7]], i32 0
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[ADD:%.*]] = add nsw i32 [[INDEX]], 1
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM8:%.*]] = sext i32 [[ADD]] to i64
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM8]]
-// NO-SANITIZE-WITH-ATTR-NEXT:    store i32 [[CONV6]], ptr [[ARRAYIDX9]], align 4, !tbaa [[TBAA2]]
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD12:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP8:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD12]] to i64
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM7:%.*]] = sext i32 [[ADD]] to i64
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM7]]
+// NO-SANITIZE-WITH-ATTR-NEXT:    store i32 [[CONV5]], ptr [[ARRAYIDX8]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD10:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP8:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD10]] to i64
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP9:%.*]] = sext i32 [[FAM_IDX]] to i64
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP10:%.*]] = sub nsw i64 [[TMP8]], [[TMP9]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP11:%.*]] = icmp sgt i64 [[TMP10]], -1
@@ -393,11 +393,11 @@ size_t test3_bdos(struct annotated *p) {
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[DOTTR:%.*]] = trunc i64 [[TMP10]] to i32
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP14:%.*]] = shl i32 [[DOTTR]], 2
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP15:%.*]] = and i32 [[TMP14]], 252
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[CONV14:%.*]] = select i1 [[TMP13]], i32 [[TMP15]], i32 0
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[ADD16:%.*]] = add nsw i32 [[INDEX]], 2
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM17:%.*]] = sext i32 [[ADD16]] to i64
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM17]]
-// NO-SANITIZE-WITH-ATTR-NEXT:    store i32 [[CONV14]], ptr [[ARRAYIDX18]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[CONV12:%.*]] = select i1 [[TMP13]], i32 [[TMP15]], i32 0
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[ADD14:%.*]] = add nsw i32 [[INDEX]], 2
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM15:%.*]] = sext i32 [[ADD14]] to i64
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM15]]
+// NO-SANITIZE-WITH-ATTR-NEXT:    store i32 [[CONV12]], ptr [[ARRAYIDX16]], align 4, !tbaa [[TBAA2]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    ret void
 //
 // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test4(

From 6c1fd539e43ea3c3bf052495704403a76d4e7979 Mon Sep 17 00:00:00 2001
From: Bill Wendling <morbo@google.com>
Date: Mon, 7 Oct 2024 15:25:47 -0700
Subject: [PATCH 350/427] [Clang] Check that we have the correct RecordDecl

Ensure we have the correct RecordDecl before returning the Expr we're
looking for.
---
 clang/lib/CodeGen/CGExpr.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 8d6f535bba896..3ef22b17f7691 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1053,7 +1053,7 @@ class StructAccessBase
   }
   const Expr *VisitCastExpr(const CastExpr *E) {
     if (E->getCastKind() == CK_LValueToRValue)
-      return E;
+      return IsExpectedRecordDecl(E) ? E : nullptr;
     return Visit(E->getSubExpr());
   }
   const Expr *VisitParenExpr(const ParenExpr *E) {

From 1360969b8125a633a3a8ad734c8a369bd3cf47c2 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Thu, 19 Sep 2024 15:40:47 -0400
Subject: [PATCH 351/427] [libc++] Adjust the version of __cpp_lib_ranges in
 C++20 mode

This is a (very partial) cherry-pick of #101715 to fix this oversight
in the LLVM 19 release.
---
 libcxx/docs/FeatureTestMacroTable.rst                       | 2 +-
 libcxx/include/version                                      | 4 ++--
 .../algorithm.version.compile.pass.cpp                      | 6 +++---
 .../functional.version.compile.pass.cpp                     | 6 +++---
 .../iterator.version.compile.pass.cpp                       | 6 +++---
 .../support.limits.general/memory.version.compile.pass.cpp  | 6 +++---
 .../support.limits.general/ranges.version.compile.pass.cpp  | 6 +++---
 .../support.limits.general/version.version.compile.pass.cpp | 6 +++---
 libcxx/utils/generate_feature_test_macro_components.py      | 2 +-
 9 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst
index 7f95f0f4e1c17..cbed6693f0a5d 100644
--- a/libcxx/docs/FeatureTestMacroTable.rst
+++ b/libcxx/docs/FeatureTestMacroTable.rst
@@ -266,7 +266,7 @@ Status
     ---------------------------------------------------------- -----------------
     ``__cpp_lib_polymorphic_allocator``                        ``201902L``
     ---------------------------------------------------------- -----------------
-    ``__cpp_lib_ranges``                                       ``202207L``
+    ``__cpp_lib_ranges``                                       ``202110L``
     ---------------------------------------------------------- -----------------
     ``__cpp_lib_remove_cvref``                                 ``201711L``
     ---------------------------------------------------------- -----------------
diff --git a/libcxx/include/version b/libcxx/include/version
index c8a31f77a915e..76ab6bedafdd0 100644
--- a/libcxx/include/version
+++ b/libcxx/include/version
@@ -184,7 +184,7 @@ __cpp_lib_print                                         202207L <ostream> <print
 __cpp_lib_quoted_string_io                              201304L <iomanip>
 __cpp_lib_ranges                                        202211L <algorithm> <functional> <iterator>
                                                                 <memory> <ranges>
-                                                        202207L // C++20
+                                                        202110L // C++20
 __cpp_lib_ranges_as_const                               202207L <ranges>
 __cpp_lib_ranges_as_rvalue                              202207L <ranges>
 __cpp_lib_ranges_chunk                                  202202L <ranges>
@@ -429,7 +429,7 @@ __cpp_lib_void_t                                        201411L <type_traits>
 # if _LIBCPP_AVAILABILITY_HAS_PMR
 #   define __cpp_lib_polymorphic_allocator              201902L
 # endif
-# define __cpp_lib_ranges                               202207L
+# define __cpp_lib_ranges                               202110L
 # define __cpp_lib_remove_cvref                         201711L
 # if !defined(_LIBCPP_HAS_NO_THREADS) && _LIBCPP_AVAILABILITY_HAS_SYNC
 #   define __cpp_lib_semaphore                          201907L
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.compile.pass.cpp
index 6b756535569f6..65da07ef02925 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.compile.pass.cpp
@@ -21,7 +21,7 @@
     __cpp_lib_default_template_type_for_algorithm_values    202403L [C++26]
     __cpp_lib_freestanding_algorithm                        202311L [C++26]
     __cpp_lib_parallel_algorithm                            201603L [C++17]
-    __cpp_lib_ranges                                        202207L [C++20]
+    __cpp_lib_ranges                                        202110L [C++20]
                                                             202211L [C++23]
     __cpp_lib_ranges_contains                               202207L [C++23]
     __cpp_lib_ranges_find_last                              202207L [C++23]
@@ -245,8 +245,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++20"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++20"
+# if __cpp_lib_ranges != 202110L
+#   error "__cpp_lib_ranges should have the value 202110L in c++20"
 # endif
 
 # ifdef __cpp_lib_ranges_contains
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.compile.pass.cpp
index 3c6a23aadc449..a9e9abb199da0 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.compile.pass.cpp
@@ -27,7 +27,7 @@
     __cpp_lib_invoke_r                 202106L [C++23]
     __cpp_lib_move_only_function       202110L [C++23]
     __cpp_lib_not_fn                   201603L [C++17]
-    __cpp_lib_ranges                   202207L [C++20]
+    __cpp_lib_ranges                   202110L [C++20]
                                        202211L [C++23]
     __cpp_lib_reference_wrapper        202403L [C++26]
     __cpp_lib_result_of_sfinae         201210L [C++14]
@@ -306,8 +306,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++20"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++20"
+# if __cpp_lib_ranges != 202110L
+#   error "__cpp_lib_ranges should have the value 202110L in c++20"
 # endif
 
 # ifdef __cpp_lib_reference_wrapper
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/iterator.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/iterator.version.compile.pass.cpp
index 0ea31289e3261..f66406dff777f 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/iterator.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/iterator.version.compile.pass.cpp
@@ -23,7 +23,7 @@
     __cpp_lib_move_iterator_concept         202207L [C++20]
     __cpp_lib_nonmember_container_access    201411L [C++17]
     __cpp_lib_null_iterators                201304L [C++14]
-    __cpp_lib_ranges                        202207L [C++20]
+    __cpp_lib_ranges                        202110L [C++20]
                                             202211L [C++23]
     __cpp_lib_ssize                         201902L [C++20]
 */
@@ -198,8 +198,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++20"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++20"
+# if __cpp_lib_ranges != 202110L
+#   error "__cpp_lib_ranges should have the value 202110L in c++20"
 # endif
 
 # ifndef __cpp_lib_ssize
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.compile.pass.cpp
index 92e1b862fef9d..ce7d7faaea87f 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.compile.pass.cpp
@@ -28,7 +28,7 @@
     __cpp_lib_make_unique                         201304L [C++14]
     __cpp_lib_out_ptr                             202106L [C++23]
                                                   202311L [C++26]
-    __cpp_lib_ranges                              202207L [C++20]
+    __cpp_lib_ranges                              202110L [C++20]
                                                   202211L [C++23]
     __cpp_lib_raw_memory_algorithms               201606L [C++17]
     __cpp_lib_shared_ptr_arrays                   201611L [C++17]
@@ -365,8 +365,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++20"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++20"
+# if __cpp_lib_ranges != 202110L
+#   error "__cpp_lib_ranges should have the value 202110L in c++20"
 # endif
 
 # ifndef __cpp_lib_raw_memory_algorithms
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp
index 75b2ef57143fc..148604759f0b7 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp
@@ -17,7 +17,7 @@
 
 /*  Constant                                                Value
     __cpp_lib_default_template_type_for_algorithm_values    202403L [C++26]
-    __cpp_lib_ranges                                        202207L [C++20]
+    __cpp_lib_ranges                                        202110L [C++20]
                                                             202211L [C++23]
     __cpp_lib_ranges_as_const                               202207L [C++23]
     __cpp_lib_ranges_as_rvalue                              202207L [C++23]
@@ -193,8 +193,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++20"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++20"
+# if __cpp_lib_ranges != 202110L
+#   error "__cpp_lib_ranges should have the value 202110L in c++20"
 # endif
 
 # ifdef __cpp_lib_ranges_as_const
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
index 5545298c23670..39befda9ac014 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
@@ -166,7 +166,7 @@
     __cpp_lib_polymorphic_allocator                         201902L [C++20]
     __cpp_lib_print                                         202207L [C++23]
     __cpp_lib_quoted_string_io                              201304L [C++14]
-    __cpp_lib_ranges                                        202207L [C++20]
+    __cpp_lib_ranges                                        202110L [C++20]
                                                             202211L [C++23]
     __cpp_lib_ranges_as_const                               202207L [C++23]
     __cpp_lib_ranges_as_rvalue                              202207L [C++23]
@@ -4132,8 +4132,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++20"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++20"
+# if __cpp_lib_ranges != 202110L
+#   error "__cpp_lib_ranges should have the value 202110L in c++20"
 # endif
 
 # ifdef __cpp_lib_ranges_as_const
diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py
index ab32d29e6f36e..0d7ce2063aa35 100755
--- a/libcxx/utils/generate_feature_test_macro_components.py
+++ b/libcxx/utils/generate_feature_test_macro_components.py
@@ -991,7 +991,7 @@ def add_version_header(tc):
         {
             "name": "__cpp_lib_ranges",
             "values": {
-                "c++20": 202207,
+                "c++20": 202110, # P2415R2 What is a view?
                 "c++23": 202211,  # P2602R2 Poison Pills are Too Toxic
                 # "c++23": 202302,  # Relaxing Ranges Just A Smidge
                 # "c++26": 202406,  # P2997R1 Removing the common reference requirement from the indirectly invocable concepts (already implemented as a DR)

From c36d7fd568a5da5b9ea5b0e6f60b1210ecc512d0 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Tue, 1 Oct 2024 09:39:44 -0400
Subject: [PATCH 352/427] [libc++] Avoid re-exporting a few specific symbols
 from libc++abi (#109054)

In 6a884a9aef39, I synchronized the export list of libc++abi to the
export list of libc++. From the linker's perspective, this caused these
symbols to be taken from libc++.dylib instead of libc++abi.dylib.

However, that can be problematic when back-deploying. Indeed, this means
that the linker will encode an undefined reference to be fullfilled by
libc++.dylib, but when backdeploying against an older system, that
symbol might only be available in libc++abi.dylib.

Most of the symbols that started being re-exported after 6a884a9aef39
turn out to be implementation details of libc++abi, so nobody really
depends on them and this back-deployment issue is inconsequential.

However, we ran into issues with a few of these symbols while testing
LLVM 19, which led to this patch. This slipped between the cracks and
that is why the patch is coming so long after the original patch landed.

In the future, a follow-up cleanup would be to stop exporting most of
the _cxxabiv1_foo_type_infoE symbols from both libc++abi and libc++
since they are implementation details that nobody should be relying on.

rdar://131984512
(cherry picked from commit 677e8cd6ff51e178bcb4669104763f71a2de106c)
---
 libcxx/lib/abi/CHANGELOG.TXT                  | 30 +++++++++++++++++++
 ...bcxxabi.v1.stable.exceptions.nonew.abilist |  7 -----
 ...bcxxabi.v1.stable.exceptions.nonew.abilist |  7 -----
 libcxxabi/lib/cxxabiv1.exp                    |  3 --
 libcxxabi/lib/itanium-base.exp                |  1 -
 libcxxabi/lib/symbols-not-reexported.exp      | 13 ++++++++
 libcxxabi/src/CMakeLists.txt                  | 10 ++++++-
 7 files changed, 52 insertions(+), 19 deletions(-)
 create mode 100644 libcxxabi/lib/symbols-not-reexported.exp

diff --git a/libcxx/lib/abi/CHANGELOG.TXT b/libcxx/lib/abi/CHANGELOG.TXT
index 32526f1786c6d..68c9d980a016e 100644
--- a/libcxx/lib/abi/CHANGELOG.TXT
+++ b/libcxx/lib/abi/CHANGELOG.TXT
@@ -16,6 +16,36 @@ New entries should be added directly below the "Version" header.
 Version 19.0
 ------------
 
+* [libc++] Avoid re-exporting a few specific symbols from libc++abi
+
+  In 6a884a9aef39, I synchronized the export list of libc++abi to the
+  export list of libc++. From the linker's perspective, this caused
+  these symbols to be taken from libc++.dylib instead of libc++abi.dylib.
+
+  However, that can be problematic when back-deploying. Indeed, this means
+  that the linker will encode an undefined reference to be fullfilled by
+  libc++.dylib, but when backdeploying against an older system, that symbol
+  might only be available in libc++abi.dylib.
+
+  Most of the symbols that started being re-exported after 6a884a9aef39
+  turn out to be implementation details of libc++abi, so nobody really
+  depends on them and this back-deployment issue is inconsequential.
+
+  However, we ran into issues with a few of these symbols while testing
+  LLVM 19, which led to this patch.
+
+  In the future, a follow-up cleanup would be to stop exporting most of
+  the _cxxabiv1_foo_type_infoE symbols from both libc++abi and libc++
+  since they are implementation details that nobody should be relying
+  on.
+
+  <arch>-apple-darwin
+  -------------------
+  Symbol not reexported anymore: ___cxa_rethrow_primary_exception
+  Symbol not reexported anymore: __ZTIN10__cxxabiv117__class_type_infoE
+  Symbol not reexported anymore: __ZTIN10__cxxabiv120__si_class_type_infoE
+  Symbol not reexported anymore: __ZTIN10__cxxabiv121__vmi_class_type_infoE
+
 * [libc++] Always keep libc++abi re-exports up-to-date
 
   This patch makes sure that the set of libc++abi symbols re-exported from libc++
diff --git a/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist
index 917388f86811f..32acae46e292d 100644
--- a/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist
+++ b/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist
@@ -78,12 +78,9 @@
 {'is_defined': False, 'name': '__ZTIN10__cxxabiv116__enum_type_infoE', 'type': 'U'}
 {'is_defined': False, 'name': '__ZTIN10__cxxabiv116__shim_type_infoE', 'type': 'U'}
 {'is_defined': False, 'name': '__ZTIN10__cxxabiv117__array_type_infoE', 'type': 'U'}
-{'is_defined': False, 'name': '__ZTIN10__cxxabiv117__class_type_infoE', 'type': 'U'}
 {'is_defined': False, 'name': '__ZTIN10__cxxabiv117__pbase_type_infoE', 'type': 'U'}
 {'is_defined': False, 'name': '__ZTIN10__cxxabiv119__pointer_type_infoE', 'type': 'U'}
 {'is_defined': False, 'name': '__ZTIN10__cxxabiv120__function_type_infoE', 'type': 'U'}
-{'is_defined': False, 'name': '__ZTIN10__cxxabiv120__si_class_type_infoE', 'type': 'U'}
-{'is_defined': False, 'name': '__ZTIN10__cxxabiv121__vmi_class_type_infoE', 'type': 'U'}
 {'is_defined': False, 'name': '__ZTIN10__cxxabiv123__fundamental_type_infoE', 'type': 'U'}
 {'is_defined': False, 'name': '__ZTIN10__cxxabiv129__pointer_to_member_type_infoE', 'type': 'U'}
 {'is_defined': False, 'name': '__ZTIPDh', 'type': 'U'}
@@ -2002,12 +1999,9 @@
 {'is_defined': True, 'name': '__ZTIN10__cxxabiv116__enum_type_infoE', 'type': 'I'}
 {'is_defined': True, 'name': '__ZTIN10__cxxabiv116__shim_type_infoE', 'type': 'I'}
 {'is_defined': True, 'name': '__ZTIN10__cxxabiv117__array_type_infoE', 'type': 'I'}
-{'is_defined': True, 'name': '__ZTIN10__cxxabiv117__class_type_infoE', 'type': 'I'}
 {'is_defined': True, 'name': '__ZTIN10__cxxabiv117__pbase_type_infoE', 'type': 'I'}
 {'is_defined': True, 'name': '__ZTIN10__cxxabiv119__pointer_type_infoE', 'type': 'I'}
 {'is_defined': True, 'name': '__ZTIN10__cxxabiv120__function_type_infoE', 'type': 'I'}
-{'is_defined': True, 'name': '__ZTIN10__cxxabiv120__si_class_type_infoE', 'type': 'I'}
-{'is_defined': True, 'name': '__ZTIN10__cxxabiv121__vmi_class_type_infoE', 'type': 'I'}
 {'is_defined': True, 'name': '__ZTIN10__cxxabiv123__fundamental_type_infoE', 'type': 'I'}
 {'is_defined': True, 'name': '__ZTIN10__cxxabiv129__pointer_to_member_type_infoE', 'type': 'I'}
 {'is_defined': True, 'name': '__ZTINSt12experimental15fundamentals_v112bad_any_castE', 'size': 0, 'type': 'OBJECT'}
@@ -2615,7 +2609,6 @@
 {'is_defined': True, 'name': '___cxa_new_handler', 'type': 'I'}
 {'is_defined': True, 'name': '___cxa_pure_virtual', 'type': 'I'}
 {'is_defined': True, 'name': '___cxa_rethrow', 'type': 'I'}
-{'is_defined': True, 'name': '___cxa_rethrow_primary_exception', 'type': 'I'}
 {'is_defined': True, 'name': '___cxa_terminate_handler', 'type': 'I'}
 {'is_defined': True, 'name': '___cxa_throw', 'type': 'I'}
 {'is_defined': True, 'name': '___cxa_throw_bad_array_new_length', 'type': 'I'}
diff --git a/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist
index defe235a283c2..7b5d77499d55f 100644
--- a/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist
+++ b/libcxx/lib/abi/x86_64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist
@@ -78,12 +78,9 @@
 {'is_defined': False, 'name': '__ZTIN10__cxxabiv116__enum_type_infoE', 'type': 'U'}
 {'is_defined': False, 'name': '__ZTIN10__cxxabiv116__shim_type_infoE', 'type': 'U'}
 {'is_defined': False, 'name': '__ZTIN10__cxxabiv117__array_type_infoE', 'type': 'U'}
-{'is_defined': False, 'name': '__ZTIN10__cxxabiv117__class_type_infoE', 'type': 'U'}
 {'is_defined': False, 'name': '__ZTIN10__cxxabiv117__pbase_type_infoE', 'type': 'U'}
 {'is_defined': False, 'name': '__ZTIN10__cxxabiv119__pointer_type_infoE', 'type': 'U'}
 {'is_defined': False, 'name': '__ZTIN10__cxxabiv120__function_type_infoE', 'type': 'U'}
-{'is_defined': False, 'name': '__ZTIN10__cxxabiv120__si_class_type_infoE', 'type': 'U'}
-{'is_defined': False, 'name': '__ZTIN10__cxxabiv121__vmi_class_type_infoE', 'type': 'U'}
 {'is_defined': False, 'name': '__ZTIN10__cxxabiv123__fundamental_type_infoE', 'type': 'U'}
 {'is_defined': False, 'name': '__ZTIN10__cxxabiv129__pointer_to_member_type_infoE', 'type': 'U'}
 {'is_defined': False, 'name': '__ZTIPDh', 'type': 'U'}
@@ -2002,12 +1999,9 @@
 {'is_defined': True, 'name': '__ZTIN10__cxxabiv116__enum_type_infoE', 'type': 'I'}
 {'is_defined': True, 'name': '__ZTIN10__cxxabiv116__shim_type_infoE', 'type': 'I'}
 {'is_defined': True, 'name': '__ZTIN10__cxxabiv117__array_type_infoE', 'type': 'I'}
-{'is_defined': True, 'name': '__ZTIN10__cxxabiv117__class_type_infoE', 'type': 'I'}
 {'is_defined': True, 'name': '__ZTIN10__cxxabiv117__pbase_type_infoE', 'type': 'I'}
 {'is_defined': True, 'name': '__ZTIN10__cxxabiv119__pointer_type_infoE', 'type': 'I'}
 {'is_defined': True, 'name': '__ZTIN10__cxxabiv120__function_type_infoE', 'type': 'I'}
-{'is_defined': True, 'name': '__ZTIN10__cxxabiv120__si_class_type_infoE', 'type': 'I'}
-{'is_defined': True, 'name': '__ZTIN10__cxxabiv121__vmi_class_type_infoE', 'type': 'I'}
 {'is_defined': True, 'name': '__ZTIN10__cxxabiv123__fundamental_type_infoE', 'type': 'I'}
 {'is_defined': True, 'name': '__ZTIN10__cxxabiv129__pointer_to_member_type_infoE', 'type': 'I'}
 {'is_defined': True, 'name': '__ZTINSt12experimental15fundamentals_v112bad_any_castE', 'size': 0, 'type': 'OBJECT'}
@@ -2649,7 +2643,6 @@
 {'is_defined': True, 'name': '___cxa_new_handler', 'type': 'I'}
 {'is_defined': True, 'name': '___cxa_pure_virtual', 'type': 'I'}
 {'is_defined': True, 'name': '___cxa_rethrow', 'type': 'I'}
-{'is_defined': True, 'name': '___cxa_rethrow_primary_exception', 'type': 'I'}
 {'is_defined': True, 'name': '___cxa_terminate_handler', 'type': 'I'}
 {'is_defined': True, 'name': '___cxa_throw', 'type': 'I'}
 {'is_defined': True, 'name': '___cxa_throw_bad_array_new_length', 'type': 'I'}
diff --git a/libcxxabi/lib/cxxabiv1.exp b/libcxxabi/lib/cxxabiv1.exp
index b1bab45ef3347..0a22831a63d0b 100644
--- a/libcxxabi/lib/cxxabiv1.exp
+++ b/libcxxabi/lib/cxxabiv1.exp
@@ -2,12 +2,9 @@
 __ZTIN10__cxxabiv116__enum_type_infoE
 __ZTIN10__cxxabiv116__shim_type_infoE
 __ZTIN10__cxxabiv117__array_type_infoE
-__ZTIN10__cxxabiv117__class_type_infoE
 __ZTIN10__cxxabiv117__pbase_type_infoE
 __ZTIN10__cxxabiv119__pointer_type_infoE
 __ZTIN10__cxxabiv120__function_type_infoE
-__ZTIN10__cxxabiv120__si_class_type_infoE
-__ZTIN10__cxxabiv121__vmi_class_type_infoE
 __ZTIN10__cxxabiv123__fundamental_type_infoE
 __ZTIN10__cxxabiv129__pointer_to_member_type_infoE
 
diff --git a/libcxxabi/lib/itanium-base.exp b/libcxxabi/lib/itanium-base.exp
index 002e062df423e..0c23a19c94c52 100644
--- a/libcxxabi/lib/itanium-base.exp
+++ b/libcxxabi/lib/itanium-base.exp
@@ -12,7 +12,6 @@ ___cxa_guard_acquire
 ___cxa_guard_release
 ___cxa_increment_exception_refcount
 ___cxa_pure_virtual
-___cxa_rethrow_primary_exception
 ___cxa_throw_bad_array_new_length
 ___cxa_uncaught_exception
 ___cxa_uncaught_exceptions
diff --git a/libcxxabi/lib/symbols-not-reexported.exp b/libcxxabi/lib/symbols-not-reexported.exp
new file mode 100644
index 0000000000000..ea5d0b3fc15d7
--- /dev/null
+++ b/libcxxabi/lib/symbols-not-reexported.exp
@@ -0,0 +1,13 @@
+# These symbols are not re-exported from libc++ because providing a definition in libc++ causes
+# issues with some clients when backdeploying.
+
+# These symbols are implementation details of libc++abi, but they are referenced from UBSan
+# (which is a total hack). We'll need to figure out how to decouple UBSan from these details
+# before we can stop exporting them from libc++abi.
+__ZTIN10__cxxabiv117__class_type_infoE
+__ZTIN10__cxxabiv120__si_class_type_infoE
+__ZTIN10__cxxabiv121__vmi_class_type_infoE
+
+# This symbol is not an implementation detail of libc++abi, but it also causes issues when moving
+# to libc++. This needs further investigation.
+___cxa_rethrow_primary_exception
diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt
index c1a7bcb14eb19..f95a8c471fd39 100644
--- a/libcxxabi/src/CMakeLists.txt
+++ b/libcxxabi/src/CMakeLists.txt
@@ -214,14 +214,22 @@ if (LIBCXXABI_ENABLE_SHARED)
   endif()
 
   add_library(cxxabi-reexports INTERFACE)
-  function(reexport_symbols file)
+  function(export_symbols file)
     # -exported_symbols_list is only available on Apple platforms
     if (APPLE)
       target_link_libraries(cxxabi_shared PRIVATE "-Wl,-exported_symbols_list,${file}")
+    endif()
+  endfunction()
+
+  function(reexport_symbols file)
+    export_symbols("${file}")
+    # -reexported_symbols_list is only available on Apple platforms
+    if (APPLE)
       target_link_libraries(cxxabi-reexports INTERFACE "-Wl,-reexported_symbols_list,${file}")
     endif()
   endfunction()
 
+  export_symbols("${CMAKE_CURRENT_SOURCE_DIR}/../lib/symbols-not-reexported.exp")
   reexport_symbols("${CMAKE_CURRENT_SOURCE_DIR}/../lib/cxxabiv1.exp")
   reexport_symbols("${CMAKE_CURRENT_SOURCE_DIR}/../lib/fundamental-types.exp")
   reexport_symbols("${CMAKE_CURRENT_SOURCE_DIR}/../lib/itanium-base.exp")

From f12830aac9040b03d3d53c66d65528bd26a6e47a Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 30 Sep 2024 13:43:53 +0400
Subject: [PATCH 353/427] FastISel: Fix incorrectly using getPointerTy
 (#110465)

This was using the default address space instead of the
correct one.

Fixes #56055

Keep old method around for ABI compatibility on the release branch.

(cherry picked from commit 81ba95cefe1b5a12f0a7d8e6a383bcce9e95b785)
---
 llvm/include/llvm/CodeGen/FastISel.h       |  7 +-
 llvm/lib/CodeGen/SelectionDAG/FastISel.cpp |  8 +--
 llvm/lib/Target/X86/X86FastISel.cpp        |  4 +-
 llvm/test/CodeGen/X86/issue56055.ll        | 81 ++++++++++++++++++++++
 4 files changed, 94 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/issue56055.ll

diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h
index 3cbc35400181d..f91bd692accad 100644
--- a/llvm/include/llvm/CodeGen/FastISel.h
+++ b/llvm/include/llvm/CodeGen/FastISel.h
@@ -275,7 +275,12 @@ class FastISel {
 
   /// This is a wrapper around getRegForValue that also takes care of
   /// truncating or sign-extending the given getelementptr index value.
-  Register getRegForGEPIndex(const Value *Idx);
+  Register getRegForGEPIndex(MVT PtrVT, const Value *Idx);
+
+  /// Retained for ABI compatibility in release branch.
+  Register getRegForGEPIndex(const Value *Idx) {
+    return getRegForGEPIndex(TLI.getPointerTy(DL), Idx);
+  }
 
   /// We're checking to see if we can fold \p LI into \p FoldInst. Note
   /// that we could have a sequence where multiple LLVM IR instructions are
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index ef9f783355190..246acc7f40583 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -380,14 +380,13 @@ void FastISel::updateValueMap(const Value *I, Register Reg, unsigned NumRegs) {
   }
 }
 
-Register FastISel::getRegForGEPIndex(const Value *Idx) {
+Register FastISel::getRegForGEPIndex(MVT PtrVT, const Value *Idx) {
   Register IdxN = getRegForValue(Idx);
   if (!IdxN)
     // Unhandled operand. Halt "fast" selection and bail.
     return Register();
 
   // If the index is smaller or larger than intptr_t, truncate or extend it.
-  MVT PtrVT = TLI.getPointerTy(DL);
   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
   if (IdxVT.bitsLT(PtrVT)) {
     IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN);
@@ -543,7 +542,8 @@ bool FastISel::selectGetElementPtr(const User *I) {
   uint64_t TotalOffs = 0;
   // FIXME: What's a good SWAG number for MaxOffs?
   uint64_t MaxOffs = 2048;
-  MVT VT = TLI.getPointerTy(DL);
+  MVT VT = TLI.getValueType(DL, I->getType()).getSimpleVT();
+
   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
        GTI != E; ++GTI) {
     const Value *Idx = GTI.getOperand();
@@ -584,7 +584,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
 
       // N = N + Idx * ElementSize;
       uint64_t ElementSize = GTI.getSequentialElementStride(DL);
-      Register IdxN = getRegForGEPIndex(Idx);
+      Register IdxN = getRegForGEPIndex(VT, Idx);
       if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
         return false;
 
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 2eae155956368..5d594bd54fbfc 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -902,6 +902,8 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
     uint64_t Disp = (int32_t)AM.Disp;
     unsigned IndexReg = AM.IndexReg;
     unsigned Scale = AM.Scale;
+    MVT PtrVT = TLI.getValueType(DL, U->getType()).getSimpleVT();
+
     gep_type_iterator GTI = gep_type_begin(U);
     // Iterate through the indices, folding what we can. Constants can be
     // folded, and one dynamic index can be handled, if the scale is supported.
@@ -937,7 +939,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
             (S == 1 || S == 2 || S == 4 || S == 8)) {
           // Scaled-index addressing.
           Scale = S;
-          IndexReg = getRegForGEPIndex(Op);
+          IndexReg = getRegForGEPIndex(PtrVT, Op);
           if (IndexReg == 0)
             return false;
           break;
diff --git a/llvm/test/CodeGen/X86/issue56055.ll b/llvm/test/CodeGen/X86/issue56055.ll
new file mode 100644
index 0000000000000..27eaf13e3b00b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/issue56055.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -fast-isel < %s | FileCheck -check-prefixes=CHECK,FASTISEL %s
+; RUN: llc < %s | FileCheck -check-prefixes=CHECK,SDAG %s
+
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-windows-msvc"
+
+define void @issue56055(ptr addrspace(270) %ptr, ptr %out) {
+; CHECK-LABEL: issue56055:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addl $2, %ecx
+; CHECK-NEXT:    movl %ecx, (%rdx)
+; CHECK-NEXT:    retq
+  %add.ptr = getelementptr inbounds i8, ptr addrspace(270) %ptr, i32 2
+  store ptr addrspace(270) %add.ptr, ptr %out
+  ret void
+}
+
+define void @issue56055_vector(<2 x ptr addrspace(270)> %ptr, ptr %out) {
+; CHECK-LABEL: issue56055_vector:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movdqa (%rcx), %xmm0
+; CHECK-NEXT:    paddd __xmm@00000000000000000000000200000002(%rip), %xmm0
+; CHECK-NEXT:    movq %xmm0, (%rdx)
+; CHECK-NEXT:    retq
+  %add.ptr = getelementptr inbounds i8, <2 x ptr addrspace(270)> %ptr, <2 x i32> <i32 2, i32 2>
+  store <2 x ptr addrspace(270)> %add.ptr, ptr %out
+  ret void
+}
+
+define void @issue56055_small_idx(ptr addrspace(270) %ptr, ptr %out, i16 %idx) {
+; CHECK-LABEL: issue56055_small_idx:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movswl %r8w, %eax
+; CHECK-NEXT:    addl %ecx, %eax
+; CHECK-NEXT:    movl %eax, (%rdx)
+; CHECK-NEXT:    retq
+  %add.ptr = getelementptr inbounds i8, ptr addrspace(270) %ptr, i16 %idx
+  store ptr addrspace(270) %add.ptr, ptr %out
+  ret void
+}
+
+define void @issue56055_small_idx_vector(<2 x ptr addrspace(270)> %ptr, ptr %out, <2 x i16> %idx) {
+; CHECK-LABEL: issue56055_small_idx_vector:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = mem[0,0,2,1,4,5,6,7]
+; CHECK-NEXT:    psrad $16, %xmm0
+; CHECK-NEXT:    paddd (%rcx), %xmm0
+; CHECK-NEXT:    movq %xmm0, (%rdx)
+; CHECK-NEXT:    retq
+  %add.ptr = getelementptr inbounds i8, <2 x ptr addrspace(270)> %ptr, <2 x i16> %idx
+  store <2 x ptr addrspace(270)> %add.ptr, ptr %out
+  ret void
+}
+
+define void @issue56055_large_idx(ptr addrspace(270) %ptr, ptr %out, i64 %idx) {
+; CHECK-LABEL: issue56055_large_idx:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addl %ecx, %r8d
+; CHECK-NEXT:    movl %r8d, (%rdx)
+; CHECK-NEXT:    retq
+  %add.ptr = getelementptr inbounds i8, ptr addrspace(270) %ptr, i64 %idx
+  store ptr addrspace(270) %add.ptr, ptr %out
+  ret void
+}
+
+define void @issue56055_large_idx_vector(<2 x ptr addrspace(270)> %ptr, ptr %out, <2 x i64> %idx) {
+; CHECK-LABEL: issue56055_large_idx_vector:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,2,2,3]
+; CHECK-NEXT:    paddd (%rcx), %xmm0
+; CHECK-NEXT:    movq %xmm0, (%rdx)
+; CHECK-NEXT:    retq
+  %add.ptr = getelementptr inbounds i8, <2 x ptr addrspace(270)> %ptr, <2 x i64> %idx
+  store <2 x ptr addrspace(270)> %add.ptr, ptr %out
+  ret void
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; FASTISEL: {{.*}}
+; SDAG: {{.*}}

From 139d737ae04cea31232e8b0c83d8687d0234af3d Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 2 Oct 2024 16:15:40 +0400
Subject: [PATCH 354/427] Move out of line

---
 llvm/include/llvm/CodeGen/FastISel.h       | 4 +---
 llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 4 ++++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h
index f91bd692accad..95e8004cc09c7 100644
--- a/llvm/include/llvm/CodeGen/FastISel.h
+++ b/llvm/include/llvm/CodeGen/FastISel.h
@@ -278,9 +278,7 @@ class FastISel {
   Register getRegForGEPIndex(MVT PtrVT, const Value *Idx);
 
   /// Retained for ABI compatibility in release branch.
-  Register getRegForGEPIndex(const Value *Idx) {
-    return getRegForGEPIndex(TLI.getPointerTy(DL), Idx);
-  }
+  Register getRegForGEPIndex(const Value *Idx);
 
   /// We're checking to see if we can fold \p LI into \p FoldInst. Note
   /// that we could have a sequence where multiple LLVM IR instructions are
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 246acc7f40583..398381a8164b2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -397,6 +397,10 @@ Register FastISel::getRegForGEPIndex(MVT PtrVT, const Value *Idx) {
   return IdxN;
 }
 
+Register FastISel::getRegForGEPIndex(const Value *Idx) {
+  return getRegForGEPIndex(TLI.getPointerTy(DL), Idx);
+}
+
 void FastISel::recomputeInsertPt() {
   if (getLastLocalValue()) {
     FuncInfo.InsertPt = getLastLocalValue();

From 92f4a2bf0a9d3962a25d15c1df5b6824950c74dc Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Fri, 4 Oct 2024 18:09:53 +0100
Subject: [PATCH 355/427] [x86] combineMUL - when looking for a vector multiply
 by splat constant, ensure we're only accepting ConstantInt splat scalars.

Fixes #111170

(cherry picked from commit 9459d729d22b7bfedad9d3a4237162077c6984a4)
---
 llvm/lib/Target/X86/X86ISelLowering.cpp |  3 ++-
 llvm/test/CodeGen/X86/pr111170.ll       | 33 +++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/pr111170.ll

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 10f269f803778..4e3a181f9a3ae 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47899,7 +47899,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
     if (VT.isVector())
       if (auto *RawC = getTargetConstantFromNode(N->getOperand(1)))
         if (auto *SplatC = RawC->getSplatValue())
-          C = &(SplatC->getUniqueInteger());
+          if (auto *SplatCI = dyn_cast<ConstantInt>(SplatC))
+            C = &(SplatCI->getValue());
 
     if (!C || C->getBitWidth() != VT.getScalarSizeInBits())
       return SDValue();
diff --git a/llvm/test/CodeGen/X86/pr111170.ll b/llvm/test/CodeGen/X86/pr111170.ll
new file mode 100644
index 0000000000000..145bf7119edcb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr111170.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-pc-windows-msvc -mcpu=corei7-avx | FileCheck %s
+
+define void @PR111170(<16 x i32> %x_load, ptr %offsetsPtr.i) {
+; CHECK-LABEL: PR111170:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2.80259693E-44,2.80259693E-44,2.80259693E-44,2.80259693E-44]
+; CHECK-NEXT:    vpmulld %xmm2, %xmm1, %xmm3
+; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; CHECK-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:    vpmulld %xmm2, %xmm0, %xmm4
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT:    vpmulld %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vmovdqu %xmm0, 16(%eax)
+; CHECK-NEXT:    vmovdqu %xmm4, (%eax)
+; CHECK-NEXT:    vmovdqu %xmm1, 48(%eax)
+; CHECK-NEXT:    vmovdqu %xmm3, 32(%eax)
+; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovdqu %xmm0, 16
+; CHECK-NEXT:    vmovdqu %xmm0, 0
+; CHECK-NEXT:    vmovdqu %xmm0, 48
+; CHECK-NEXT:    vmovdqu %xmm0, 32
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retl
+  %mul__x_load = mul <16 x i32> <i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20, i32 20>, %x_load
+  store <16 x i32> %mul__x_load, ptr %offsetsPtr.i, align 4
+  %blend1.i12.i = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> zeroinitializer, <8 x float> <float 0x36E4000000000000, float 0x36E4000000000000, float 0x36E4000000000000, float 0x36E4000000000000, float 0x36E4000000000000, float 0x36E4000000000000, float 0x36E4000000000000, float 0x36E4000000000000>, <8 x float> zeroinitializer)
+  %blend.i13.i = shufflevector <8 x float> zeroinitializer, <8 x float> %blend1.i12.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %blendAsInt.i14.i = bitcast <16 x float> %blend.i13.i to <16 x i32>
+  store <16 x i32> %blendAsInt.i14.i, ptr null, align 4
+  ret void
+}

From dedbdfb70daf06fc07bb4dfbec1d18af8fcef28f Mon Sep 17 00:00:00 2001
From: cor3ntin <corentinjabot@gmail.com>
Date: Tue, 8 Oct 2024 23:03:32 +0200
Subject: [PATCH 356/427] [Clang] Improve type traits recognition in
 `__has_builtin` (#111516)

`__has_builtin` was relying on reversible identifiers and string
matching to recognize builtin-type traits, leading to some newer type
traits not being recognized.

Fixes #111477
---
 clang/include/clang/Basic/TokenKinds.def  |  5 ++-
 clang/lib/Lex/PPMacroExpansion.cpp        | 52 ++++++++++++++---------
 clang/test/Preprocessor/feature_tests.cpp |  6 ++-
 3 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 8c54661e65cf4..0526fbf51bd91 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -64,6 +64,10 @@
 #ifndef EXPRESSION_TRAIT
 #define EXPRESSION_TRAIT(I,E,K) KEYWORD(I,K)
 #endif
+#ifndef TRANSFORM_TYPE_TRAIT_DEF
+#define TRANSFORM_TYPE_TRAIT_DEF(K, Trait) KEYWORD(__##Trait, KEYCXX)
+#endif
+
 #ifndef ALIAS
 #define ALIAS(X,Y,Z)
 #endif
@@ -534,7 +538,6 @@ TYPE_TRAIT_1(__has_unique_object_representations,
 TYPE_TRAIT_2(__is_layout_compatible, IsLayoutCompatible, KEYCXX)
 TYPE_TRAIT_2(__is_pointer_interconvertible_base_of, IsPointerInterconvertibleBaseOf, KEYCXX)
 
-#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) KEYWORD(__##Trait, KEYCXX)
 #include "clang/Basic/TransformTypeTraits.def"
 
 // Clang-only C++ Type Traits
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index 3913ff08c2eb5..fb88ec2bf603f 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1602,6 +1602,34 @@ static bool isTargetVariantEnvironment(const TargetInfo &TI,
   return false;
 }
 
+static bool IsBuiltinTrait(Token &Tok) {
+
+#define TYPE_TRAIT_1(Spelling, Name, Key)                                      \
+  case tok::kw_##Spelling:                                                     \
+    return true;
+#define TYPE_TRAIT_2(Spelling, Name, Key)                                      \
+  case tok::kw_##Spelling:                                                     \
+    return true;
+#define TYPE_TRAIT_N(Spelling, Name, Key)                                      \
+  case tok::kw_##Spelling:                                                     \
+    return true;
+#define ARRAY_TYPE_TRAIT(Spelling, Name, Key)                                  \
+  case tok::kw_##Spelling:                                                     \
+    return true;
+#define EXPRESSION_TRAIT(Spelling, Name, Key)                                  \
+  case tok::kw_##Spelling:                                                     \
+    return true;
+#define TRANSFORM_TYPE_TRAIT_DEF(K, Spelling)                                  \
+  case tok::kw___##Spelling:                                                   \
+    return true;
+
+  switch (Tok.getKind()) {
+  default:
+    return false;
+#include "clang/Basic/TokenKinds.def"
+  }
+}
+
 /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
 /// as a builtin macro, handle it and return the next token as 'Tok'.
 void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
@@ -1798,25 +1826,11 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
                 getTargetInfo().getTargetOpts().FeatureMap);
           }
           return true;
-        } else if (II->getTokenID() != tok::identifier ||
-                   II->hasRevertedTokenIDToIdentifier()) {
-          // Treat all keywords that introduce a custom syntax of the form
-          //
-          //   '__some_keyword' '(' [...] ')'
-          //
-          // as being "builtin functions", even if the syntax isn't a valid
-          // function call (for example, because the builtin takes a type
-          // argument).
-          if (II->getName().starts_with("__builtin_") ||
-              II->getName().starts_with("__is_") ||
-              II->getName().starts_with("__has_"))
-            return true;
-          return llvm::StringSwitch<bool>(II->getName())
-              .Case("__array_rank", true)
-              .Case("__array_extent", true)
-#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) .Case("__" #Trait, true)
-#include "clang/Basic/TransformTypeTraits.def"
-              .Default(false);
+        } else if (IsBuiltinTrait(Tok)) {
+          return true;
+        } else if (II->getTokenID() != tok::identifier &&
+                   II->getName().starts_with("__builtin_")) {
+          return true;
         } else {
           return llvm::StringSwitch<bool>(II->getName())
               // Report builtin templates as being builtins.
diff --git a/clang/test/Preprocessor/feature_tests.cpp b/clang/test/Preprocessor/feature_tests.cpp
index 00421d74e6282..13e2a9a261b66 100644
--- a/clang/test/Preprocessor/feature_tests.cpp
+++ b/clang/test/Preprocessor/feature_tests.cpp
@@ -31,7 +31,11 @@
     !__has_builtin(__underlying_type) || \
     !__has_builtin(__is_trivial) || \
     !__has_builtin(__is_same_as) || \
-    !__has_builtin(__has_unique_object_representations)
+    !__has_builtin(__has_unique_object_representations) || \
+    !__has_builtin(__is_trivially_equality_comparable) || \
+    !__has_builtin(__reference_constructs_from_temporary) || \
+    !__has_builtin(__reference_binds_to_temporary) || \
+    !__has_builtin(__reference_converts_from_temporary)
 #error Clang should have these
 #endif
 

From 8cb7d8a70d81f836a5e4d1f1a8be4989ac8d2671 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Wed, 9 Oct 2024 16:20:03 +0100
Subject: [PATCH 357/427] [VectorCombine] Do not try to operate on
 OperandBundles. (#111635)

This bails out if we see an intrinsic with an operand bundle on it, to
make sure we don't process the bundles incorrectly.

Fixes #110382.

(cherry picked from commit c136d3237a3c6230cfe1ab3f0f6790f903c54a27)
---
 .../Transforms/Vectorize/VectorCombine.cpp    | 59 ++++++++++---------
 .../AArch64/shuffletoidentity.ll              | 48 +++++++++++++++
 2 files changed, 79 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 444598520c981..679934d07e36d 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1900,33 +1900,35 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
 
     // We need each element to be the same type of value, and check that each
     // element has a single use.
-    if (all_of(drop_begin(Item), [Item](InstLane IL) {
-          Value *FrontV = Item.front().first->get();
-          if (!IL.first)
-            return true;
-          Value *V = IL.first->get();
-          if (auto *I = dyn_cast<Instruction>(V); I && !I->hasOneUse())
-            return false;
-          if (V->getValueID() != FrontV->getValueID())
-            return false;
-          if (auto *CI = dyn_cast<CmpInst>(V))
-            if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
-              return false;
-          if (auto *CI = dyn_cast<CastInst>(V))
-            if (CI->getSrcTy() != cast<CastInst>(FrontV)->getSrcTy())
-              return false;
-          if (auto *SI = dyn_cast<SelectInst>(V))
-            if (!isa<VectorType>(SI->getOperand(0)->getType()) ||
-                SI->getOperand(0)->getType() !=
-                    cast<SelectInst>(FrontV)->getOperand(0)->getType())
-              return false;
-          if (isa<CallInst>(V) && !isa<IntrinsicInst>(V))
-            return false;
-          auto *II = dyn_cast<IntrinsicInst>(V);
-          return !II || (isa<IntrinsicInst>(FrontV) &&
-                         II->getIntrinsicID() ==
-                             cast<IntrinsicInst>(FrontV)->getIntrinsicID());
-        })) {
+    auto CheckLaneIsEquivalentToFirst = [Item](InstLane IL) {
+      Value *FrontV = Item.front().first->get();
+      if (!IL.first)
+        return true;
+      Value *V = IL.first->get();
+      if (auto *I = dyn_cast<Instruction>(V); I && !I->hasOneUse())
+        return false;
+      if (V->getValueID() != FrontV->getValueID())
+        return false;
+      if (auto *CI = dyn_cast<CmpInst>(V))
+        if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
+          return false;
+      if (auto *CI = dyn_cast<CastInst>(V))
+        if (CI->getSrcTy() != cast<CastInst>(FrontV)->getSrcTy())
+          return false;
+      if (auto *SI = dyn_cast<SelectInst>(V))
+        if (!isa<VectorType>(SI->getOperand(0)->getType()) ||
+            SI->getOperand(0)->getType() !=
+                cast<SelectInst>(FrontV)->getOperand(0)->getType())
+          return false;
+      if (isa<CallInst>(V) && !isa<IntrinsicInst>(V))
+        return false;
+      auto *II = dyn_cast<IntrinsicInst>(V);
+      return !II || (isa<IntrinsicInst>(FrontV) &&
+                     II->getIntrinsicID() ==
+                         cast<IntrinsicInst>(FrontV)->getIntrinsicID() &&
+                     !II->hasOperandBundles());
+    };
+    if (all_of(drop_begin(Item), CheckLaneIsEquivalentToFirst)) {
       // Check the operator is one that we support.
       if (isa<BinaryOperator, CmpInst>(FrontU)) {
         //  We exclude div/rem in case they hit UB from poison lanes.
@@ -1954,7 +1956,8 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
         Worklist.push_back(generateInstLaneVectorFromOperand(Item, 2));
         continue;
       } else if (auto *II = dyn_cast<IntrinsicInst>(FrontU);
-                 II && isTriviallyVectorizable(II->getIntrinsicID())) {
+                 II && isTriviallyVectorizable(II->getIntrinsicID()) &&
+                 !II->hasOperandBundles()) {
         for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
           if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Op)) {
             if (!all_of(drop_begin(Item), [Item, Op](InstLane &IL) {
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index af04fb0ab4621..66fe11369d88b 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -1066,4 +1066,52 @@ entry:
   ret <2 x float> %4
 }
 
+define <16 x i64> @operandbundles(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c) {
+; CHECK-LABEL: @operandbundles(
+; CHECK-NEXT:    [[CALL:%.*]] = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i64> [[C:%.*]]) [ "jl_roots"(ptr addrspace(10) null, ptr addrspace(10) null) ]
+; CHECK-NEXT:    [[SHUFFLEVECTOR:%.*]] = shufflevector <4 x i64> [[CALL]], <4 x i64> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[SHUFFLEVECTOR1:%.*]] = shufflevector <16 x i64> [[SHUFFLEVECTOR]], <16 x i64> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    ret <16 x i64> [[SHUFFLEVECTOR1]]
+;
+  %call = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c) [ "jl_roots"(ptr addrspace(10) null, ptr addrspace(10) null) ]
+  %shufflevector = shufflevector <4 x i64> %call, <4 x i64> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %shufflevector1 = shufflevector <16 x i64> %shufflevector, <16 x i64> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <16 x i64> %shufflevector1
+}
+
+define <8 x i8> @operandbundles_first(<8 x i8> %a) {
+; CHECK-LABEL: @operandbundles_first(
+; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEXT:    [[ABT:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[AT]], i1 false) [ "jl_roots"(ptr addrspace(10) null, ptr addrspace(10) null) ]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[AT]], <4 x i8> [[AB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP1]], i1 false)
+; CHECK-NEXT:    ret <8 x i8> [[R]]
+;
+  %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
+  %abt = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %at, i1 false) [ "jl_roots"(ptr addrspace(10) null, ptr addrspace(10) null) ]
+  %abb = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %ab, i1 false)
+  %r = shufflevector <4 x i8> %abt, <4 x i8> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i8> %r
+}
+
+define <8 x i8> @operandbundles_second(<8 x i8> %a) {
+; CHECK-LABEL: @operandbundles_second(
+; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEXT:    [[ABB:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[AB]], i1 false) [ "jl_roots"(ptr addrspace(10) null, ptr addrspace(10) null) ]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[AT]], <4 x i8> [[AB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP1]], i1 false)
+; CHECK-NEXT:    ret <8 x i8> [[R]]
+;
+  %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
+  %abt = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %at, i1 false)
+  %abb = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %ab, i1 false) [ "jl_roots"(ptr addrspace(10) null, ptr addrspace(10) null) ]
+  %r = shufflevector <4 x i8> %abt, <4 x i8> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i8> %r
+}
+
+declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
 declare void @use(<4 x i8>)

From 04199538e3a293303514d93346ddd02bbf1c0314 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Thu, 10 Oct 2024 12:11:34 +0100
Subject: [PATCH 358/427] Update test since trunk

---
 .../Transforms/VectorCombine/AArch64/shuffletoidentity.ll | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index 66fe11369d88b..0b91618da6406 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -1084,8 +1084,8 @@ define <8 x i8> @operandbundles_first(<8 x i8> %a) {
 ; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
 ; CHECK-NEXT:    [[ABT:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[AT]], i1 false) [ "jl_roots"(ptr addrspace(10) null, ptr addrspace(10) null) ]
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[AT]], <4 x i8> [[AB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[R:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP1]], i1 false)
+; CHECK-NEXT:    [[ABB:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[AB]], i1 false)
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <8 x i8> [[R]]
 ;
   %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -1100,9 +1100,9 @@ define <8 x i8> @operandbundles_second(<8 x i8> %a) {
 ; CHECK-LABEL: @operandbundles_second(
 ; CHECK-NEXT:    [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEXT:    [[ABT:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[AT]], i1 false)
 ; CHECK-NEXT:    [[ABB:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[AB]], i1 false) [ "jl_roots"(ptr addrspace(10) null, ptr addrspace(10) null) ]
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[AT]], <4 x i8> [[AB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[R:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP1]], i1 false)
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <8 x i8> [[R]]
 ;
   %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>

From 16c2aae4f6163c1eb7321afd1cd3cdb781fe1727 Mon Sep 17 00:00:00 2001
From: David CARLIER <devnexen@gmail.com>
Date: Wed, 9 Oct 2024 05:47:00 +0100
Subject: [PATCH 359/427] [compiler-rt] Remove SHA2 interceptions for
 NetBSD/FreeBSD. (#110246)

To Fix #110215

Interceptors introduced with 18a7ebda99044473fdbce6376993714ff54e6690

(cherry picked from commit d0b9c2c5647656738cda3fb670aa5d3b3a69d784)
---
 .../sanitizer_common_interceptors.inc         | 180 ---------------
 .../sanitizer_platform_interceptors.h         |   2 -
 .../TestCases/FreeBSD/md5.cpp                 | 119 ----------
 .../TestCases/FreeBSD/sha2.cpp                | 214 ------------------
 .../sanitizer_common/TestCases/NetBSD/md5.cpp | 114 ----------
 .../TestCases/NetBSD/sha2.cpp                 | 206 -----------------
 6 files changed, 835 deletions(-)
 delete mode 100644 compiler-rt/test/sanitizer_common/TestCases/FreeBSD/md5.cpp
 delete mode 100644 compiler-rt/test/sanitizer_common/TestCases/FreeBSD/sha2.cpp
 delete mode 100644 compiler-rt/test/sanitizer_common/TestCases/NetBSD/md5.cpp
 delete mode 100644 compiler-rt/test/sanitizer_common/TestCases/NetBSD/sha2.cpp

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
index 49c9dcbef358f..7a7af7936af31 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -8823,83 +8823,6 @@ INTERCEPTOR(char *, RMD160Data, u8 *data, SIZE_T len, char *buf) {
 #define INIT_RMD160
 #endif
 
-#if SANITIZER_INTERCEPT_MD5
-INTERCEPTOR(void, MD5Init, void *context) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5Init, context);
-  REAL(MD5Init)(context);
-  if (context)
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD5_CTX_sz);
-}
-
-INTERCEPTOR(void, MD5Update, void *context, const unsigned char *data,
-            unsigned int len) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5Update, context, data, len);
-  if (data && len > 0)
-    COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
-  if (context)
-    COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz);
-  REAL(MD5Update)(context, data, len);
-  if (context)
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD5_CTX_sz);
-}
-
-INTERCEPTOR(void, MD5Final, unsigned char digest[16], void *context) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5Final, digest, context);
-  if (context)
-    COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz);
-  REAL(MD5Final)(digest, context);
-  if (digest)
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, sizeof(unsigned char) * 16);
-}
-
-INTERCEPTOR(char *, MD5End, void *context, char *buf) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5End, context, buf);
-  if (context)
-    COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz);
-  char *ret = REAL(MD5End)(context, buf);
-  if (ret)
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
-  return ret;
-}
-
-INTERCEPTOR(char *, MD5File, const char *filename, char *buf) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5File, filename, buf);
-  if (filename)
-    COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
-  char *ret = REAL(MD5File)(filename, buf);
-  if (ret)
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
-  return ret;
-}
-
-INTERCEPTOR(char *, MD5Data, const unsigned char *data, unsigned int len,
-            char *buf) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5Data, data, len, buf);
-  if (data && len > 0)
-    COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
-  char *ret = REAL(MD5Data)(data, len, buf);
-  if (ret)
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
-  return ret;
-}
-
-#define INIT_MD5                                                               \
-  COMMON_INTERCEPT_FUNCTION(MD5Init);                                          \
-  COMMON_INTERCEPT_FUNCTION(MD5Update);                                        \
-  COMMON_INTERCEPT_FUNCTION(MD5Final);                                         \
-  COMMON_INTERCEPT_FUNCTION(MD5End);                                           \
-  COMMON_INTERCEPT_FUNCTION(MD5File);                                          \
-  COMMON_INTERCEPT_FUNCTION(MD5Data)
-#else
-#define INIT_MD5
-#endif
-
 #if SANITIZER_INTERCEPT_FSEEK
 INTERCEPTOR(int, fseek, __sanitizer_FILE *stream, long int offset, int whence) {
   void *ctx;
@@ -9030,107 +8953,6 @@ INTERCEPTOR(char *, MD2Data, const unsigned char *data, unsigned int len,
 #define INIT_MD2
 #endif
 
-#if SANITIZER_INTERCEPT_SHA2
-#define SHA2_INTERCEPTORS(LEN, SHA2_STATE_T) \
-  INTERCEPTOR(void, SHA##LEN##_Init, void *context) { \
-    void *ctx; \
-    COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Init, context); \
-    REAL(SHA##LEN##_Init)(context); \
-    if (context) \
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
-  } \
-  INTERCEPTOR(void, SHA##LEN##_Update, void *context, \
-              const u8 *data, SIZE_T len) { \
-    void *ctx; \
-    COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Update, context, data, len); \
-    if (data && len > 0) \
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len); \
-    if (context) \
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
-    REAL(SHA##LEN##_Update)(context, data, len); \
-    if (context) \
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
-  } \
-  INTERCEPTOR(void, SHA##LEN##_Final, u8 digest[LEN/8], \
-  void *context) { \
-    void *ctx; \
-    CHECK_EQ(SHA##LEN##_digest_length, LEN/8); \
-    COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Final, digest, context); \
-    if (context) \
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
-    REAL(SHA##LEN##_Final)(digest, context); \
-    if (digest) \
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, \
-                                     sizeof(digest[0]) * \
-  SHA##LEN##_digest_length); \
-  } \
-  INTERCEPTOR(char *, SHA##LEN##_End, void *context, char *buf) { \
-    void *ctx; \
-    COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_End, context, buf); \
-    if (context) \
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
-    char *ret = REAL(SHA##LEN##_End)(context, buf); \
-    if (ret) \
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \
-    return ret; \
-  } \
-  INTERCEPTOR(char *, SHA##LEN##_File, const char *filename, char *buf) { \
-    void *ctx; \
-    COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_File, filename, buf); \
-    if (filename) \
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);\
-    char *ret = REAL(SHA##LEN##_File)(filename, buf); \
-    if (ret) \
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \
-    return ret; \
-  } \
-  INTERCEPTOR(char *, SHA##LEN##_FileChunk, const char *filename, char *buf, \
-              OFF_T offset, OFF_T length) { \
-    void *ctx; \
-    COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_FileChunk, filename, buf, offset, \
-  length); \
-    if (filename) \
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);\
-    char *ret = REAL(SHA##LEN##_FileChunk)(filename, buf, offset, length); \
-    if (ret) \
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \
-    return ret; \
-  } \
-  INTERCEPTOR(char *, SHA##LEN##_Data, u8 *data, SIZE_T len, char *buf) { \
-    void *ctx; \
-    COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Data, data, len, buf); \
-    if (data && len > 0) \
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len); \
-    char *ret = REAL(SHA##LEN##_Data)(data, len, buf); \
-    if (ret) \
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \
-    return ret; \
-  }
-
-SHA2_INTERCEPTORS(224, u32)
-SHA2_INTERCEPTORS(256, u32)
-SHA2_INTERCEPTORS(384, u64)
-SHA2_INTERCEPTORS(512, u64)
-
-#define INIT_SHA2_INTECEPTORS(LEN) \
-  COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Init); \
-  COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Update); \
-  COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Final); \
-  COMMON_INTERCEPT_FUNCTION(SHA##LEN##_End); \
-  COMMON_INTERCEPT_FUNCTION(SHA##LEN##_File); \
-  COMMON_INTERCEPT_FUNCTION(SHA##LEN##_FileChunk); \
-  COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Data)
-
-#define INIT_SHA2 \
-  INIT_SHA2_INTECEPTORS(224); \
-  INIT_SHA2_INTECEPTORS(256); \
-  INIT_SHA2_INTECEPTORS(384); \
-  INIT_SHA2_INTECEPTORS(512)
-#undef SHA2_INTERCEPTORS
-#else
-#define INIT_SHA2
-#endif
-
 #if SANITIZER_INTERCEPT_VIS
 INTERCEPTOR(char *, vis, char *dst, int c, int flag, int nextc) {
   void *ctx;
@@ -10588,10 +10410,8 @@ static void InitializeCommonInterceptors() {
   INIT_SHA1;
   INIT_MD4;
   INIT_RMD160;
-  INIT_MD5;
   INIT_FSEEK;
   INIT_MD2;
-  INIT_SHA2;
   INIT_VIS;
   INIT_CDB;
   INIT_GETFSENT;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
index 7d7ed9bc07ccf..05cd2d71cbbae 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -559,10 +559,8 @@
 #define SANITIZER_INTERCEPT_SHA1 SI_NETBSD
 #define SANITIZER_INTERCEPT_MD4 SI_NETBSD
 #define SANITIZER_INTERCEPT_RMD160 SI_NETBSD
-#define SANITIZER_INTERCEPT_MD5 (SI_NETBSD || SI_FREEBSD)
 #define SANITIZER_INTERCEPT_FSEEK (SI_NETBSD || SI_FREEBSD)
 #define SANITIZER_INTERCEPT_MD2 SI_NETBSD
-#define SANITIZER_INTERCEPT_SHA2 (SI_NETBSD || SI_FREEBSD)
 #define SANITIZER_INTERCEPT_CDB SI_NETBSD
 #define SANITIZER_INTERCEPT_VIS (SI_NETBSD || SI_FREEBSD)
 #define SANITIZER_INTERCEPT_POPEN SI_POSIX
diff --git a/compiler-rt/test/sanitizer_common/TestCases/FreeBSD/md5.cpp b/compiler-rt/test/sanitizer_common/TestCases/FreeBSD/md5.cpp
deleted file mode 100644
index 13325880a023a..0000000000000
--- a/compiler-rt/test/sanitizer_common/TestCases/FreeBSD/md5.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-// RUN: %clangxx -O0 -g %s -o %t -lmd && %run %t 2>&1 | FileCheck %s
-
-#include <sys/param.h>
-
-#include <assert.h>
-#include <md5.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-void test1() {
-  MD5_CTX ctx;
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-  uint8_t digest[MD5_DIGEST_LENGTH];
-  size_t entropysz = sizeof(entropy);
-  size_t digestsz = sizeof(digest);
-
-  MD5Init(&ctx);
-  MD5Update(&ctx, entropy, entropysz);
-  MD5Final(digest, &ctx);
-
-  printf("test1: '");
-  for (size_t i = 0; i < digestsz; i++)
-    printf("%02x", digest[i]);
-  printf("'\n");
-}
-
-void test2() {
-  MD5_CTX ctx;
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-  char digest[MD5_DIGEST_STRING_LENGTH];
-  size_t entropysz = sizeof(entropy);
-
-  MD5Init(&ctx);
-  MD5Update(&ctx, entropy, entropysz);
-  char *p = MD5End(&ctx, digest);
-  assert(p);
-
-  printf("test2: '%s'\n", digest);
-}
-
-void test3() {
-  MD5_CTX ctx;
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-  size_t entropysz = sizeof(entropy);
-
-  MD5Init(&ctx);
-  MD5Update(&ctx, entropy, entropysz);
-  char *p = MD5End(&ctx, NULL);
-  assert(strlen(p) == MD5_DIGEST_STRING_LENGTH - 1);
-
-  printf("test3: '%s'\n", p);
-
-  free(p);
-}
-
-void test4() {
-  char digest[MD5_DIGEST_STRING_LENGTH];
-
-  char *p = MD5File("/etc/fstab", digest);
-  assert(p == digest);
-
-  printf("test4: '%s'\n", p);
-}
-
-void test5() {
-  char *p = MD5File("/etc/fstab", NULL);
-  assert(strlen(p) == MD5_DIGEST_STRING_LENGTH - 1);
-
-  printf("test5: '%s'\n", p);
-
-  free(p);
-}
-
-void test6() {
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-  char digest[MD5_DIGEST_STRING_LENGTH];
-  size_t entropysz = sizeof(entropy);
-
-  char *p = MD5Data(entropy, entropysz, digest);
-  assert(p == digest);
-
-  printf("test6: '%s'\n", p);
-}
-
-void test7() {
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-  size_t entropysz = sizeof(entropy);
-
-  char *p = MD5Data(entropy, entropysz, NULL);
-  assert(strlen(p) == MD5_DIGEST_STRING_LENGTH - 1);
-
-  printf("test7: '%s'\n", p);
-
-  free(p);
-}
-
-int main(void) {
-  printf("MD5\n");
-
-  test1();
-  test2();
-  test3();
-  test4();
-  test5();
-  test6();
-  test7();
-
-  // CHECK: MD5
-  // CHECK: test1: '86e65b1ef4a830af347ac05ab4f0e999'
-  // CHECK: test2: '86e65b1ef4a830af347ac05ab4f0e999'
-  // CHECK: test3: '86e65b1ef4a830af347ac05ab4f0e999'
-  // CHECK: test4: '{{.*}}'
-  // CHECK: test5: '{{.*}}'
-  // CHECK: test6: '86e65b1ef4a830af347ac05ab4f0e999'
-  // CHECK: test7: '86e65b1ef4a830af347ac05ab4f0e999'
-
-  return 0;
-}
diff --git a/compiler-rt/test/sanitizer_common/TestCases/FreeBSD/sha2.cpp b/compiler-rt/test/sanitizer_common/TestCases/FreeBSD/sha2.cpp
deleted file mode 100644
index 3012aca7d7207..0000000000000
--- a/compiler-rt/test/sanitizer_common/TestCases/FreeBSD/sha2.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-// RUN: %clangxx -O0 -g %s -DSHASIZE=224 -o %t -lmd && %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-224
-// RUN: %clangxx -O0 -g %s -DSHASIZE=256 -o %t -lmd && %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-256
-// RUN: %clangxx -O0 -g %s -DSHASIZE=384 -o %t -lmd && %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-384
-// RUN: %clangxx -O0 -g %s -DSHASIZE=512 -o %t -lmd && %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-512
-
-#include <sys/param.h>
-
-#include <assert.h>
-#include <sha224.h>
-#include <sha256.h>
-#include <sha384.h>
-#include <sha512.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifndef SHASIZE
-#error SHASIZE must be defined
-#endif
-
-#define _SHA_CTX(x) SHA##x##_CTX
-#define SHA_CTX(x) _SHA_CTX(x)
-
-#define _SHA_DIGEST_LENGTH(x) SHA##x##_DIGEST_LENGTH
-#define SHA_DIGEST_LENGTH(x) _SHA_DIGEST_LENGTH(x)
-
-#define _SHA_DIGEST_STRING_LENGTH(x) SHA##x##_DIGEST_STRING_LENGTH
-#define SHA_DIGEST_STRING_LENGTH(x) _SHA_DIGEST_STRING_LENGTH(x)
-
-#define _SHA_Init(x) SHA##x##_Init
-#define SHA_Init(x) _SHA_Init(x)
-
-#define _SHA_Update(x) SHA##x##_Update
-#define SHA_Update(x) _SHA_Update(x)
-
-#define _SHA_Final(x) SHA##x##_Final
-#define SHA_Final(x) _SHA_Final(x)
-
-#define _SHA_End(x) SHA##x##_End
-#define SHA_End(x) _SHA_End(x)
-
-#define _SHA_File(x) SHA##x##_File
-#define SHA_File(x) _SHA_File(x)
-
-#define _SHA_FileChunk(x) SHA##x##_FileChunk
-#define SHA_FileChunk(x) _SHA_FileChunk(x)
-
-#define _SHA_Data(x) SHA##x##_Data
-#define SHA_Data(x) _SHA_Data(x)
-
-void test1() {
-  SHA_CTX(SHASIZE) ctx;
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-  uint8_t digest[SHA_DIGEST_LENGTH(SHASIZE)];
-  size_t entropysz = sizeof(entropy);
-  size_t digestsz = sizeof(digest);
-
-  SHA_Init(SHASIZE)(&ctx);
-  SHA_Update(SHASIZE)(&ctx, entropy, entropysz);
-  SHA_Final(SHASIZE)(digest, &ctx);
-
-  printf("test1: '");
-  for (size_t i = 0; i < digestsz; i++)
-    printf("%02x", digest[i]);
-  printf("'\n");
-}
-
-void test2() {
-  SHA_CTX(SHASIZE) ctx;
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-  char digest[SHA_DIGEST_STRING_LENGTH(SHASIZE)];
-  size_t entropysz = sizeof(entropy);
-
-  SHA_Init(SHASIZE)(&ctx);
-  SHA_Update(SHASIZE)(&ctx, entropy, entropysz);
-  char *p = SHA_End(SHASIZE)(&ctx, digest);
-  assert(p == digest);
-
-  printf("test2: '%s'\n", digest);
-}
-
-void test3() {
-  SHA_CTX(SHASIZE) ctx;
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-  size_t entropysz = sizeof(entropy);
-
-  SHA_Init(SHASIZE)(&ctx);
-  SHA_Update(SHASIZE)(&ctx, entropy, entropysz);
-  char *p = SHA_End(SHASIZE)(&ctx, NULL);
-  assert(strlen(p) == SHA_DIGEST_STRING_LENGTH(SHASIZE) - 1);
-
-  printf("test3: '%s'\n", p);
-
-  free(p);
-}
-
-void test4() {
-  char digest[SHA_DIGEST_STRING_LENGTH(SHASIZE)];
-
-  char *p = SHA_File(SHASIZE)("/etc/fstab", digest);
-  assert(p == digest);
-
-  printf("test4: '%s'\n", p);
-}
-
-void test5() {
-  char *p = SHA_File(SHASIZE)("/etc/fstab", NULL);
-  assert(strlen(p) == SHA_DIGEST_STRING_LENGTH(SHASIZE) - 1);
-
-  printf("test5: '%s'\n", p);
-
-  free(p);
-}
-
-void test6() {
-  char digest[SHA_DIGEST_STRING_LENGTH(SHASIZE)];
-
-  char *p = SHA_FileChunk(SHASIZE)("/etc/fstab", digest, 10, 20);
-  assert(p == digest);
-
-  printf("test6: '%s'\n", p);
-}
-
-void test7() {
-  char *p = SHA_FileChunk(SHASIZE)("/etc/fstab", NULL, 10, 20);
-  assert(strlen(p) == SHA_DIGEST_STRING_LENGTH(SHASIZE) - 1);
-
-  printf("test7: '%s'\n", p);
-
-  free(p);
-}
-
-void test8() {
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-  char digest[SHA_DIGEST_STRING_LENGTH(SHASIZE)];
-  size_t entropysz = sizeof(entropy);
-
-  char *p = SHA_Data(SHASIZE)(entropy, entropysz, digest);
-  assert(p == digest);
-
-  printf("test8: '%s'\n", p);
-}
-
-void test9() {
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-  size_t entropysz = sizeof(entropy);
-
-  char *p = SHA_Data(SHASIZE)(entropy, entropysz, NULL);
-  assert(strlen(p) == SHA_DIGEST_STRING_LENGTH(SHASIZE) - 1);
-
-  printf("test9: '%s'\n", p);
-
-  free(p);
-}
-
-int main(void) {
-  printf("SHA%d\n", SHASIZE);
-
-  test1();
-  test2();
-  test3();
-  test4();
-  test5();
-  test6();
-  test7();
-  test8();
-  test9();
-
-  // CHECK-224: SHA224
-  // CHECK-224: test1: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48'
-  // CHECK-224: test2: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48'
-  // CHECK-224: test3: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48'
-  // CHECK-224: test4: '{{.*}}'
-  // CHECK-224: test5: '{{.*}}'
-  // CHECK-224: test6: '{{.*}}'
-  // CHECK-224: test7: '{{.*}}'
-  // CHECK-224: test8: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48'
-  // CHECK-224: test9: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48'
-
-  // CHECK-256: SHA256
-  // CHECK-256: test1: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8'
-  // CHECK-256: test2: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8'
-  // CHECK-256: test3: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8'
-  // CHECK-256: test4: '{{.*}}'
-  // CHECK-256: test5: '{{.*}}'
-  // CHECK-256: test6: '{{.*}}'
-  // CHECK-256: test7: '{{.*}}'
-  // CHECK-256: test8: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8'
-  // CHECK-256: test9: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8'
-
-  // CHECK-384: SHA384
-  // CHECK-384: test1: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12'
-  // CHECK-384: test2: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12'
-  // CHECK-384: test3: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12'
-  // CHECK-384: test4: '{{.*}}'
-  // CHECK-384: test5: '{{.*}}'
-  // CHECK-384: test6: '{{.*}}'
-  // CHECK-384: test7: '{{.*}}'
-  // CHECK-384: test8: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12'
-  // CHECK-384: test9: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12'
-
-  // CHECK-512: SHA512
-  // CHECK-512: test1: '0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e'
-  // CHECK-512: test2: '0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e'
-  // CHECK-512: test3: '0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e'
-  // CHECK-512: test4: '{{.*}}'
-  // CHECK-512: test5: '{{.*}}'
-  // CHECK-512: test6: '{{.*}}'
-  // CHECK-512: test7: '{{.*}}'
-  // CHECK-512: test8: '0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e'
-  // CHECK-512: test9: '0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e'
-
-  return 0;
-}
diff --git a/compiler-rt/test/sanitizer_common/TestCases/NetBSD/md5.cpp b/compiler-rt/test/sanitizer_common/TestCases/NetBSD/md5.cpp
deleted file mode 100644
index aee21681800d8..0000000000000
--- a/compiler-rt/test/sanitizer_common/TestCases/NetBSD/md5.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-// RUN: %clangxx -O0 -g %s -o %t && %run %t 2>&1 | FileCheck %s
-
-#include <sys/param.h>
-
-#include <assert.h>
-#include <endian.h>
-#include <md5.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-void test1() {
-  MD5_CTX ctx;
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-  uint8_t digest[MD5_DIGEST_LENGTH];
-
-  MD5Init(&ctx);
-  MD5Update(&ctx, entropy, __arraycount(entropy));
-  MD5Final(digest, &ctx);
-
-  printf("test1: '");
-  for (size_t i = 0; i < __arraycount(digest); i++)
-    printf("%02x", digest[i]);
-  printf("'\n");
-}
-
-void test2() {
-  MD5_CTX ctx;
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-  char digest[MD5_DIGEST_STRING_LENGTH];
-
-  MD5Init(&ctx);
-  MD5Update(&ctx, entropy, __arraycount(entropy));
-  char *p = MD5End(&ctx, digest);
-  assert(p);
-
-  printf("test2: '%s'\n", digest);
-}
-
-void test3() {
-  MD5_CTX ctx;
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-
-  MD5Init(&ctx);
-  MD5Update(&ctx, entropy, __arraycount(entropy));
-  char *p = MD5End(&ctx, NULL);
-  assert(strlen(p) == MD5_DIGEST_STRING_LENGTH - 1);
-
-  printf("test3: '%s'\n", p);
-
-  free(p);
-}
-
-void test4() {
-  char digest[MD5_DIGEST_STRING_LENGTH];
-
-  char *p = MD5File("/etc/fstab", digest);
-  assert(p == digest);
-
-  printf("test4: '%s'\n", p);
-}
-
-void test5() {
-  char *p = MD5File("/etc/fstab", NULL);
-  assert(strlen(p) == MD5_DIGEST_STRING_LENGTH - 1);
-
-  printf("test5: '%s'\n", p);
-
-  free(p);
-}
-
-void test6() {
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-  char digest[MD5_DIGEST_STRING_LENGTH];
-
-  char *p = MD5Data(entropy, __arraycount(entropy), digest);
-  assert(p == digest);
-
-  printf("test6: '%s'\n", p);
-}
-
-void test7() {
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-
-  char *p = MD5Data(entropy, __arraycount(entropy), NULL);
-  assert(strlen(p) == MD5_DIGEST_STRING_LENGTH - 1);
-
-  printf("test7: '%s'\n", p);
-
-  free(p);
-}
-
-int main(void) {
-  printf("MD5\n");
-
-  test1();
-  test2();
-  test3();
-  test4();
-  test5();
-  test6();
-  test7();
-
-  // CHECK: MD5
-  // CHECK: test1: '86e65b1ef4a830af347ac05ab4f0e999'
-  // CHECK: test2: '86e65b1ef4a830af347ac05ab4f0e999'
-  // CHECK: test3: '86e65b1ef4a830af347ac05ab4f0e999'
-  // CHECK: test4: '{{.*}}'
-  // CHECK: test5: '{{.*}}'
-  // CHECK: test6: '86e65b1ef4a830af347ac05ab4f0e999'
-  // CHECK: test7: '86e65b1ef4a830af347ac05ab4f0e999'
-
-  return 0;
-}
diff --git a/compiler-rt/test/sanitizer_common/TestCases/NetBSD/sha2.cpp b/compiler-rt/test/sanitizer_common/TestCases/NetBSD/sha2.cpp
deleted file mode 100644
index e905e3b610fd3..0000000000000
--- a/compiler-rt/test/sanitizer_common/TestCases/NetBSD/sha2.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-// RUN: %clangxx -O0 -g %s -DSHASIZE=224 -o %t && %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-224
-// RUN: %clangxx -O0 -g %s -DSHASIZE=256 -o %t && %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-256
-// RUN: %clangxx -O0 -g %s -DSHASIZE=384 -o %t && %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-384
-// RUN: %clangxx -O0 -g %s -DSHASIZE=512 -o %t && %run %t 2>&1 | FileCheck %s -check-prefix=CHECK-512
-
-#include <sys/param.h>
-
-#include <assert.h>
-#include <endian.h>
-#include <sha2.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifndef SHASIZE
-#error SHASIZE must be defined
-#endif
-
-#define _SHA_CTX(x) SHA##x##_CTX
-#define SHA_CTX(x) _SHA_CTX(x)
-
-#define _SHA_DIGEST_LENGTH(x) SHA##x##_DIGEST_LENGTH
-#define SHA_DIGEST_LENGTH(x) _SHA_DIGEST_LENGTH(x)
-
-#define _SHA_DIGEST_STRING_LENGTH(x) SHA##x##_DIGEST_STRING_LENGTH
-#define SHA_DIGEST_STRING_LENGTH(x) _SHA_DIGEST_STRING_LENGTH(x)
-
-#define _SHA_Init(x) SHA##x##_Init
-#define SHA_Init(x) _SHA_Init(x)
-
-#define _SHA_Update(x) SHA##x##_Update
-#define SHA_Update(x) _SHA_Update(x)
-
-#define _SHA_Final(x) SHA##x##_Final
-#define SHA_Final(x) _SHA_Final(x)
-
-#define _SHA_End(x) SHA##x##_End
-#define SHA_End(x) _SHA_End(x)
-
-#define _SHA_File(x) SHA##x##_File
-#define SHA_File(x) _SHA_File(x)
-
-#define _SHA_FileChunk(x) SHA##x##_FileChunk
-#define SHA_FileChunk(x) _SHA_FileChunk(x)
-
-#define _SHA_Data(x) SHA##x##_Data
-#define SHA_Data(x) _SHA_Data(x)
-
-void test1() {
-  SHA_CTX(SHASIZE) ctx;
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-  uint8_t digest[SHA_DIGEST_LENGTH(SHASIZE)];
-
-  SHA_Init(SHASIZE)(&ctx);
-  SHA_Update(SHASIZE)(&ctx, entropy, __arraycount(entropy));
-  SHA_Final(SHASIZE)(digest, &ctx);
-
-  printf("test1: '");
-  for (size_t i = 0; i < __arraycount(digest); i++)
-    printf("%02x", digest[i]);
-  printf("'\n");
-}
-
-void test2() {
-  SHA_CTX(SHASIZE) ctx;
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-  char digest[SHA_DIGEST_STRING_LENGTH(SHASIZE)];
-
-  SHA_Init(SHASIZE)(&ctx);
-  SHA_Update(SHASIZE)(&ctx, entropy, __arraycount(entropy));
-  char *p = SHA_End(SHASIZE)(&ctx, digest);
-  assert(p == digest);
-
-  printf("test2: '%s'\n", digest);
-}
-
-void test3() {
-  SHA_CTX(SHASIZE) ctx;
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-
-  SHA_Init(SHASIZE)(&ctx);
-  SHA_Update(SHASIZE)(&ctx, entropy, __arraycount(entropy));
-  char *p = SHA_End(SHASIZE)(&ctx, NULL);
-  assert(strlen(p) == SHA_DIGEST_STRING_LENGTH(SHASIZE) - 1);
-
-  printf("test3: '%s'\n", p);
-
-  free(p);
-}
-
-void test4() {
-  char digest[SHA_DIGEST_STRING_LENGTH(SHASIZE)];
-
-  char *p = SHA_File(SHASIZE)("/etc/fstab", digest);
-  assert(p == digest);
-
-  printf("test4: '%s'\n", p);
-}
-
-void test5() {
-  char *p = SHA_File(SHASIZE)("/etc/fstab", NULL);
-  assert(strlen(p) == SHA_DIGEST_STRING_LENGTH(SHASIZE) - 1);
-
-  printf("test5: '%s'\n", p);
-
-  free(p);
-}
-
-void test6() {
-  char digest[SHA_DIGEST_STRING_LENGTH(SHASIZE)];
-
-  char *p = SHA_FileChunk(SHASIZE)("/etc/fstab", digest, 10, 20);
-  assert(p == digest);
-
-  printf("test6: '%s'\n", p);
-}
-
-void test7() {
-  char *p = SHA_FileChunk(SHASIZE)("/etc/fstab", NULL, 10, 20);
-  assert(strlen(p) == SHA_DIGEST_STRING_LENGTH(SHASIZE) - 1);
-
-  printf("test7: '%s'\n", p);
-
-  free(p);
-}
-
-void test8() {
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-  char digest[SHA_DIGEST_STRING_LENGTH(SHASIZE)];
-
-  char *p = SHA_Data(SHASIZE)(entropy, __arraycount(entropy), digest);
-  assert(p == digest);
-
-  printf("test8: '%s'\n", p);
-}
-
-void test9() {
-  uint8_t entropy[] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
-
-  char *p = SHA_Data(SHASIZE)(entropy, __arraycount(entropy), NULL);
-  assert(strlen(p) == SHA_DIGEST_STRING_LENGTH(SHASIZE) - 1);
-
-  printf("test9: '%s'\n", p);
-
-  free(p);
-}
-
-int main(void) {
-  printf("SHA" ___STRING(SHASIZE) "\n");
-
-  test1();
-  test2();
-  test3();
-  test4();
-  test5();
-  test6();
-  test7();
-  test8();
-  test9();
-
-  // CHECK-224: SHA224
-  // CHECK-224: test1: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48'
-  // CHECK-224: test2: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48'
-  // CHECK-224: test3: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48'
-  // CHECK-224: test4: '{{.*}}'
-  // CHECK-224: test5: '{{.*}}'
-  // CHECK-224: test6: '{{.*}}'
-  // CHECK-224: test7: '{{.*}}'
-  // CHECK-224: test8: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48'
-  // CHECK-224: test9: '760dfb93100a6bf5996c90f678e529dc945bb2f74a211eedcf0f3a48'
-
-  // CHECK-256: SHA256
-  // CHECK-256: test1: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8'
-  // CHECK-256: test2: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8'
-  // CHECK-256: test3: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8'
-  // CHECK-256: test4: '{{.*}}'
-  // CHECK-256: test5: '{{.*}}'
-  // CHECK-256: test6: '{{.*}}'
-  // CHECK-256: test7: '{{.*}}'
-  // CHECK-256: test8: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8'
-  // CHECK-256: test9: 'bb000ddd92a0a2a346f0b531f278af06e370f86932ccafccc892d68d350f80f8'
-
-  // CHECK-384: SHA384
-  // CHECK-384: test1: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12'
-  // CHECK-384: test2: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12'
-  // CHECK-384: test3: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12'
-  // CHECK-384: test4: '{{.*}}'
-  // CHECK-384: test5: '{{.*}}'
-  // CHECK-384: test6: '{{.*}}'
-  // CHECK-384: test7: '{{.*}}'
-  // CHECK-384: test8: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12'
-  // CHECK-384: test9: 'f450c023b168ebd56ff916ca9b1f1f0010b8c592d28205cc91fa3056f629eed108e8bac864f01ca37a3edee596739e12'
-
-  // CHECK-512: SHA512
-  // CHECK-512: test1: '0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e'
-  // CHECK-512: test2: '0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e'
-  // CHECK-512: test3: '0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e'
-  // CHECK-512: test4: '{{.*}}'
-  // CHECK-512: test5: '{{.*}}'
-  // CHECK-512: test6: '{{.*}}'
-  // CHECK-512: test7: '{{.*}}'
-  // CHECK-512: test8: '0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e'
-  // CHECK-512: test9: '0e3f68731c0e2a6a4eab5d713c9a80dc78086b5fa7d2b5ab127277958e68d1b1dee1882b083b0106cd4319de42c0c8f452871364f5baa8a6379690612c6b844e'
-
-  return 0;
-}

From 96839b6f16a063c3f15cac5d9b6a0e5912f6341e Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Fri, 4 Oct 2024 18:18:03 -0700
Subject: [PATCH 360/427] [clang-format] Handle template closer followed by
 braces (#110971)

Fixes #110968.

(cherry picked from commit e5b05a51b8151cc7788bbdea4d491e5ccfceedea)
---
 clang/lib/Format/UnwrappedLineParser.cpp      | 5 +++++
 clang/unittests/Format/TokenAnnotatorTest.cpp | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index f7b3561f6e033..631c7c62baac1 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -2131,6 +2131,11 @@ void UnwrappedLineParser::parseStructuralElement(
         return;
       }
       break;
+    case tok::greater:
+      nextToken();
+      if (FormatTok->is(tok::l_brace))
+        FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
+      break;
     default:
       nextToken();
       break;
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 2f09b380a8713..6f3ef3b646c61 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -3399,6 +3399,11 @@ TEST_F(TokenAnnotatorTest, TemplateInstantiation) {
   ASSERT_EQ(Tokens.size(), 11u) << Tokens;
   EXPECT_TOKEN(Tokens[2], tok::less, TT_TemplateOpener);
   EXPECT_TOKEN(Tokens[6], tok::greater, TT_TemplateCloser);
+
+  Tokens = annotate("return std::conditional_t<T::value == U::value, T, U>{};");
+  ASSERT_EQ(Tokens.size(), 21u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::less, TT_TemplateOpener);
+  EXPECT_TOKEN(Tokens[16], tok::greater, TT_TemplateCloser);
 }
 
 } // namespace

From 35bd5ff4ca7095baeb1f157491e81e3277a0ae34 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333@gmail.com>
Date: Fri, 11 Oct 2024 18:19:21 +0800
Subject: [PATCH 361/427] [InstCombine] Drop range attributes in
 `foldIsPowerOf2` (#111946)

Fixes https://github.com/llvm/llvm-project/issues/111934.
---
 .../InstCombine/InstCombineAndOrXor.cpp       | 18 ++++++++---
 llvm/test/Transforms/InstCombine/ispow2.ll    | 32 +++++++++++++++++++
 2 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index f9caa4da44931..3222e8298c3f0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -926,9 +926,11 @@ static Value *foldIsPowerOf2OrZero(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd,
 }
 
 /// Reduce a pair of compares that check if a value has exactly 1 bit set.
-/// Also used for logical and/or, must be poison safe.
+/// Also used for logical and/or, must be poison safe if range attributes are
+/// dropped.
 static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd,
-                             InstCombiner::BuilderTy &Builder) {
+                             InstCombiner::BuilderTy &Builder,
+                             InstCombinerImpl &IC) {
   // Handle 'and' / 'or' commutation: make the equality check the first operand.
   if (JoinedByAnd && Cmp1->getPredicate() == ICmpInst::ICMP_NE)
     std::swap(Cmp0, Cmp1);
@@ -942,7 +944,10 @@ static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd,
       match(Cmp1, m_ICmp(Pred1, m_Intrinsic<Intrinsic::ctpop>(m_Specific(X)),
                          m_SpecificInt(2))) &&
       Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_ULT) {
-    Value *CtPop = Cmp1->getOperand(0);
+    auto *CtPop = cast<Instruction>(Cmp1->getOperand(0));
+    // Drop range attributes and re-infer them in the next iteration.
+    CtPop->dropPoisonGeneratingAnnotations();
+    IC.addToWorklist(CtPop);
     return Builder.CreateICmpEQ(CtPop, ConstantInt::get(CtPop->getType(), 1));
   }
   // (X == 0) || (ctpop(X) u> 1) --> ctpop(X) != 1
@@ -950,7 +955,10 @@ static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd,
       match(Cmp1, m_ICmp(Pred1, m_Intrinsic<Intrinsic::ctpop>(m_Specific(X)),
                          m_SpecificInt(1))) &&
       Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_UGT) {
-    Value *CtPop = Cmp1->getOperand(0);
+    auto *CtPop = cast<Instruction>(Cmp1->getOperand(0));
+    // Drop range attributes and re-infer them in the next iteration.
+    CtPop->dropPoisonGeneratingAnnotations();
+    IC.addToWorklist(CtPop);
     return Builder.CreateICmpNE(CtPop, ConstantInt::get(CtPop->getType(), 1));
   }
   return nullptr;
@@ -3347,7 +3355,7 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
     if (Value *V = foldSignedTruncationCheck(LHS, RHS, I, Builder))
       return V;
 
-  if (Value *V = foldIsPowerOf2(LHS, RHS, IsAnd, Builder))
+  if (Value *V = foldIsPowerOf2(LHS, RHS, IsAnd, Builder, *this))
     return V;
 
   if (Value *V = foldPowerOf2AndShiftedMask(LHS, RHS, IsAnd, Builder))
diff --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll
index a143b1347ccee..216ccc5c77257 100644
--- a/llvm/test/Transforms/InstCombine/ispow2.ll
+++ b/llvm/test/Transforms/InstCombine/ispow2.ll
@@ -1522,3 +1522,35 @@ define <2 x i1> @not_pow2_or_z_known_bits_fail_wrong_cmp(<2 x i32> %xin) {
   %r = icmp ugt <2 x i32> %cnt, <i32 2, i32 2>
   ret <2 x i1> %r
 }
+
+; Make sure that range attributes on return values are dropped after merging these two icmps
+
+define i1 @has_single_bit(i32 %x) {
+; CHECK-LABEL: @has_single_bit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[POPCNT:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X:%.*]])
+; CHECK-NEXT:    [[SEL:%.*]] = icmp eq i32 [[POPCNT]], 1
+; CHECK-NEXT:    ret i1 [[SEL]]
+;
+entry:
+  %cmp1 = icmp ne i32 %x, 0
+  %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp2 = icmp ult i32 %popcnt, 2
+  %sel = select i1 %cmp1, i1 %cmp2, i1 false
+  ret i1 %sel
+}
+
+define i1 @has_single_bit_inv(i32 %x) {
+; CHECK-LABEL: @has_single_bit_inv(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[POPCNT:%.*]] = call range(i32 0, 33) i32 @llvm.ctpop.i32(i32 [[X:%.*]])
+; CHECK-NEXT:    [[SEL:%.*]] = icmp ne i32 [[POPCNT]], 1
+; CHECK-NEXT:    ret i1 [[SEL]]
+;
+entry:
+  %cmp1 = icmp eq i32 %x, 0
+  %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
+  %cmp2 = icmp ugt i32 %popcnt, 1
+  %sel = select i1 %cmp1, i1 true, i1 %cmp2
+  ret i1 %sel
+}

From f8cf339563e2f38487de15ce11eddc4ea6c883af Mon Sep 17 00:00:00 2001
From: Brian Cain <bcain@quicinc.com>
Date: Fri, 11 Oct 2024 14:31:41 -0500
Subject: [PATCH 362/427] [lld][Hexagon] Support predicated-add GOT_16_X mask
 lookup (#111896)

When encountering an instruction like `if (p0) r0 = add(r0,##bar@GOT)`,
lld would fail with:
```
ld.lld: error: unrecognized instruction for 16_X type: 0x7400C000
```

This issue was encountered while building libreadline with clang 19.1.0.

Fixes: #111876
(cherry picked from commit 77aa8257acbd773c0c430cd962da1bcfbd5ee94b)
---
 lld/ELF/Arch/Hexagon.cpp      | 20 +++++++++++++++++---
 lld/test/ELF/hexagon-shared.s | 19 +++++++++++++++----
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp
index abde3cd964917..56cf96fd17704 100644
--- a/lld/ELF/Arch/Hexagon.cpp
+++ b/lld/ELF/Arch/Hexagon.cpp
@@ -181,11 +181,13 @@ static const InstructionMask r6[] = {
     {0xd7000000, 0x006020e0}, {0xd8000000, 0x006020e0},
     {0xdb000000, 0x006020e0}, {0xdf000000, 0x006020e0}};
 
+constexpr uint32_t instParsePacketEnd = 0x0000c000;
+
 static bool isDuplex(uint32_t insn) {
   // Duplex forms have a fixed mask and parse bits 15:14 are always
   // zero.  Non-duplex insns will always have at least one bit set in the
   // parse field.
-  return (0xC000 & insn) == 0;
+  return (instParsePacketEnd & insn) == 0;
 }
 
 static uint32_t findMaskR6(uint32_t insn) {
@@ -216,6 +218,12 @@ static uint32_t findMaskR11(uint32_t insn) {
 }
 
 static uint32_t findMaskR16(uint32_t insn) {
+  if (isDuplex(insn))
+    return 0x03f00000;
+
+  // Clear the end-packet-parse bits:
+  insn = insn & ~instParsePacketEnd;
+
   if ((0xff000000 & insn) == 0x48000000)
     return 0x061f20ff;
   if ((0xff000000 & insn) == 0x49000000)
@@ -225,8 +233,14 @@ static uint32_t findMaskR16(uint32_t insn) {
   if ((0xff000000 & insn) == 0xb0000000)
     return 0x0fe03fe0;
 
-  if (isDuplex(insn))
-    return 0x03f00000;
+  if ((0xff802000 & insn) == 0x74000000)
+    return 0x00001fe0;
+  if ((0xff802000 & insn) == 0x74002000)
+    return 0x00001fe0;
+  if ((0xff802000 & insn) == 0x74800000)
+    return 0x00001fe0;
+  if ((0xff802000 & insn) == 0x74802000)
+    return 0x00001fe0;
 
   for (InstructionMask i : r6)
     if ((0xff000000 & insn) == i.cmpMask)
diff --git a/lld/test/ELF/hexagon-shared.s b/lld/test/ELF/hexagon-shared.s
index 747822039e839..01f7286584705 100644
--- a/lld/test/ELF/hexagon-shared.s
+++ b/lld/test/ELF/hexagon-shared.s
@@ -42,6 +42,13 @@ r0 = add(r1,##bar@GOT)
 { r0 = add(r0,##bar@GOT)
   memw(r0) = r2 }
 
+# R_HEX_GOT_16_X, pred add
+if (p0) r0 = add(r0,##bar@GOT)
+if (!p0) r0 = add(r0,##bar@GOT)
+{ p0 = cmp.gtu(r0, r1)
+  if (p0.new) r0 = add(r0,##bar@GOT) }
+{ p0 = cmp.gtu(r0, r1)
+  if (!p0.new) r0 = add(r0,##bar@GOT) }
 
 # foo is local so no plt will be generated
 foo:
@@ -78,12 +85,16 @@ pvar:
 # PLT-NEXT: r28 = memw(r14+#0) }
 # PLT-NEXT: jumpr r28 }
 
-# TEXT:  8c 00 01 00 0001008c
-# TEXT: { 	call 0x102d0 }
-# TEXT: if (p0) jump:nt 0x102d0
-# TEXT: r0 = #0 ; jump 0x102d0
+# TEXT:  bc 00 01 00 000100bc
+# TEXT: { 	call 0x10300 }
+# TEXT: if (p0) jump:nt 0x10300
+# TEXT: r0 = #0 ; jump 0x10300
 # TEXT: r0 = add(r1,##-65548)
 # TEXT: r0 = add(r0,##-65548); memw(r0+#0) = r2 }
+# TEXT: if (p0) r0 = add(r0,##-65548)
+# TEXT: if (!p0) r0 = add(r0,##-65548)
+# TEXT: if (p0.new) r0 = add(r0,##-65548)
+# TEXT: if (!p0.new) r0 = add(r0,##-65548)
 
 # GOT: .got:
 # GOT:  00 00 00 00 00000000 <unknown>

From 689282743d4cada4561eab19fda8c8d805e1ab7e Mon Sep 17 00:00:00 2001
From: Max Winkler <max.enrico.winkler@gmail.com>
Date: Mon, 14 Oct 2024 12:22:25 -0700
Subject: [PATCH 363/427] [Headers] [ARM64EC] Fix extra tokens inside intrin0.h
 preprocessor directive (#112066)

Fixes https://github.com/llvm/llvm-project/pull/87717.

(cherry picked from commit 9bf68c2400e8966511332dfbf5c0f05e8a3300fa)
---
 clang/lib/Headers/intrin0.h      |  2 +-
 clang/test/Headers/ms-intrin.cpp | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/clang/lib/Headers/intrin0.h b/clang/lib/Headers/intrin0.h
index 866c8896617d2..6b01f3808652a 100644
--- a/clang/lib/Headers/intrin0.h
+++ b/clang/lib/Headers/intrin0.h
@@ -44,7 +44,7 @@ unsigned char _InterlockedCompareExchange128_rel(__int64 volatile *_Destination,
                                                  __int64 *_ComparandResult);
 #endif
 
-#ifdef __x86_64__ && !defined(__arm64ec__)
+#if defined(__x86_64__) && !defined(__arm64ec__)
 unsigned __int64 _umul128(unsigned __int64, unsigned __int64,
                           unsigned __int64 *);
 unsigned __int64 __shiftleft128(unsigned __int64 _LowPart,
diff --git a/clang/test/Headers/ms-intrin.cpp b/clang/test/Headers/ms-intrin.cpp
index cb7cd47956205..d630883e79d6a 100644
--- a/clang/test/Headers/ms-intrin.cpp
+++ b/clang/test/Headers/ms-intrin.cpp
@@ -1,31 +1,31 @@
 // RUN: %clang_cc1 -triple i386-pc-win32 -target-cpu pentium4 \
 // RUN:     -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
-// RUN:     -ffreestanding -fsyntax-only -Werror \
+// RUN:     -ffreestanding -fsyntax-only -Werror -Wsystem-headers \
 // RUN:     -isystem %S/Inputs/include %s
 
 // RUN: %clang_cc1 -triple i386-pc-win32 -target-cpu broadwell \
 // RUN:     -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
-// RUN:     -ffreestanding -emit-obj -o /dev/null -Werror \
+// RUN:     -ffreestanding -emit-obj -o /dev/null -Werror -Wsystem-headers \
 // RUN:     -isystem %S/Inputs/include %s
 
 // RUN: %clang_cc1 -triple x86_64-pc-win32  \
 // RUN:     -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
-// RUN:     -ffreestanding -emit-obj -o /dev/null -Werror \
+// RUN:     -ffreestanding -emit-obj -o /dev/null -Werror -Wsystem-headers \
 // RUN:     -isystem %S/Inputs/include %s
 
 // RUN: %clang_cc1 -triple thumbv7--windows \
 // RUN:     -fms-compatibility -fms-compatibility-version=17.00 \
-// RUN:     -ffreestanding -fsyntax-only -Werror \
+// RUN:     -ffreestanding -fsyntax-only -Werror -Wsystem-headers \
 // RUN:     -isystem %S/Inputs/include %s
 
 // RUN: %clang_cc1 -triple aarch64--windows \
 // RUN:     -fms-compatibility -fms-compatibility-version=17.00 \
-// RUN:     -ffreestanding -fsyntax-only -Werror \
+// RUN:     -ffreestanding -fsyntax-only -Werror -Wsystem-headers \
 // RUN:     -isystem %S/Inputs/include %s
 
 // RUN: %clang_cc1 -triple arm64ec--windows \
 // RUN:     -fms-compatibility -fms-compatibility-version=17.00 \
-// RUN:     -ffreestanding -fsyntax-only -Werror \
+// RUN:     -ffreestanding -fsyntax-only -Werror -Wsystem-headers \
 // RUN:     -isystem %S/Inputs/include %s
 
 // REQUIRES: x86-registered-target

From 7ba7d8e2f7b6445b60679da826210cdde29eaf8b Mon Sep 17 00:00:00 2001
From: Vassil Vassilev <v.g.vassilev@gmail.com>
Date: Wed, 21 Aug 2024 07:22:31 +0200
Subject: [PATCH 364/427] [clang-repl] [codegen] Reduce the state in TBAA. NFC
 for static compilation. (#98138)

In incremental compilation clang works with multiple `llvm::Module`s.
Our current approach is to create a CodeGenModule entity for every new
module request (via StartModule). However, some of the state such as the
mangle context needs to be preserved to keep the original semantics in
the ever-growing TU.

Fixes: llvm/llvm-project#95581.

cc: @jeaye
(cherry picked from commit 6c62ad446b2441b78ae524d9e700e351d5a76394)
---
 clang/lib/CodeGen/CGCall.cpp                  | 31 +++++++++----------
 clang/lib/CodeGen/CGClass.cpp                 |  2 +-
 clang/lib/CodeGen/CodeGenModule.cpp           | 15 +++++----
 clang/lib/CodeGen/CodeGenModule.h             |  5 +--
 clang/lib/CodeGen/CodeGenTBAA.cpp             | 13 +++++---
 clang/lib/CodeGen/CodeGenTBAA.h               |  5 +--
 clang/lib/CodeGen/CodeGenTypes.cpp            |  7 +++--
 clang/lib/CodeGen/CodeGenTypes.h              |  8 +----
 clang/lib/CodeGen/MicrosoftCXXABI.cpp         |  2 +-
 .../assigment-with-implicit-ctor.cpp          | 13 ++++++++
 10 files changed, 56 insertions(+), 45 deletions(-)
 create mode 100644 clang/test/Interpreter/assigment-with-implicit-ctor.cpp

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index d7ebffa8c5e4e..0416fa03d749a 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -314,7 +314,8 @@ CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) {
 
   if (MD->isImplicitObjectMemberFunction()) {
     // The abstract case is perfectly fine.
-    const CXXRecordDecl *ThisType = TheCXXABI.getThisArgumentTypeForMethod(MD);
+    const CXXRecordDecl *ThisType =
+        getCXXABI().getThisArgumentTypeForMethod(MD);
     return arrangeCXXMethodType(ThisType, prototype.getTypePtr(), MD);
   }
 
@@ -337,7 +338,7 @@ CodeGenTypes::arrangeCXXStructorDeclaration(GlobalDecl GD) {
   SmallVector<CanQualType, 16> argTypes;
   SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos;
 
-  const CXXRecordDecl *ThisType = TheCXXABI.getThisArgumentTypeForMethod(GD);
+  const CXXRecordDecl *ThisType = getCXXABI().getThisArgumentTypeForMethod(GD);
   argTypes.push_back(DeriveThisType(ThisType, MD));
 
   bool PassParams = true;
@@ -356,7 +357,7 @@ CodeGenTypes::arrangeCXXStructorDeclaration(GlobalDecl GD) {
     appendParameterTypes(*this, argTypes, paramInfos, FTP);
 
   CGCXXABI::AddedStructorArgCounts AddedArgs =
-      TheCXXABI.buildStructorSignature(GD, argTypes);
+      getCXXABI().buildStructorSignature(GD, argTypes);
   if (!paramInfos.empty()) {
     // Note: prefix implies after the first param.
     if (AddedArgs.Prefix)
@@ -372,11 +373,10 @@ CodeGenTypes::arrangeCXXStructorDeclaration(GlobalDecl GD) {
                                       : RequiredArgs::All);
 
   FunctionType::ExtInfo extInfo = FTP->getExtInfo();
-  CanQualType resultType = TheCXXABI.HasThisReturn(GD)
-                               ? argTypes.front()
-                               : TheCXXABI.hasMostDerivedReturn(GD)
-                                     ? CGM.getContext().VoidPtrTy
-                                     : Context.VoidTy;
+  CanQualType resultType = getCXXABI().HasThisReturn(GD) ? argTypes.front()
+                           : getCXXABI().hasMostDerivedReturn(GD)
+                               ? CGM.getContext().VoidPtrTy
+                               : Context.VoidTy;
   return arrangeLLVMFunctionInfo(resultType, FnInfoOpts::IsInstanceMethod,
                                  argTypes, extInfo, paramInfos, required);
 }
@@ -437,11 +437,10 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args,
                               : RequiredArgs::All;
 
   GlobalDecl GD(D, CtorKind);
-  CanQualType ResultType = TheCXXABI.HasThisReturn(GD)
-                               ? ArgTypes.front()
-                               : TheCXXABI.hasMostDerivedReturn(GD)
-                                     ? CGM.getContext().VoidPtrTy
-                                     : Context.VoidTy;
+  CanQualType ResultType = getCXXABI().HasThisReturn(GD) ? ArgTypes.front()
+                           : getCXXABI().hasMostDerivedReturn(GD)
+                               ? CGM.getContext().VoidPtrTy
+                               : Context.VoidTy;
 
   FunctionType::ExtInfo Info = FPT->getExtInfo();
   llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> ParamInfos;
@@ -806,7 +805,7 @@ const CGFunctionInfo &CodeGenTypes::arrangeLLVMFunctionInfo(
   } else if (info.getCC() == CC_Swift || info.getCC() == CC_SwiftAsync) {
     swiftcall::computeABIInfo(CGM, *FI);
   } else {
-    getABIInfo().computeInfo(*FI);
+    CGM.getABIInfo().computeInfo(*FI);
   }
 
   // Loop over all of the computed argument and return value info.  If any of
@@ -6022,6 +6021,6 @@ RValue CodeGenFunction::EmitVAArg(VAArgExpr *VE, Address &VAListAddr,
                                     : EmitVAListRef(VE->getSubExpr());
   QualType Ty = VE->getType();
   if (VE->isMicrosoftABI())
-    return CGM.getTypes().getABIInfo().EmitMSVAArg(*this, VAListAddr, Ty, Slot);
-  return CGM.getTypes().getABIInfo().EmitVAArg(*this, VAListAddr, Ty, Slot);
+    return CGM.getABIInfo().EmitMSVAArg(*this, VAListAddr, Ty, Slot);
+  return CGM.getABIInfo().EmitVAArg(*this, VAListAddr, Ty, Slot);
 }
diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp
index 667e260f2228d..e5ba50de3462d 100644
--- a/clang/lib/CodeGen/CGClass.cpp
+++ b/clang/lib/CodeGen/CGClass.cpp
@@ -209,7 +209,7 @@ CodeGenModule::GetNonVirtualBaseClassOffset(const CXXRecordDecl *ClassDecl,
     return nullptr;
 
   llvm::Type *PtrDiffTy =
-  Types.ConvertType(getContext().getPointerDiffType());
+      getTypes().ConvertType(getContext().getPointerDiffType());
 
   return llvm::ConstantInt::get(PtrDiffTy, Offset.getQuantity());
 }
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 63ed5b4dd0c31..cf5e29e5a3db8 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -343,10 +343,11 @@ CodeGenModule::CodeGenModule(ASTContext &C,
     : Context(C), LangOpts(C.getLangOpts()), FS(FS), HeaderSearchOpts(HSO),
       PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags),
       Target(C.getTargetInfo()), ABI(createCXXABI(*this)),
-      VMContext(M.getContext()), Types(*this), VTables(*this),
+      VMContext(M.getContext()), VTables(*this),
       SanitizerMD(new SanitizerMetadata(*this)) {
 
   // Initialize the type cache.
+  Types.reset(new CodeGenTypes(*this));
   llvm::LLVMContext &LLVMContext = M.getContext();
   VoidTy = llvm::Type::getVoidTy(LLVMContext);
   Int8Ty = llvm::Type::getInt8Ty(LLVMContext);
@@ -405,7 +406,7 @@ CodeGenModule::CodeGenModule(ASTContext &C,
   if (LangOpts.Sanitize.has(SanitizerKind::Thread) ||
       (!CodeGenOpts.RelaxedAliasing && CodeGenOpts.OptimizationLevel > 0))
     TBAA.reset(new CodeGenTBAA(Context, getTypes(), TheModule, CodeGenOpts,
-                               getLangOpts(), getCXXABI().getMangleContext()));
+                               getLangOpts()));
 
   // If debug info or coverage generation is enabled, create the CGDebugInfo
   // object.
@@ -1452,12 +1453,12 @@ void CodeGenModule::EmitBackendOptionsMetadata(
 
 void CodeGenModule::UpdateCompletedType(const TagDecl *TD) {
   // Make sure that this type is translated.
-  Types.UpdateCompletedType(TD);
+  getTypes().UpdateCompletedType(TD);
 }
 
 void CodeGenModule::RefreshTypeCacheForClass(const CXXRecordDecl *RD) {
   // Make sure that this type is translated.
-  Types.RefreshTypeCacheForClass(RD);
+  getTypes().RefreshTypeCacheForClass(RD);
 }
 
 llvm::MDNode *CodeGenModule::getTBAATypeInfo(QualType QTy) {
@@ -5376,6 +5377,10 @@ void CodeGenModule::maybeSetTrivialComdat(const Decl &D,
   GO.setComdat(TheModule.getOrInsertComdat(GO.getName()));
 }
 
+const ABIInfo &CodeGenModule::getABIInfo() {
+  return getTargetCodeGenInfo().getABIInfo();
+}
+
 /// Pass IsTentative as true if you want to create a tentative definition.
 void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
                                             bool IsTentative) {
@@ -7784,7 +7789,5 @@ void CodeGenModule::moveLazyEmissionStates(CodeGenModule *NewBuilder) {
 
   NewBuilder->WeakRefReferences = std::move(WeakRefReferences);
 
-  NewBuilder->TBAA = std::move(TBAA);
-
   NewBuilder->ABI->MangleCtx = std::move(ABI->MangleCtx);
 }
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 284bba823baeb..c58bb88035ca8 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -320,7 +320,7 @@ class CodeGenModule : public CodeGenTypeCache {
   // This should not be moved earlier, since its initialization depends on some
   // of the previous reference members being already initialized and also checks
   // if TheTargetCodeGenInfo is NULL
-  CodeGenTypes Types;
+  std::unique_ptr<CodeGenTypes> Types;
 
   /// Holds information about C++ vtables.
   CodeGenVTables VTables;
@@ -776,6 +776,7 @@ class CodeGenModule : public CodeGenTypeCache {
   bool supportsCOMDAT() const;
   void maybeSetTrivialComdat(const Decl &D, llvm::GlobalObject &GO);
 
+  const ABIInfo &getABIInfo();
   CGCXXABI &getCXXABI() const { return *ABI; }
   llvm::LLVMContext &getLLVMContext() { return VMContext; }
 
@@ -783,7 +784,7 @@ class CodeGenModule : public CodeGenTypeCache {
 
   const TargetCodeGenInfo &getTargetCodeGenInfo();
 
-  CodeGenTypes &getTypes() { return Types; }
+  CodeGenTypes &getTypes() { return *Types; }
 
   CodeGenVTables &getVTables() { return VTables; }
 
diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp
index b66b6234c9d3d..2ce558d4bdf35 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -16,6 +16,7 @@
 
 #include "CodeGenTBAA.h"
 #include "ABIInfoImpl.h"
+#include "CGCXXABI.h"
 #include "CGRecordLayout.h"
 #include "CodeGenTypes.h"
 #include "clang/AST/ASTContext.h"
@@ -36,10 +37,10 @@ using namespace CodeGen;
 
 CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, CodeGenTypes &CGTypes,
                          llvm::Module &M, const CodeGenOptions &CGO,
-                         const LangOptions &Features, MangleContext &MContext)
+                         const LangOptions &Features)
     : Context(Ctx), CGTypes(CGTypes), Module(M), CodeGenOpts(CGO),
-      Features(Features), MContext(MContext), MDHelper(M.getContext()),
-      Root(nullptr), Char(nullptr) {}
+      Features(Features), MDHelper(M.getContext()), Root(nullptr),
+      Char(nullptr) {}
 
 CodeGenTBAA::~CodeGenTBAA() {
 }
@@ -256,7 +257,8 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
 
     SmallString<256> OutName;
     llvm::raw_svector_ostream Out(OutName);
-    MContext.mangleCanonicalTypeName(QualType(ETy, 0), Out);
+    CGTypes.getCXXABI().getMangleContext().mangleCanonicalTypeName(
+        QualType(ETy, 0), Out);
     return createScalarTypeNode(OutName, getChar(), Size);
   }
 
@@ -481,7 +483,8 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) {
     if (Features.CPlusPlus) {
       // Don't use the mangler for C code.
       llvm::raw_svector_ostream Out(OutName);
-      MContext.mangleCanonicalTypeName(QualType(Ty, 0), Out);
+      CGTypes.getCXXABI().getMangleContext().mangleCanonicalTypeName(
+          QualType(Ty, 0), Out);
     } else {
       OutName = RD->getName();
     }
diff --git a/clang/lib/CodeGen/CodeGenTBAA.h b/clang/lib/CodeGen/CodeGenTBAA.h
index 5d9ecec3ff0fe..ba74a39a4d25e 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.h
+++ b/clang/lib/CodeGen/CodeGenTBAA.h
@@ -24,7 +24,6 @@ namespace clang {
   class ASTContext;
   class CodeGenOptions;
   class LangOptions;
-  class MangleContext;
   class QualType;
   class Type;
 
@@ -120,7 +119,6 @@ class CodeGenTBAA {
   llvm::Module &Module;
   const CodeGenOptions &CodeGenOpts;
   const LangOptions &Features;
-  MangleContext &MContext;
 
   // MDHelper - Helper for creating metadata.
   llvm::MDBuilder MDHelper;
@@ -174,8 +172,7 @@ class CodeGenTBAA {
 
 public:
   CodeGenTBAA(ASTContext &Ctx, CodeGenTypes &CGTypes, llvm::Module &M,
-              const CodeGenOptions &CGO, const LangOptions &Features,
-              MangleContext &MContext);
+              const CodeGenOptions &CGO, const LangOptions &Features);
   ~CodeGenTBAA();
 
   /// getTypeInfo - Get metadata used to describe accesses to objects of the
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index e0f567c5da342..f5deccdc1ba75 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -30,9 +30,8 @@ using namespace clang;
 using namespace CodeGen;
 
 CodeGenTypes::CodeGenTypes(CodeGenModule &cgm)
-  : CGM(cgm), Context(cgm.getContext()), TheModule(cgm.getModule()),
-    Target(cgm.getTarget()), TheCXXABI(cgm.getCXXABI()),
-    TheABIInfo(cgm.getTargetCodeGenInfo().getABIInfo()) {
+    : CGM(cgm), Context(cgm.getContext()), TheModule(cgm.getModule()),
+      Target(cgm.getTarget()) {
   SkippedLayout = false;
   LongDoubleReferenced = false;
 }
@@ -43,6 +42,8 @@ CodeGenTypes::~CodeGenTypes() {
     delete &*I++;
 }
 
+CGCXXABI &CodeGenTypes::getCXXABI() const { return getCGM().getCXXABI(); }
+
 const CodeGenOptions &CodeGenTypes::getCodeGenOpts() const {
   return CGM.getCodeGenOpts();
 }
diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h
index cbda2628e9140..5aebf9a212237 100644
--- a/clang/lib/CodeGen/CodeGenTypes.h
+++ b/clang/lib/CodeGen/CodeGenTypes.h
@@ -57,11 +57,6 @@ class CodeGenTypes {
   ASTContext &Context;
   llvm::Module &TheModule;
   const TargetInfo &Target;
-  CGCXXABI &TheCXXABI;
-
-  // This should not be moved earlier, since its initialization depends on some
-  // of the previous reference members being already initialized
-  const ABIInfo &TheABIInfo;
 
   /// The opaque type map for Objective-C interfaces. All direct
   /// manipulation is done by the runtime interfaces, which are
@@ -106,9 +101,8 @@ class CodeGenTypes {
   }
   CodeGenModule &getCGM() const { return CGM; }
   ASTContext &getContext() const { return Context; }
-  const ABIInfo &getABIInfo() const { return TheABIInfo; }
   const TargetInfo &getTarget() const { return Target; }
-  CGCXXABI &getCXXABI() const { return TheCXXABI; }
+  CGCXXABI &getCXXABI() const;
   llvm::LLVMContext &getLLVMContext() { return TheModule.getContext(); }
   const CodeGenOptions &getCodeGenOpts() const;
 
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index cc6740edabcd3..76d0191a7e63a 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -1111,7 +1111,7 @@ static bool isTrivialForMSVC(const CXXRecordDecl *RD, QualType Ty,
   const Type *Base = nullptr;
   uint64_t NumElts = 0;
   if (CGM.getTarget().getTriple().isAArch64() &&
-      CGM.getTypes().getABIInfo().isHomogeneousAggregate(Ty, Base, NumElts) &&
+      CGM.getABIInfo().isHomogeneousAggregate(Ty, Base, NumElts) &&
       isa<VectorType>(Base)) {
     return true;
   }
diff --git a/clang/test/Interpreter/assigment-with-implicit-ctor.cpp b/clang/test/Interpreter/assigment-with-implicit-ctor.cpp
new file mode 100644
index 0000000000000..24cea8ec1a4b2
--- /dev/null
+++ b/clang/test/Interpreter/assigment-with-implicit-ctor.cpp
@@ -0,0 +1,13 @@
+// REQUIRES: host-supports-jit
+// UNSUPPORTED: system-aix
+//
+// RUN: cat %s | clang-repl | FileCheck %s
+// RUN: cat %s | clang-repl -Xcc -O2 | FileCheck %s
+
+struct box { box() = default; box(int *const data) : data{data} {} int *data{}; };
+
+box foo() { box ret; ret = new int{}; return ret; }
+
+extern "C" int printf(const char *, ...);
+printf("good");
+// CHECK: good

From d8752671e825ca5c967cc58a23778ae378c8dea2 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Date: Fri, 19 Jul 2024 22:22:55 +0200
Subject: [PATCH 365/427] Undef _TIME_BITS along with _FILE_OFFSET_BITS

This change is identical to
26800a2c7e7996dc773b4e990dd5cca41c45e1a9 ("[sanitizer] Undef
_TIME_BITS along with _FILE_OFFSET_BITS on Linux"), but for
sanitizer_procmaps_solaris.cpp.

Indeed, even though sanitizer_procmaps_solaris.cpp is Solaris
specific, it also gets built on Linux platforms. It also includes
sanitizer_platform.h, which also ends up including features-time64.h,
causing a build failure on 32-bit Linux platforms on which 64-bit
time_t is enabled by setting _TIME_BITS=64.

To fix this, we do the same change: undefine _TIME_BITS, which anyway
will cause no harm as the rest of this file is inside a
SANITIZER_SOLARIS compile-time conditional.

Fixes:

In file included from /home/thomas/buildroot/buildroot/output/host/i686-buildroot-linux-gnu/sysroot/usr/include/features.h:394,
                 from ../../../../libsanitizer/sanitizer_common/sanitizer_platform.h:25,
                 from ../../../../libsanitizer/sanitizer_common/sanitizer_procmaps_solaris.cpp:14:
/home/thomas/buildroot/buildroot/output/host/i686-buildroot-linux-gnu/sysroot/usr/include/features-time64.h:26:5: error: #error "_TIME_BITS=64 is al
lowed only with _FILE_OFFSET_BITS=64"
   26 | #   error "_TIME_BITS=64 is allowed only with _FILE_OFFSET_BITS=64"
      |     ^~~~~

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Closes: https://github.com/llvm/llvm-project/pull/99699
(cherry picked from commit a1217020da219386b29c1a5a4a217904ecf07d7d)
---
 .../lib/sanitizer_common/sanitizer_procmaps_solaris.cpp       | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp
index eeb49e2afe34d..80b8158f43db9 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp
@@ -11,6 +11,10 @@
 
 // Before Solaris 11.4, <procfs.h> doesn't work in a largefile environment.
 #undef _FILE_OFFSET_BITS
+
+// Avoid conflict between `_TIME_BITS` defined vs. `_FILE_OFFSET_BITS`
+// undefined in some Linux configurations.
+#undef _TIME_BITS
 #include "sanitizer_platform.h"
 #if SANITIZER_SOLARIS
 #  include <fcntl.h>

From 74b2743bd5b89c8184c1b3c1dd7ba49f41a5ba3f Mon Sep 17 00:00:00 2001
From: goldsteinn <35538541+goldsteinn@users.noreply.github.com>
Date: Tue, 15 Oct 2024 10:25:16 -0400
Subject: [PATCH 366/427]  [Inliner] Don't propagate access attr to byval
 params (#112256)

- **[Inliner] Add tests for bad propagationg of access attr for `byval`
param; NFC**
- **[Inliner] Don't propagate access attr to `byval` params**

We previously only handled the case where the `byval` attr was in the
callbase's param attr list. This PR also handles the case if the
`ByVal` was a param attr on the function's param attr list.

(cherry picked from commit 3c777f04f065dda5f0c80eaaef2a7f623ccc20ed)
---
 llvm/lib/Transforms/Utils/InlineFunction.cpp  |  2 +-
 .../Inline/access-attributes-prop.ll          | 20 +++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 68696789530f4..fda1c22cc1fb7 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1395,7 +1395,7 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB,
         if (!Arg)
           continue;
 
-        if (AL.hasParamAttr(I, Attribute::ByVal))
+        if (NewInnerCB->paramHasAttr(I, Attribute::ByVal))
           // It's unsound to propagate memory attributes to byval arguments.
           // Even if CalledFunction doesn't e.g. write to the argument,
           // the call to NewInnerCB may write to its by-value copy.
diff --git a/llvm/test/Transforms/Inline/access-attributes-prop.ll b/llvm/test/Transforms/Inline/access-attributes-prop.ll
index 2c55f5f3b1f6c..5051c92345ec7 100644
--- a/llvm/test/Transforms/Inline/access-attributes-prop.ll
+++ b/llvm/test/Transforms/Inline/access-attributes-prop.ll
@@ -580,3 +580,23 @@ define ptr @callee_bad_param_prop(ptr readonly %x) {
   %r = tail call ptr @llvm.ptrmask(ptr %x, i64 -1)
   ret ptr %r
 }
+
+define dso_local void @foo_byval_readonly2(ptr readonly %p) {
+; CHECK-LABEL: define {{[^@]+}}@foo_byval_readonly2
+; CHECK-SAME: (ptr readonly [[P:%.*]]) {
+; CHECK-NEXT:    call void @bar4(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  call void @bar4(ptr %p)
+  ret void
+}
+
+define void @prop_byval_readonly2(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@prop_byval_readonly2
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    call void @bar4(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  call void @foo_byval_readonly2(ptr %p)
+  ret void
+}

From 19c571a631d962f05264c539f035f5e7fc5c166b Mon Sep 17 00:00:00 2001
From: Younan Zhang <zyn7109@gmail.com>
Date: Fri, 11 Oct 2024 10:31:27 +0800
Subject: [PATCH 367/427] [Clang] Instantiate Typedefs referenced by type alias
 deduction guides (#111804)

TypedefNameDecl referenced by a synthesized CTAD guide for type aliases
was not transformed previously, resulting in a substitution failure in
BuildDeductionGuideForTypeAlias() when substituting into the
right-hand-side deduction guide.

This patch fixes it in the way we have been doing since
https://reviews.llvm.org/D80743. We transform all the function
parameters, parenting referenced TypedefNameDecls with the
CXXDeductionGuideDecl. Then we instantiate these declarations in
FindInstantiatedDecl() as we build up the eventual deduction guide,
using the mechanism introduced in D80743

Fixes #111508

(cherry picked from commit 0bc02b999a9686ba240b7a68d3f1cbbf037d2170)
---
 clang/lib/Sema/SemaTemplateDeductionGuide.cpp | 21 ++++++++++++++++---
 clang/test/SemaCXX/cxx20-ctad-type-alias.cpp  | 13 ++++++++++++
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
index 0602d07c6b9b0..1bf82b31def97 100644
--- a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
+++ b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
@@ -69,8 +69,8 @@ class ExtractTypeForDeductionGuide
   ExtractTypeForDeductionGuide(
       Sema &SemaRef,
       llvm::SmallVectorImpl<TypedefNameDecl *> &MaterializedTypedefs,
-      ClassTemplateDecl *NestedPattern,
-      const MultiLevelTemplateArgumentList *OuterInstantiationArgs)
+      ClassTemplateDecl *NestedPattern = nullptr,
+      const MultiLevelTemplateArgumentList *OuterInstantiationArgs = nullptr)
       : Base(SemaRef), MaterializedTypedefs(MaterializedTypedefs),
         NestedPattern(NestedPattern),
         OuterInstantiationArgs(OuterInstantiationArgs) {
@@ -1263,10 +1263,25 @@ FunctionTemplateDecl *DeclareAggregateDeductionGuideForTypeAlias(
       getRHSTemplateDeclAndArgs(SemaRef, AliasTemplate).first;
   if (!RHSTemplate)
     return nullptr;
+
+  llvm::SmallVector<TypedefNameDecl *> TypedefDecls;
+  llvm::SmallVector<QualType> NewParamTypes;
+  ExtractTypeForDeductionGuide TypeAliasTransformer(SemaRef, TypedefDecls);
+  for (QualType P : ParamTypes) {
+    QualType Type = TypeAliasTransformer.TransformType(P);
+    if (Type.isNull())
+      return nullptr;
+    NewParamTypes.push_back(Type);
+  }
+
   auto *RHSDeductionGuide = SemaRef.DeclareAggregateDeductionGuideFromInitList(
-      RHSTemplate, ParamTypes, Loc);
+      RHSTemplate, NewParamTypes, Loc);
   if (!RHSDeductionGuide)
     return nullptr;
+
+  for (TypedefNameDecl *TD : TypedefDecls)
+    TD->setDeclContext(RHSDeductionGuide->getTemplatedDecl());
+
   return BuildDeductionGuideForTypeAlias(SemaRef, AliasTemplate,
                                          RHSDeductionGuide, Loc);
 }
diff --git a/clang/test/SemaCXX/cxx20-ctad-type-alias.cpp b/clang/test/SemaCXX/cxx20-ctad-type-alias.cpp
index 5392573fcdb9d..675c32a81f1ae 100644
--- a/clang/test/SemaCXX/cxx20-ctad-type-alias.cpp
+++ b/clang/test/SemaCXX/cxx20-ctad-type-alias.cpp
@@ -481,3 +481,16 @@ struct Out {
 Out<float>::B out(100); // deduced to Out<float>::A<float>;
 static_assert(__is_same(decltype(out), Out<float>::A<float>));
 }
+
+namespace GH111508 {
+
+template <typename V> struct S {
+  using T = V;
+  T Data;
+};
+
+template <typename V> using Alias = S<V>;
+
+Alias A(42);
+
+} // namespace GH111508

From 21ed37e3e725a7f58c2eb347519e500ebddb57ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
Date: Mon, 14 Oct 2024 13:18:04 +0200
Subject: [PATCH 368/427] [LLVM] [Clang] Backport "Support for Gentoo `*t64`
 triples (64-bit time_t ABIs)"

This is a backport of 387b37af1aabf325e9be844361564dfad8d45c75 for 19.x,
adjusted to add new Triple::EnvironmentType members at the end to avoid
breaking backwards ABI compatibility.

Gentoo is planning to introduce a `*t64` suffix for triples that will be
used by 32-bit platforms that use 64-bit `time_t`. Add support for
parsing and accepting these triples, and while at it make clang
automatically enable the necessary glibc feature macros when this suffix
is used.
---
 clang/lib/Basic/Targets/ARM.cpp            |  2 +
 clang/lib/Basic/Targets/OSTargets.h        |  4 ++
 clang/lib/CodeGen/CodeGenModule.cpp        |  5 +-
 clang/lib/CodeGen/Targets/ARM.cpp          |  3 ++
 clang/lib/Driver/Driver.cpp                |  5 +-
 clang/lib/Driver/ToolChains/Arch/ARM.cpp   |  7 +++
 clang/lib/Driver/ToolChains/Gnu.cpp        |  2 +
 clang/lib/Driver/ToolChains/Linux.cpp      |  1 +
 clang/test/Preprocessor/time64.c           | 12 +++++
 llvm/include/llvm/TargetParser/Triple.h    | 35 ++++++++++---
 llvm/lib/Target/ARM/ARMSubtarget.h         |  4 +-
 llvm/lib/Target/ARM/ARMTargetMachine.cpp   |  2 +
 llvm/lib/Target/ARM/ARMTargetMachine.h     |  1 +
 llvm/lib/TargetParser/ARMTargetParser.cpp  |  3 ++
 llvm/lib/TargetParser/Triple.cpp           |  6 +++
 llvm/unittests/TargetParser/TripleTest.cpp | 58 ++++++++++++++++++++++
 16 files changed, 138 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/Preprocessor/time64.c

diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index 7423626d7c3cb..e55feedbd5c6f 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -311,7 +311,9 @@ ARMTargetInfo::ARMTargetInfo(const llvm::Triple &Triple,
     switch (Triple.getEnvironment()) {
     case llvm::Triple::Android:
     case llvm::Triple::GNUEABI:
+    case llvm::Triple::GNUEABIT64:
     case llvm::Triple::GNUEABIHF:
+    case llvm::Triple::GNUEABIHFT64:
     case llvm::Triple::MuslEABI:
     case llvm::Triple::MuslEABIHF:
     case llvm::Triple::OpenHOS:
diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h
index 5f27c3469f861..357c1965057c9 100644
--- a/clang/lib/Basic/Targets/OSTargets.h
+++ b/clang/lib/Basic/Targets/OSTargets.h
@@ -337,6 +337,10 @@ class LLVM_LIBRARY_VISIBILITY LinuxTargetInfo : public OSTargetInfo<Target> {
       Builder.defineMacro("_GNU_SOURCE");
     if (this->HasFloat128)
       Builder.defineMacro("__FLOAT128__");
+    if (Triple.isTime64ABI()) {
+      Builder.defineMacro("_FILE_OFFSET_BITS", "64");
+      Builder.defineMacro("_TIME_BITS", "64");
+    }
   }
 
 public:
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index cf5e29e5a3db8..8d9beab9fa7f8 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -177,10 +177,7 @@ createTargetCodeGenInfo(CodeGenModule &CGM) {
     else if (ABIStr == "aapcs16")
       Kind = ARMABIKind::AAPCS16_VFP;
     else if (CodeGenOpts.FloatABI == "hard" ||
-             (CodeGenOpts.FloatABI != "soft" &&
-              (Triple.getEnvironment() == llvm::Triple::GNUEABIHF ||
-               Triple.getEnvironment() == llvm::Triple::MuslEABIHF ||
-               Triple.getEnvironment() == llvm::Triple::EABIHF)))
+             (CodeGenOpts.FloatABI != "soft" && Triple.isHardFloatABI()))
       Kind = ARMABIKind::AAPCS_VFP;
 
     return createARMTargetCodeGenInfo(CGM, Kind);
diff --git a/clang/lib/CodeGen/Targets/ARM.cpp b/clang/lib/CodeGen/Targets/ARM.cpp
index d032b88d7683c..457d761039a08 100644
--- a/clang/lib/CodeGen/Targets/ARM.cpp
+++ b/clang/lib/CodeGen/Targets/ARM.cpp
@@ -35,7 +35,9 @@ class ARMABIInfo : public ABIInfo {
     case llvm::Triple::EABI:
     case llvm::Triple::EABIHF:
     case llvm::Triple::GNUEABI:
+    case llvm::Triple::GNUEABIT64:
     case llvm::Triple::GNUEABIHF:
+    case llvm::Triple::GNUEABIHFT64:
     case llvm::Triple::MuslEABI:
     case llvm::Triple::MuslEABIHF:
       return true;
@@ -48,6 +50,7 @@ class ARMABIInfo : public ABIInfo {
     switch (getTarget().getTriple().getEnvironment()) {
     case llvm::Triple::EABIHF:
     case llvm::Triple::GNUEABIHF:
+    case llvm::Triple::GNUEABIHFT64:
     case llvm::Triple::MuslEABIHF:
       return true;
     default:
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 8e44d5afa40e0..ecae475f75da0 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -602,7 +602,8 @@ static llvm::Triple computeTargetTriple(const Driver &D,
     if (A->getOption().matches(options::OPT_m64) ||
         A->getOption().matches(options::OPT_maix64)) {
       AT = Target.get64BitArchVariant().getArch();
-      if (Target.getEnvironment() == llvm::Triple::GNUX32)
+      if (Target.getEnvironment() == llvm::Triple::GNUX32 ||
+          Target.getEnvironment() == llvm::Triple::GNUT64)
         Target.setEnvironment(llvm::Triple::GNU);
       else if (Target.getEnvironment() == llvm::Triple::MuslX32)
         Target.setEnvironment(llvm::Triple::Musl);
@@ -665,11 +666,13 @@ static llvm::Triple computeTargetTriple(const Driver &D,
       } else if (ABIName == "n32") {
         Target = Target.get64BitArchVariant();
         if (Target.getEnvironment() == llvm::Triple::GNU ||
+            Target.getEnvironment() == llvm::Triple::GNUT64 ||
             Target.getEnvironment() == llvm::Triple::GNUABI64)
           Target.setEnvironment(llvm::Triple::GNUABIN32);
       } else if (ABIName == "64") {
         Target = Target.get64BitArchVariant();
         if (Target.getEnvironment() == llvm::Triple::GNU ||
+            Target.getEnvironment() == llvm::Triple::GNUT64 ||
             Target.getEnvironment() == llvm::Triple::GNUABIN32)
           Target.setEnvironment(llvm::Triple::GNUABI64);
       }
diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
index a6041b809b80b..0489911ecd9de 100644
--- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
@@ -327,6 +327,11 @@ void arm::setFloatABIInTriple(const Driver &D, const ArgList &Args,
     Triple.setEnvironment(isHardFloat ? llvm::Triple::GNUEABIHF
                                       : llvm::Triple::GNUEABI);
     break;
+  case llvm::Triple::GNUEABIT64:
+  case llvm::Triple::GNUEABIHFT64:
+    Triple.setEnvironment(isHardFloat ? llvm::Triple::GNUEABIHFT64
+                                      : llvm::Triple::GNUEABIT64);
+    break;
   case llvm::Triple::EABI:
   case llvm::Triple::EABIHF:
     Triple.setEnvironment(isHardFloat ? llvm::Triple::EABIHF
@@ -414,10 +419,12 @@ arm::FloatABI arm::getDefaultFloatABI(const llvm::Triple &Triple) {
       return FloatABI::Soft;
     switch (Triple.getEnvironment()) {
     case llvm::Triple::GNUEABIHF:
+    case llvm::Triple::GNUEABIHFT64:
     case llvm::Triple::MuslEABIHF:
     case llvm::Triple::EABIHF:
       return FloatABI::Hard;
     case llvm::Triple::GNUEABI:
+    case llvm::Triple::GNUEABIT64:
     case llvm::Triple::MuslEABI:
     case llvm::Triple::EABI:
       // EABI is always AAPCS, and if it was not marked 'hard', it's softfp
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index 543f3965dfd4f..5e9a655eaf824 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -2694,6 +2694,7 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes(
   case llvm::Triple::thumb:
     LibDirs.append(begin(ARMLibDirs), end(ARMLibDirs));
     if (TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHF ||
+        TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHFT64 ||
         TargetTriple.getEnvironment() == llvm::Triple::MuslEABIHF ||
         TargetTriple.getEnvironment() == llvm::Triple::EABIHF) {
       TripleAliases.append(begin(ARMHFTriples), end(ARMHFTriples));
@@ -2705,6 +2706,7 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes(
   case llvm::Triple::thumbeb:
     LibDirs.append(begin(ARMebLibDirs), end(ARMebLibDirs));
     if (TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHF ||
+        TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHFT64 ||
         TargetTriple.getEnvironment() == llvm::Triple::MuslEABIHF ||
         TargetTriple.getEnvironment() == llvm::Triple::EABIHF) {
       TripleAliases.append(begin(ARMebHFTriples), end(ARMebHFTriples));
diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp
index 2265138edbffb..35bf390696050 100644
--- a/clang/lib/Driver/ToolChains/Linux.cpp
+++ b/clang/lib/Driver/ToolChains/Linux.cpp
@@ -508,6 +508,7 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const {
   case llvm::Triple::thumbeb: {
     const bool HF =
         Triple.getEnvironment() == llvm::Triple::GNUEABIHF ||
+        Triple.getEnvironment() == llvm::Triple::GNUEABIHFT64 ||
         tools::arm::getARMFloatABI(*this, Args) == tools::arm::FloatABI::Hard;
 
     LibDir = "lib";
diff --git a/clang/test/Preprocessor/time64.c b/clang/test/Preprocessor/time64.c
new file mode 100644
index 0000000000000..e9e26e08ecd87
--- /dev/null
+++ b/clang/test/Preprocessor/time64.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -E -dM -triple=i686-pc-linux-gnu /dev/null | FileCheck -match-full-lines -check-prefix TIME32 %s
+// RUN: %clang_cc1 -E -dM -triple=i686-pc-linux-gnut64 /dev/null | FileCheck -match-full-lines -check-prefix TIME64 %s
+// RUN: %clang_cc1 -E -dM -triple=armv5tel-softfloat-linux-gnueabi /dev/null | FileCheck -match-full-lines -check-prefix TIME32 %s
+// RUN: %clang_cc1 -E -dM -triple=armv5tel-softfloat-linux-gnueabit64 /dev/null | FileCheck -match-full-lines -check-prefix TIME64 %s
+// RUN: %clang_cc1 -E -dM -triple=armv7a-unknown-linux-gnueabihf /dev/null | FileCheck -match-full-lines -check-prefix TIME32 %s
+// RUN: %clang_cc1 -E -dM -triple=armv7a-unknown-linux-gnueabihft64 /dev/null | FileCheck -match-full-lines -check-prefix TIME64 %s
+//
+// TIME32-NOT:#define _FILE_OFFSET_BITS 64
+// TIME32-NOT:#define _TIME_BITS 64
+//
+// TIME64:#define _FILE_OFFSET_BITS 64
+// TIME64:#define _TIME_BITS 64
diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index ebd92f264d904..d2126a03db906 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -294,7 +294,11 @@ class Triple {
 
     PAuthTest,
 
-    LastEnvironmentType = PAuthTest
+    GNUT64,
+    GNUEABIT64,
+    GNUEABIHFT64,
+
+    LastEnvironmentType = GNUEABIHFT64
   };
   enum ObjectFormatType {
     UnknownObjectFormat,
@@ -605,11 +609,12 @@ class Triple {
 
   bool isGNUEnvironment() const {
     EnvironmentType Env = getEnvironment();
-    return Env == Triple::GNU || Env == Triple::GNUABIN32 ||
-           Env == Triple::GNUABI64 || Env == Triple::GNUEABI ||
-           Env == Triple::GNUEABIHF || Env == Triple::GNUF32 ||
-           Env == Triple::GNUF64 || Env == Triple::GNUSF ||
-           Env == Triple::GNUX32;
+    return Env == Triple::GNU || Env == Triple::GNUT64 ||
+           Env == Triple::GNUABIN32 || Env == Triple::GNUABI64 ||
+           Env == Triple::GNUEABI || Env == Triple::GNUEABIT64 ||
+           Env == Triple::GNUEABIHF || Env == Triple::GNUEABIHFT64 ||
+           Env == Triple::GNUF32 || Env == Triple::GNUF64 ||
+           Env == Triple::GNUSF || Env == Triple::GNUX32;
   }
 
   /// Tests whether the OS is Haiku.
@@ -866,9 +871,11 @@ class Triple {
     return (isARM() || isThumb()) &&
            (getEnvironment() == Triple::EABI ||
             getEnvironment() == Triple::GNUEABI ||
+            getEnvironment() == Triple::GNUEABIT64 ||
             getEnvironment() == Triple::MuslEABI ||
             getEnvironment() == Triple::EABIHF ||
             getEnvironment() == Triple::GNUEABIHF ||
+            getEnvironment() == Triple::GNUEABIHFT64 ||
             getEnvironment() == Triple::OpenHOS ||
             getEnvironment() == Triple::MuslEABIHF || isAndroid()) &&
            isOSBinFormatELF();
@@ -1046,6 +1053,22 @@ class Triple {
     return getArch() == Triple::bpfel || getArch() == Triple::bpfeb;
   }
 
+  /// Tests if the target forces 64-bit time_t on a 32-bit architecture.
+  bool isTime64ABI() const {
+    EnvironmentType Env = getEnvironment();
+    return Env == Triple::GNUT64 || Env == Triple::GNUEABIT64 ||
+           Env == Triple::GNUEABIHFT64;
+  }
+
+  /// Tests if the target forces hardfloat.
+  bool isHardFloatABI() const {
+    EnvironmentType Env = getEnvironment();
+    return Env == llvm::Triple::GNUEABIHF ||
+           Env == llvm::Triple::GNUEABIHFT64 ||
+           Env == llvm::Triple::MuslEABIHF ||
+           Env == llvm::Triple::EABIHF;
+  }
+
   /// Tests whether the target supports comdat
   bool supportsCOMDAT() const {
     return !(isOSBinFormatMachO() || isOSBinFormatXCOFF() ||
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 00239ff94b7ba..6d301efd56180 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -325,7 +325,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   }
   bool isTargetGNUAEABI() const {
     return (TargetTriple.getEnvironment() == Triple::GNUEABI ||
-            TargetTriple.getEnvironment() == Triple::GNUEABIHF) &&
+            TargetTriple.getEnvironment() == Triple::GNUEABIT64 ||
+            TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
+            TargetTriple.getEnvironment() == Triple::GNUEABIHFT64) &&
            !isTargetDarwin() && !isTargetWindows();
   }
   bool isTargetMuslAEABI() const {
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 7553778c57403..a58c63dcf762d 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -241,7 +241,9 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
       Options.EABIVersion == EABI::Unknown) {
     // musl is compatible with glibc with regard to EABI version
     if ((TargetTriple.getEnvironment() == Triple::GNUEABI ||
+         TargetTriple.getEnvironment() == Triple::GNUEABIT64 ||
          TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
+         TargetTriple.getEnvironment() == Triple::GNUEABIHFT64 ||
          TargetTriple.getEnvironment() == Triple::MuslEABI ||
          TargetTriple.getEnvironment() == Triple::MuslEABIHF ||
          TargetTriple.getEnvironment() == Triple::OpenHOS) &&
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.h b/llvm/lib/Target/ARM/ARMTargetMachine.h
index 69d8fa8ada649..75ee50f0e93c8 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.h
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -64,6 +64,7 @@ class ARMBaseTargetMachine : public LLVMTargetMachine {
 
   bool isTargetHardFloat() const {
     return TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
+           TargetTriple.getEnvironment() == Triple::GNUEABIHFT64 ||
            TargetTriple.getEnvironment() == Triple::MuslEABIHF ||
            TargetTriple.getEnvironment() == Triple::EABIHF ||
            (TargetTriple.isOSBinFormatMachO() &&
diff --git a/llvm/lib/TargetParser/ARMTargetParser.cpp b/llvm/lib/TargetParser/ARMTargetParser.cpp
index 9d9917d86a368..5e9dd94b84b28 100644
--- a/llvm/lib/TargetParser/ARMTargetParser.cpp
+++ b/llvm/lib/TargetParser/ARMTargetParser.cpp
@@ -554,7 +554,9 @@ StringRef ARM::computeDefaultTargetABI(const Triple &TT, StringRef CPU) {
   switch (TT.getEnvironment()) {
   case Triple::Android:
   case Triple::GNUEABI:
+  case Triple::GNUEABIT64:
   case Triple::GNUEABIHF:
+  case Triple::GNUEABIHFT64:
   case Triple::MuslEABI:
   case Triple::MuslEABIHF:
   case Triple::OpenHOS:
@@ -635,6 +637,7 @@ StringRef ARM::getARMCPUForArch(const llvm::Triple &Triple, StringRef MArch) {
     switch (Triple.getEnvironment()) {
     case llvm::Triple::EABIHF:
     case llvm::Triple::GNUEABIHF:
+    case llvm::Triple::GNUEABIHFT64:
     case llvm::Triple::MuslEABIHF:
       return "arm1176jzf-s";
     default:
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index bf89aace65e58..21d6c74b59560 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -317,10 +317,13 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) {
   case EABI: return "eabi";
   case EABIHF: return "eabihf";
   case GNU: return "gnu";
+  case GNUT64: return "gnut64";
   case GNUABI64: return "gnuabi64";
   case GNUABIN32: return "gnuabin32";
   case GNUEABI: return "gnueabi";
+  case GNUEABIT64: return "gnueabit64";
   case GNUEABIHF: return "gnueabihf";
+  case GNUEABIHFT64: return "gnueabihft64";
   case GNUF32: return "gnuf32";
   case GNUF64: return "gnuf64";
   case GNUSF: return "gnusf";
@@ -693,7 +696,9 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
       .StartsWith("eabi", Triple::EABI)
       .StartsWith("gnuabin32", Triple::GNUABIN32)
       .StartsWith("gnuabi64", Triple::GNUABI64)
+      .StartsWith("gnueabihft64", Triple::GNUEABIHFT64)
       .StartsWith("gnueabihf", Triple::GNUEABIHF)
+      .StartsWith("gnueabit64", Triple::GNUEABIT64)
       .StartsWith("gnueabi", Triple::GNUEABI)
       .StartsWith("gnuf32", Triple::GNUF32)
       .StartsWith("gnuf64", Triple::GNUF64)
@@ -701,6 +706,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
       .StartsWith("gnux32", Triple::GNUX32)
       .StartsWith("gnu_ilp32", Triple::GNUILP32)
       .StartsWith("code16", Triple::CODE16)
+      .StartsWith("gnut64", Triple::GNUT64)
       .StartsWith("gnu", Triple::GNU)
       .StartsWith("android", Triple::Android)
       .StartsWith("musleabihf", Triple::MuslEABIHF)
diff --git a/llvm/unittests/TargetParser/TripleTest.cpp b/llvm/unittests/TargetParser/TripleTest.cpp
index 0aecfc64da208..ae5df4621df94 100644
--- a/llvm/unittests/TargetParser/TripleTest.cpp
+++ b/llvm/unittests/TargetParser/TripleTest.cpp
@@ -93,6 +93,13 @@ TEST(TripleTest, ParsedIDs) {
   EXPECT_EQ(Triple::Hurd, T.getOS());
   EXPECT_EQ(Triple::GNU, T.getEnvironment());
 
+  T = Triple("i686-pc-linux-gnu");
+  EXPECT_EQ(Triple::x86, T.getArch());
+  EXPECT_EQ(Triple::PC, T.getVendor());
+  EXPECT_EQ(Triple::Linux, T.getOS());
+  EXPECT_EQ(Triple::GNU, T.getEnvironment());
+  EXPECT_FALSE(T.isTime64ABI());
+
   T = Triple("x86_64-pc-linux-gnu");
   EXPECT_EQ(Triple::x86_64, T.getArch());
   EXPECT_EQ(Triple::PC, T.getVendor());
@@ -187,24 +194,52 @@ TEST(TripleTest, ParsedIDs) {
   EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
   EXPECT_EQ(Triple::UnknownOS, T.getOS());
   EXPECT_EQ(Triple::EABI, T.getEnvironment());
+  EXPECT_FALSE(T.isHardFloatABI());
+
+  T = Triple("arm-none-none-eabihf");
+  EXPECT_EQ(Triple::arm, T.getArch());
+  EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
+  EXPECT_EQ(Triple::UnknownOS, T.getOS());
+  EXPECT_EQ(Triple::EABIHF, T.getEnvironment());
+  EXPECT_TRUE(T.isHardFloatABI());
 
   T = Triple("arm-none-linux-musleabi");
   EXPECT_EQ(Triple::arm, T.getArch());
   EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
   EXPECT_EQ(Triple::Linux, T.getOS());
   EXPECT_EQ(Triple::MuslEABI, T.getEnvironment());
+  EXPECT_FALSE(T.isHardFloatABI());
+
+  T = Triple("arm-none-linux-musleabihf");
+  EXPECT_EQ(Triple::arm, T.getArch());
+  EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
+  EXPECT_EQ(Triple::Linux, T.getOS());
+  EXPECT_EQ(Triple::MuslEABIHF, T.getEnvironment());
+  EXPECT_TRUE(T.isHardFloatABI());
 
   T = Triple("armv6hl-none-linux-gnueabi");
   EXPECT_EQ(Triple::arm, T.getArch());
   EXPECT_EQ(Triple::Linux, T.getOS());
   EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
   EXPECT_EQ(Triple::GNUEABI, T.getEnvironment());
+  EXPECT_FALSE(T.isTime64ABI());
+  EXPECT_FALSE(T.isHardFloatABI());
 
   T = Triple("armv7hl-none-linux-gnueabi");
   EXPECT_EQ(Triple::arm, T.getArch());
   EXPECT_EQ(Triple::Linux, T.getOS());
   EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
   EXPECT_EQ(Triple::GNUEABI, T.getEnvironment());
+  EXPECT_FALSE(T.isTime64ABI());
+  EXPECT_FALSE(T.isHardFloatABI());
+
+  T = Triple("armv7hl-none-linux-gnueabihf");
+  EXPECT_EQ(Triple::arm, T.getArch());
+  EXPECT_EQ(Triple::Linux, T.getOS());
+  EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
+  EXPECT_EQ(Triple::GNUEABIHF, T.getEnvironment());
+  EXPECT_FALSE(T.isTime64ABI());
+  EXPECT_TRUE(T.isHardFloatABI());
 
   T = Triple("amdil-unknown-unknown");
   EXPECT_EQ(Triple::amdil, T.getArch());
@@ -1175,6 +1210,29 @@ TEST(TripleTest, ParsedIDs) {
   EXPECT_EQ(Triple::Linux, T.getOS());
   EXPECT_EQ(Triple::PAuthTest, T.getEnvironment());
 
+  // Gentoo time64 triples
+  T = Triple("i686-pc-linux-gnut64");
+  EXPECT_EQ(Triple::x86, T.getArch());
+  EXPECT_EQ(Triple::PC, T.getVendor());
+  EXPECT_EQ(Triple::Linux, T.getOS());
+  EXPECT_EQ(Triple::GNUT64, T.getEnvironment());
+  EXPECT_TRUE(T.isTime64ABI());
+
+  T = Triple("armv5tel-softfloat-linux-gnueabit64");
+  EXPECT_EQ(Triple::arm, T.getArch());
+  EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
+  EXPECT_EQ(Triple::Linux, T.getOS());
+  EXPECT_EQ(Triple::GNUEABIT64, T.getEnvironment());
+  EXPECT_TRUE(T.isTime64ABI());
+
+  T = Triple("armv7a-unknown-linux-gnueabihft64");
+  EXPECT_EQ(Triple::arm, T.getArch());
+  EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
+  EXPECT_EQ(Triple::Linux, T.getOS());
+  EXPECT_EQ(Triple::GNUEABIHFT64, T.getEnvironment());
+  EXPECT_TRUE(T.isTime64ABI());
+  EXPECT_TRUE(T.isHardFloatABI());
+
   T = Triple("huh");
   EXPECT_EQ(Triple::UnknownArch, T.getArch());
 }

From 17365dfd21011cbd5972a88dd5dd45575f2c81ec Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@hieta.se>
Date: Tue, 29 Oct 2024 09:57:40 +0100
Subject: [PATCH 369/427] Bump version to 19.1.3

---
 cmake/Modules/LLVMVersion.cmake          | 2 +-
 libcxx/include/__config                  | 2 +-
 llvm/utils/gn/secondary/llvm/version.gni | 2 +-
 llvm/utils/lit/lit/__init__.py           | 2 +-
 llvm/utils/mlgo-utils/mlgo/__init__.py   | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake
index e521cf77fd58a..d57561fcd1748 100644
--- a/cmake/Modules/LLVMVersion.cmake
+++ b/cmake/Modules/LLVMVersion.cmake
@@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR)
   set(LLVM_VERSION_MINOR 1)
 endif()
 if(NOT DEFINED LLVM_VERSION_PATCH)
-  set(LLVM_VERSION_PATCH 2)
+  set(LLVM_VERSION_PATCH 3)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
   set(LLVM_VERSION_SUFFIX)
diff --git a/libcxx/include/__config b/libcxx/include/__config
index 272375907eb50..ecb21a705c515 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -27,7 +27,7 @@
 // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM.
 // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is
 // defined to XXYYZZ.
-#  define _LIBCPP_VERSION 190102
+#  define _LIBCPP_VERSION 190103
 
 #  define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
 #  define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni
index 1e041c6b506bb..fa570bad3b1be 100644
--- a/llvm/utils/gn/secondary/llvm/version.gni
+++ b/llvm/utils/gn/secondary/llvm/version.gni
@@ -1,4 +1,4 @@
 llvm_version_major = 19
 llvm_version_minor = 1
-llvm_version_patch = 2
+llvm_version_patch = 3
 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch"
diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py
index cebdf15cefc55..9c7b53d82aae2 100644
--- a/llvm/utils/lit/lit/__init__.py
+++ b/llvm/utils/lit/lit/__init__.py
@@ -2,7 +2,7 @@
 
 __author__ = "Daniel Dunbar"
 __email__ = "daniel@minormatter.com"
-__versioninfo__ = (19, 1, 2)
+__versioninfo__ = (19, 1, 3)
 __version__ = ".".join(str(v) for v in __versioninfo__) + "dev"
 
 __all__ = []
diff --git a/llvm/utils/mlgo-utils/mlgo/__init__.py b/llvm/utils/mlgo-utils/mlgo/__init__.py
index 99fd714184410..95aa8e2684d06 100644
--- a/llvm/utils/mlgo-utils/mlgo/__init__.py
+++ b/llvm/utils/mlgo-utils/mlgo/__init__.py
@@ -4,7 +4,7 @@
 
 from datetime import timezone, datetime
 
-__versioninfo__ = (19, 1, 2)
+__versioninfo__ = (19, 1, 3)
 __version__ = (
     ".".join(str(v) for v in __versioninfo__)
     + "dev"

From 6e006e11f3509f0f3eeac9f3b092176cf7c1832f Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 15 Oct 2024 09:34:55 +0200
Subject: [PATCH 370/427] [WebAssembly] Fix feature coalescing (#110647)

This fixes a problem introduced in #80094. That PR copied negative
features from the TargetMachine to the end of the feature string. This
is not correct, because even if we have a baseline TM of say `-simd128`,
but a function with `+simd128`, the coalesced feature string should have
`+simd128`, not `-simd128`.

To address the original motivation of that PR, we should instead
explicitly materialize the negative features in the target feature
string, so that explicitly disabled default features are honored.

Unfortunately, there doesn't seem to be any way to actually test this
using llc, because `-mattr` appends the specified features to the end of
the `"target-features"` attribute. I've tested this locally by making it
prepend the features instead.

(cherry picked from commit 5a7b79c93e2e0c71aec016973f5f13d3bb2e7a62)
---
 .../Target/WebAssembly/WebAssemblyTargetMachine.cpp  | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 23539a5f4b26f..ac9e6d5a90cb3 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -202,8 +202,7 @@ class CoalesceFeaturesAndStripAtomics final : public ModulePass {
   bool runOnModule(Module &M) override {
     FeatureBitset Features = coalesceFeatures(M);
 
-    std::string FeatureStr =
-        getFeatureString(Features, WasmTM->getTargetFeatureString());
+    std::string FeatureStr = getFeatureString(Features);
     WasmTM->setTargetFeatureString(FeatureStr);
     for (auto &F : M)
       replaceFeatures(F, FeatureStr);
@@ -241,17 +240,14 @@ class CoalesceFeaturesAndStripAtomics final : public ModulePass {
     return Features;
   }
 
-  static std::string getFeatureString(const FeatureBitset &Features,
-                                      StringRef TargetFS) {
+  static std::string getFeatureString(const FeatureBitset &Features) {
     std::string Ret;
     for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) {
       if (Features[KV.Value])
         Ret += (StringRef("+") + KV.Key + ",").str();
+      else
+        Ret += (StringRef("-") + KV.Key + ",").str();
     }
-    SubtargetFeatures TF{TargetFS};
-    for (std::string const &F : TF.getFeatures())
-      if (!SubtargetFeatures::isEnabled(F))
-        Ret += F + ",";
     return Ret;
   }
 

From 6ee49080e4bb43efe7ede10bed15935853bbd434 Mon Sep 17 00:00:00 2001
From: Chen Zheng <czhengsz@cn.ibm.com>
Date: Tue, 30 Jul 2024 09:02:07 +0800
Subject: [PATCH 371/427] [NFC] fix build failure (#100993)

Fix the build failure caused by
https://github.com/llvm/llvm-project/pull/94944

Fixes https://github.com/llvm/llvm-project/issues/100296

(cherry picked from commit 40b4fd7a3e81d32b29364a1b15337bcf817659c0)
---
 llvm/lib/Analysis/ConstantFolding.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index df75745645e04..ff30fece5fce9 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1784,8 +1784,8 @@ Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
 }
 
 #if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
-Constant *ConstantFoldFP128(long double (*NativeFP)(long double),
-                            const APFloat &V, Type *Ty) {
+Constant *ConstantFoldFP128(float128 (*NativeFP)(float128), const APFloat &V,
+                            Type *Ty) {
   llvm_fenv_clearexcept();
   float128 Result = NativeFP(V.convertToQuad());
   if (llvm_fenv_testexcept()) {

From 062adaf7d2f101ed9e83c09d192dd228dcbd9ffd Mon Sep 17 00:00:00 2001
From: Mariya Podchishchaeva <mariya.podchishchaeva@intel.com>
Date: Thu, 17 Oct 2024 13:42:35 +0200
Subject: [PATCH 372/427] [clang] Reject if constexpr in C (#112685)

Fixes https://github.com/llvm/llvm-project/issues/112587
---
 clang/lib/Parse/ParseStmt.cpp | 11 +++++++----
 clang/test/Sema/constexpr.c   |  7 +++++++
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp
index 22d38adc28ebe..3ac1f0fa27f83 100644
--- a/clang/lib/Parse/ParseStmt.cpp
+++ b/clang/lib/Parse/ParseStmt.cpp
@@ -1508,10 +1508,13 @@ StmtResult Parser::ParseIfStatement(SourceLocation *TrailingElseLoc) {
   SourceLocation ConstevalLoc;
 
   if (Tok.is(tok::kw_constexpr)) {
-    Diag(Tok, getLangOpts().CPlusPlus17 ? diag::warn_cxx14_compat_constexpr_if
-                                        : diag::ext_constexpr_if);
-    IsConstexpr = true;
-    ConsumeToken();
+    // C23 supports constexpr keyword, but only for object definitions.
+    if (getLangOpts().CPlusPlus) {
+      Diag(Tok, getLangOpts().CPlusPlus17 ? diag::warn_cxx14_compat_constexpr_if
+                                          : diag::ext_constexpr_if);
+      IsConstexpr = true;
+      ConsumeToken();
+    }
   } else {
     if (Tok.is(tok::exclaim)) {
       NotLocation = ConsumeToken();
diff --git a/clang/test/Sema/constexpr.c b/clang/test/Sema/constexpr.c
index 8286cd2107d2f..a874fd6480840 100644
--- a/clang/test/Sema/constexpr.c
+++ b/clang/test/Sema/constexpr.c
@@ -357,3 +357,10 @@ void infsNaNs() {
   constexpr double db5 = LD_SNAN; // expected-error {{constexpr initializer evaluates to nan which is not exactly representable in type 'const double'}}
   constexpr double db6 = INF;
 }
+
+void constexprif() {
+  if constexpr (300) {} //expected-error {{expected '(' after 'if'}}
+}
+void constevalif() {
+  if consteval (300) {} //expected-error {{expected '(' after 'if'}}
+}

From 108ab46cf57d769795c8310cebcf73e3a0c189ad Mon Sep 17 00:00:00 2001
From: Bill Wendling <morbo@google.com>
Date: Thu, 17 Oct 2024 21:52:40 +0000
Subject: [PATCH 373/427] [Clang] Disable use of the counted_by attribute for
 whole struct pointers (#112636)

The whole struct is specificed in the __bdos. The calculation of the
whole size of the structure can be done in two ways:

    1) sizeof(struct S) + count * sizeof(typeof(fam))
    2) offsetof(struct S, fam) + count * sizeof(typeof(fam))

The first will add any remaining whitespace that might exist after
allocation while the second method is more precise, but not quite
expected from programmers. See [1] for a discussion of the topic.

GCC isn't (currently) able to calculate __bdos on a pointer to the whole
structure. Therefore, because of the above issue, we'll choose to match
what GCC does for consistency's sake.

[1] https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/

Co-authored-by: Eli Friedman <efriedma@quicinc.com>
---
 clang/lib/CodeGen/CGBuiltin.cpp      |  45 +++--
 clang/test/CodeGen/attr-counted-by.c | 253 +++++++++------------------
 2 files changed, 107 insertions(+), 191 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 5639239359ab8..86d47054615e6 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -1001,6 +1001,24 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
     // Can't find the field referenced by the "counted_by" attribute.
     return nullptr;
 
+  if (isa<DeclRefExpr>(Base))
+    // The whole struct is specificed in the __bdos. The calculation of the
+    // whole size of the structure can be done in two ways:
+    //
+    //     1) sizeof(struct S) + count * sizeof(typeof(fam))
+    //     2) offsetof(struct S, fam) + count * sizeof(typeof(fam))
+    //
+    // The first will add additional padding after the end of the array,
+    // allocation while the second method is more precise, but not quite
+    // expected from programmers. See
+    // https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a
+    // discussion of the topic.
+    //
+    // GCC isn't (currently) able to calculate __bdos on a pointer to the whole
+    // structure. Therefore, because of the above issue, we'll choose to match
+    // what GCC does for consistency's sake.
+    return nullptr;
+
   // Build a load of the counted_by field.
   bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
   Value *CountedByInst = EmitCountedByFieldExpr(Base, FAMDecl, CountedByFD);
@@ -1031,32 +1049,9 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
   CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
   llvm::Constant *ElemSize =
       llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
-  Value *FAMSize =
+  Value *Res =
       Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
-  FAMSize = Builder.CreateIntCast(FAMSize, ResType, IsSigned);
-  Value *Res = FAMSize;
-
-  if (isa<DeclRefExpr>(Base)) {
-    // The whole struct is specificed in the __bdos.
-    const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(OuterRD);
-
-    // Get the offset of the FAM.
-    llvm::Constant *FAMOffset = ConstantInt::get(ResType, Offset, IsSigned);
-    Value *OffsetAndFAMSize =
-        Builder.CreateAdd(FAMOffset, Res, "", !IsSigned, IsSigned);
-
-    // Get the full size of the struct.
-    llvm::Constant *SizeofStruct =
-        ConstantInt::get(ResType, Layout.getSize().getQuantity(), IsSigned);
-
-    // max(sizeof(struct s),
-    //     offsetof(struct s, array) + p->count * sizeof(*p->array))
-    Res = IsSigned
-              ? Builder.CreateBinaryIntrinsic(llvm::Intrinsic::smax,
-                                              OffsetAndFAMSize, SizeofStruct)
-              : Builder.CreateBinaryIntrinsic(llvm::Intrinsic::umax,
-                                              OffsetAndFAMSize, SizeofStruct);
-  }
+  Res = Builder.CreateIntCast(Res, ResType, IsSigned);
 
   // A negative \p IdxInst or \p CountedByInst means that the index lands
   // outside of the flexible array member. If that's the case, we want to
diff --git a/clang/test/CodeGen/attr-counted-by.c b/clang/test/CodeGen/attr-counted-by.c
index 322ef2d0f7db1..65e6c042730b4 100644
--- a/clang/test/CodeGen/attr-counted-by.c
+++ b/clang/test/CodeGen/attr-counted-by.c
@@ -66,7 +66,7 @@ struct anon_struct {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10:[0-9]+]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9:[0-9]+]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12
@@ -114,7 +114,7 @@ void test1(struct annotated *p, int index, int val) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], [[INDEX]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB3:[0-9]+]], i64 [[INDEX]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB3:[0-9]+]], i64 [[INDEX]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12
@@ -203,36 +203,20 @@ size_t test2_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], [[INDEX]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[INDEX]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[INDEX]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP3:%.*]] = shl nsw i64 [[TMP2]], 2
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP4:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP3]], i64 4)
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP6:%.*]] = add i32 [[TMP5]], 12
-// SANITIZE-WITH-ATTR-NEXT:    [[DOTINV:%.*]] = icmp slt i32 [[DOT_COUNTED_BY_LOAD]], 0
-// SANITIZE-WITH-ATTR-NEXT:    [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP6]]
-// SANITIZE-WITH-ATTR-NEXT:    store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT:    store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]]
 // SANITIZE-WITH-ATTR-NEXT:    ret void
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test3(
-// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 2
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP1]], i64 4)
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], 12
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOTINV:%.*]] = icmp slt i32 [[DOT_COUNTED_BY_LOAD]], 0
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP4]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[INDEX]]
-// NO-SANITIZE-WITH-ATTR-NEXT:    store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT:    store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    ret void
 //
 // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test3(
@@ -254,34 +238,18 @@ size_t test2_bdos(struct annotated *p) {
 void test3(struct annotated *p, size_t index) {
   // This test differs from 'test2' by checking bdos on the whole array and not
   // just the FAM.
-  p->array[index] = __builtin_dynamic_object_size(p, 1);
+  p->array[index] = __builtin_dynamic_object_size(p, 0);
 }
 
-// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 8589934601) i64 @test3_bdos(
-// SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test3_bdos(
+// SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 2
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP1]], i64 4)
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 12
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD]], -1
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 0
-// SANITIZE-WITH-ATTR-NEXT:    ret i64 [[TMP5]]
+// SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
 //
-// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 8589934601) i64 @test3_bdos(
-// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test3_bdos(
+// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 2
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP1]], i64 4)
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 12
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD]], -1
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 0
-// NO-SANITIZE-WITH-ATTR-NEXT:    ret i64 [[TMP5]]
+// NO-SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
 //
 // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test3_bdos(
 // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
@@ -294,7 +262,7 @@ void test3(struct annotated *p, size_t index) {
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    ret i64 -1
 //
 size_t test3_bdos(struct annotated *p) {
-  return __builtin_dynamic_object_size(p, 1);
+  return __builtin_dynamic_object_size(p, 0);
 }
 
 // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test4(
@@ -308,7 +276,7 @@ size_t test3_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT4:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont4:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD]], 2
@@ -325,7 +293,7 @@ size_t test3_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP7:%.*]] = icmp ult i64 [[IDXPROM12]], [[TMP6]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP7]], label [[CONT19:%.*]], label [[HANDLER_OUT_OF_BOUNDS15:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds15:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM12]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM12]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont19:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD6]], 3
@@ -342,7 +310,7 @@ size_t test3_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP13:%.*]] = icmp ult i64 [[IDXPROM28]], [[TMP12]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP13]], label [[CONT35:%.*]], label [[HANDLER_OUT_OF_BOUNDS31:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds31:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 [[IDXPROM28]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 [[IDXPROM28]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont35:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM28]]
@@ -488,39 +456,27 @@ size_t test4_bdos(struct annotated *p, int index) {
 // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test5(
 // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i64, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[DOT_COUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_LOAD:%.*]] = load i64, ptr [[DOTCOUNTED_BY_GEP]], align 4
+// SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[DOTCOUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]]
-// SANITIZE-WITH-ATTR-NEXT:    [[DOTINV:%.*]] = icmp slt i64 [[DOT_COUNTED_BY_LOAD]], 0
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD_TR:%.*]] = trunc i64 [[DOT_COUNTED_BY_LOAD]] to i32
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = shl i32 [[DOT_COUNTED_BY_LOAD_TR]], 2
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 16
-// SANITIZE-WITH-ATTR-NEXT:    [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP3]]
-// SANITIZE-WITH-ATTR-NEXT:    store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]]
+// SANITIZE-WITH-ATTR-NEXT:    store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]]
 // SANITIZE-WITH-ATTR-NEXT:    ret void
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test5(
-// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i64, ptr [[DOT_COUNTED_BY_GEP]], align 4
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOTINV:%.*]] = icmp slt i64 [[DOT_COUNTED_BY_LOAD]], 0
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD_TR:%.*]] = trunc i64 [[DOT_COUNTED_BY_LOAD]] to i32
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = shl i32 [[DOT_COUNTED_BY_LOAD_TR]], 2
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 16
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP1]]
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT:    store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
+// NO-SANITIZE-WITH-ATTR-NEXT:    store i32 -1, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    ret void
 //
 // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test5(
@@ -545,27 +501,15 @@ void test5(struct anon_struct *p, int index) {
   p->array[index] = __builtin_dynamic_object_size(p, 1);
 }
 
-// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 16, 1) i64 @test5_bdos(
-// SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test5_bdos(
+// SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i64, ptr [[DOT_COUNTED_BY_GEP]], align 4
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = shl nuw i64 [[DOT_COUNTED_BY_LOAD]], 2
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = add nuw i64 [[TMP0]], 16
-// SANITIZE-WITH-ATTR-NEXT:    [[DOTINV:%.*]] = icmp slt i64 [[DOT_COUNTED_BY_LOAD]], 0
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = select i1 [[DOTINV]], i64 0, i64 [[TMP1]]
-// SANITIZE-WITH-ATTR-NEXT:    ret i64 [[TMP2]]
+// SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
 //
-// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 16, 1) i64 @test5_bdos(
-// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test5_bdos(
+// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i64, ptr [[DOT_COUNTED_BY_GEP]], align 4
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = shl nuw i64 [[DOT_COUNTED_BY_LOAD]], 2
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = add nuw i64 [[TMP0]], 16
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOTINV:%.*]] = icmp slt i64 [[DOT_COUNTED_BY_LOAD]], 0
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = select i1 [[DOTINV]], i64 0, i64 [[TMP1]]
-// NO-SANITIZE-WITH-ATTR-NEXT:    ret i64 [[TMP2]]
+// NO-SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
 //
 // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test5_bdos(
 // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
@@ -590,7 +534,7 @@ size_t test5_bdos(struct anon_struct *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[DOT_COUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
@@ -683,7 +627,7 @@ size_t test6_bdos(struct anon_struct *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP2]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont7:
 // SANITIZE-WITH-ATTR-NEXT:    [[INTS:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 9
@@ -723,12 +667,12 @@ void test7(struct union_of_fams *p, int index) {
 }
 
 // SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test7_bdos(
-// SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {
+// SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test7_bdos(
-// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {
+// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
 //
@@ -756,7 +700,7 @@ size_t test7_bdos(struct union_of_fams *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT9:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB12:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB12:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont9:
 // SANITIZE-WITH-ATTR-NEXT:    [[INTS:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 9
@@ -837,7 +781,7 @@ size_t test8_bdos(struct union_of_fams *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP2]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB14:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB14:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont7:
 // SANITIZE-WITH-ATTR-NEXT:    [[BYTES:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12
@@ -877,12 +821,12 @@ void test9(struct union_of_fams *p, int index) {
 }
 
 // SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test9_bdos(
-// SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test9_bdos(
-// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
 //
@@ -910,7 +854,7 @@ size_t test9_bdos(struct union_of_fams *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT9:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB15:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB15:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont9:
 // SANITIZE-WITH-ATTR-NEXT:    [[BYTES:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12
@@ -997,7 +941,7 @@ size_t test10_bdos(struct union_of_fams *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB16:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB16:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12
@@ -1037,12 +981,12 @@ void test11(struct annotated *p, int index) {
 }
 
 // SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test11_bdos(
-// SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    ret i64 4
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test11_bdos(
-// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITH-ATTR-NEXT:    ret i64 4
 //
@@ -1076,16 +1020,16 @@ struct hang {
 int test12_a, test12_b;
 
 // SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i32 @test12(
-// SANITIZE-WITH-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] {
+// SANITIZE-WITH-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4
-// SANITIZE-WITH-ATTR-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[BAZ]]) #[[ATTR11:[0-9]+]]
+// SANITIZE-WITH-ATTR-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[BAZ]]) #[[ATTR10:[0-9]+]]
 // SANITIZE-WITH-ATTR-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT9:![0-9]+]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[INDEX]], 6
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[INDEX]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB18:[0-9]+]], i64 [[TMP1]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB18:[0-9]+]], i64 [[TMP1]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [6 x i32], ptr [[BAZ]], i64 0, i64 [[TMP1]]
@@ -1095,17 +1039,17 @@ int test12_a, test12_b;
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTNOT:%.*]] = icmp eq i32 [[DOTCOUNTED_BY_LOAD]], 0
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS4:%.*]], label [[HANDLER_TYPE_MISMATCH6:%.*]], !prof [[PROF10:![0-9]+]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds4:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB20:[0-9]+]], i64 0) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB20:[0-9]+]], i64 0) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.type_mismatch6:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB21:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @test12_foo, i64 4) to i64)) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @21, i64 ptrtoint (ptr getelementptr inbounds (i8, ptr @test12_foo, i64 4) to i64)) #9, !nosanitize !2
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i32 @test12(
-// NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] {
+// NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4
-// NO-SANITIZE-WITH-ATTR-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[BAZ]]) #[[ATTR12:[0-9]+]]
+// NO-SANITIZE-WITH-ATTR-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[BAZ]]) #[[ATTR11:[0-9]+]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [6 x i32], ptr [[BAZ]], i64 0, i64 [[IDXPROM]]
@@ -1188,7 +1132,7 @@ struct test13_bar {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = icmp ugt i64 [[TMP1]], [[INDEX]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP2]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB24:[0-9]+]], i64 [[INDEX]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB24:[0-9]+]], i64 [[INDEX]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont5:
 // SANITIZE-WITH-ATTR-NEXT:    [[REVMAP:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
@@ -1197,7 +1141,7 @@ struct test13_bar {
 // SANITIZE-WITH-ATTR-NEXT:    ret i32 0
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i32 @test13(
-// NO-SANITIZE-WITH-ATTR-SAME: i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR8:[0-9]+]] {
+// NO-SANITIZE-WITH-ATTR-SAME: i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[TBAA8:![0-9]+]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[REVMAP:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
@@ -1249,14 +1193,14 @@ struct test14_foo {
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[TRAP:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB25:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB25:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       trap:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @llvm.trap() #[[ATTR10]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @llvm.trap() #[[ATTR9]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test14(
-// NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca [[STRUCT_TEST14_FOO:%.*]], align 4
 // NO-SANITIZE-WITH-ATTR-NEXT:    store i32 1, ptr [[DOTCOMPOUNDLITERAL]], align 4, !tbaa [[TBAA2]]
@@ -1305,14 +1249,14 @@ int test14(int idx) {
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[TRAP:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB27:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB27:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       trap:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @llvm.trap() #[[ATTR10]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @llvm.trap() #[[ATTR9]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test15(
-// NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i32], ptr getelementptr inbounds (i8, ptr @__const.test15.foo, i64 8), i64 0, i64 [[IDXPROM]]
@@ -1350,12 +1294,12 @@ int test15(int idx) {
 }
 
 // SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test19(
-// SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test19(
-// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
 //
@@ -1375,12 +1319,12 @@ size_t test19(struct annotated *p) {
 }
 
 // SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test20(
-// SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test20(
-// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
 //
@@ -1400,12 +1344,12 @@ size_t test20(struct annotated *p) {
 }
 
 // SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test21(
-// SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test21(
-// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
 //
@@ -1425,12 +1369,12 @@ size_t test21(struct annotated *p) {
 }
 
 // SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test22(
-// SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test22(
-// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
 //
@@ -1450,12 +1394,12 @@ size_t test22(struct annotated *p) {
 }
 
 // SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test23(
-// SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i64 @test23(
-// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readnone [[P:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
 //
@@ -1487,7 +1431,7 @@ struct tests_foo {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 10
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT4:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB28:[0-9]+]], i64 10) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB28:[0-9]+]], i64 10) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont4:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 84
@@ -1528,7 +1472,7 @@ int test24(int c, struct tests_foo *var) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 10
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB29:[0-9]+]], i64 10) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB29:[0-9]+]], i64 10) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont5:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 44
@@ -1536,7 +1480,7 @@ int test24(int c, struct tests_foo *var) {
 // SANITIZE-WITH-ATTR-NEXT:    ret i32 [[TMP2]]
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test25(
-// NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[C:%.*]], ptr nocapture noundef readonly [[VAR:%.*]]) local_unnamed_addr #[[ATTR9:[0-9]+]] {
+// NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[C:%.*]], ptr nocapture noundef readonly [[VAR:%.*]]) local_unnamed_addr #[[ATTR8:[0-9]+]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[TBAA11]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 44
@@ -1580,7 +1524,7 @@ struct test26_foo {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB30:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB30:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont5:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARR:%.*]] = getelementptr inbounds i8, ptr [[FOO]], i64 8
@@ -1651,7 +1595,7 @@ struct test27_foo {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB32:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB32:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[ENTRIES:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 24
@@ -1717,7 +1661,7 @@ struct test28_foo {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP4]], label [[CONT17:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB34:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB34:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont17:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARR:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 12
@@ -1726,7 +1670,7 @@ struct test28_foo {
 // SANITIZE-WITH-ATTR-NEXT:    ret i32 [[TMP5]]
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test28(
-// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]], i32 noundef [[I:%.*]]) local_unnamed_addr #[[ATTR9]] {
+// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]], i32 noundef [[I:%.*]]) local_unnamed_addr #[[ATTR8]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P]], align 8, !tbaa [[TBAA11]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA11]]
@@ -1779,7 +1723,7 @@ struct annotated_struct_array {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[IDX1]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB36:[0-9]+]], i64 [[TMP1]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB36:[0-9]+]], i64 [[TMP1]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x ptr], ptr [[ANN]], i64 0, i64 [[TMP1]]
@@ -1791,7 +1735,7 @@ struct annotated_struct_array {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[IDXPROM15]], [[TMP3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP4]], label [[CONT20:%.*]], label [[HANDLER_OUT_OF_BOUNDS16:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds16:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB37:[0-9]+]], i64 [[IDXPROM15]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB37:[0-9]+]], i64 [[IDXPROM15]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont20:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 12
@@ -1803,7 +1747,7 @@ struct annotated_struct_array {
 // SANITIZE-WITH-ATTR-NEXT:    ret void
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test29(
-// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[ANN:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR10:[0-9]+]] {
+// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[ANN:%.*]], i32 noundef [[IDX1:%.*]], i32 noundef [[IDX2:%.*]]) local_unnamed_addr #[[ATTR9:[0-9]+]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IDX1]] to i64
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x ptr], ptr [[ANN]], i64 0, i64 [[IDXPROM]]
@@ -1865,26 +1809,19 @@ struct test30_struct {
 };
 
 // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test30(
-// SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR5]] {
+// SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR4]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = zext i32 [[IDX]] to i64, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB39:[0-9]+]], i64 [[TMP0]]) #[[ATTR10]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB39:[0-9]+]], i64 [[TMP0]]) #[[ATTR9]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test30(
-// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 8
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.smax.i32(i32 [[DOT_COUNTED_BY_LOAD]], i32 4)
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOTINV:%.*]] = icmp slt i32 [[DOT_COUNTED_BY_LOAD]], 0
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = add i8 [[TMP1]], 12
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[CONV:%.*]] = select i1 [[DOTINV]], i8 0, i8 [[TMP2]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[PCPU_REFCNT:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 12
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr [[PCPU_REFCNT]], i64 0, i64 [[IDXPROM]]
-// NO-SANITIZE-WITH-ATTR-NEXT:    store i8 [[CONV]], ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]]
+// NO-SANITIZE-WITH-ATTR-NEXT:    store i8 -1, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    ret void
 //
 // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test30(
@@ -1916,30 +1853,14 @@ struct test31_struct {
 };
 
 // SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test31(
-// SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[PTR]], align 4
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 2
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP1]], i64 0)
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], 4
-// SANITIZE-WITH-ATTR-NEXT:    [[DOTINV:%.*]] = icmp slt i32 [[DOT_COUNTED_BY_LOAD]], 0
-// SANITIZE-WITH-ATTR-NEXT:    [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP4]]
-// SANITIZE-WITH-ATTR-NEXT:    ret i32 [[CONV]]
+// SANITIZE-WITH-ATTR-NEXT:    ret i32 -1
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test31(
-// NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[PTR]], align 4
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 2
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP1]], i64 0)
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], 4
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOTINV:%.*]] = icmp slt i32 [[DOT_COUNTED_BY_LOAD]], 0
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[CONV:%.*]] = select i1 [[DOTINV]], i32 0, i32 [[TMP4]]
-// NO-SANITIZE-WITH-ATTR-NEXT:    ret i32 [[CONV]]
+// NO-SANITIZE-WITH-ATTR-NEXT:    ret i32 -1
 //
 // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test31(
 // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR2]] {

From 4b322f68aa42d508f9f550b36013f3cf523db3f9 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Thu, 17 Oct 2024 19:56:09 -0700
Subject: [PATCH 374/427] [clang-format] Handle template opener/closer in
 braced list (#112494)

Fixes #112487.

(cherry picked from commit 67f576f31d661897c5da302b8611decb7e0f9237)
---
 clang/lib/Format/UnwrappedLineParser.cpp      | 5 +++++
 clang/unittests/Format/TokenAnnotatorTest.cpp | 6 ++++++
 2 files changed, 11 insertions(+)

diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 631c7c62baac1..a5268e153bcc5 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -2500,6 +2500,11 @@ bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
       // Assume there are no blocks inside a braced init list apart
       // from the ones we explicitly parse out (like lambdas).
       FormatTok->setBlockKind(BK_BracedInit);
+      if (!IsAngleBracket) {
+        auto *Prev = FormatTok->Previous;
+        if (Prev && Prev->is(tok::greater))
+          Prev->setFinalizedType(TT_TemplateCloser);
+      }
       nextToken();
       parseBracedList();
       break;
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 6f3ef3b646c61..4acd900ff061f 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -3404,6 +3404,12 @@ TEST_F(TokenAnnotatorTest, TemplateInstantiation) {
   ASSERT_EQ(Tokens.size(), 21u) << Tokens;
   EXPECT_TOKEN(Tokens[4], tok::less, TT_TemplateOpener);
   EXPECT_TOKEN(Tokens[16], tok::greater, TT_TemplateCloser);
+
+  Tokens =
+      annotate("auto x{std::conditional_t<T::value == U::value, T, U>{}};");
+  ASSERT_EQ(Tokens.size(), 24u) << Tokens;
+  EXPECT_TOKEN(Tokens[6], tok::less, TT_TemplateOpener);
+  EXPECT_TOKEN(Tokens[18], tok::greater, TT_TemplateCloser);
 }
 
 } // namespace

From def7925c48337574df23156e3a8b23a8470cdc16 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@users.noreply.github.com>
Date: Tue, 20 Aug 2024 16:51:16 -0700
Subject: [PATCH 375/427] Fix KCFI types for generated functions with integer
 normalization (#104826)

With -fsanitize-cfi-icall-experimental-normalize-integers, Clang
appends ".normalized" to KCFI types in CodeGenModule::CreateKCFITypeId,
which changes type hashes also for functions that don't have integer
types in their signatures. However, llvm::setKCFIType does not take
integer normalization into account, which means LLVM generated
functions with KCFI types, e.g. sanitizer constructors, will fail KCFI
checks when integer normalization is enabled in Clang.

Add a cfi-normalize-integers module flag to indicate integer
normalization is used, and append ".normalized" to KCFI types also in
llvm::setKCFIType to fix the type mismatch.

(cherry picked from commit e1c36bde0551977d4b2efae032af6dfc4b2b3936)
---
 clang/lib/CodeGen/CodeGenModule.cpp           |  5 +++
 clang/test/CodeGen/kcfi-normalize.c           |  1 +
 llvm/lib/Transforms/Utils/ModuleUtils.cpp     | 12 ++++---
 .../GCOVProfiling/kcfi-normalize.ll           | 35 +++++++++++++++++++
 llvm/test/Transforms/GCOVProfiling/kcfi.ll    |  8 +++--
 5 files changed, 53 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 8d9beab9fa7f8..151505baf38db 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1134,6 +1134,11 @@ void CodeGenModule::Release() {
                               CodeGenOpts.SanitizeCfiCanonicalJumpTables);
   }
 
+  if (CodeGenOpts.SanitizeCfiICallNormalizeIntegers) {
+    getModule().addModuleFlag(llvm::Module::Override, "cfi-normalize-integers",
+                              1);
+  }
+
   if (LangOpts.Sanitize.has(SanitizerKind::KCFI)) {
     getModule().addModuleFlag(llvm::Module::Override, "kcfi", 1);
     // KCFI assumes patchable-function-prefix is the same for all indirectly
diff --git a/clang/test/CodeGen/kcfi-normalize.c b/clang/test/CodeGen/kcfi-normalize.c
index 7660c908a7bdd..b9150e88f6ab5 100644
--- a/clang/test/CodeGen/kcfi-normalize.c
+++ b/clang/test/CodeGen/kcfi-normalize.c
@@ -28,6 +28,7 @@ void baz(void (*fn)(int, int, int), int arg1, int arg2, int arg3) {
     fn(arg1, arg2, arg3);
 }
 
+// CHECK: ![[#]] = !{i32 4, !"cfi-normalize-integers", i32 1}
 // CHECK: ![[TYPE1]] = !{i32 -1143117868}
 // CHECK: ![[TYPE2]] = !{i32 -460921415}
 // CHECK: ![[TYPE3]] = !{i32 -333839615}
diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 122279160cc7e..95bf9f06bc331 100644
--- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -161,11 +161,13 @@ void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
   // Matches CodeGenModule::CreateKCFITypeId in Clang.
   LLVMContext &Ctx = M.getContext();
   MDBuilder MDB(Ctx);
-  F.setMetadata(
-      LLVMContext::MD_kcfi_type,
-      MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
-                           Type::getInt32Ty(Ctx),
-                           static_cast<uint32_t>(xxHash64(MangledType))))));
+  std::string Type = MangledType.str();
+  if (M.getModuleFlag("cfi-normalize-integers"))
+    Type += ".normalized";
+  F.setMetadata(LLVMContext::MD_kcfi_type,
+                MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
+                                     Type::getInt32Ty(Ctx),
+                                     static_cast<uint32_t>(xxHash64(Type))))));
   // If the module was compiled with -fpatchable-function-entry, ensure
   // we use the same patchable-function-prefix.
   if (auto *MD = mdconst::extract_or_null<ConstantInt>(
diff --git a/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll b/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll
new file mode 100644
index 0000000000000..19122b920d1ca
--- /dev/null
+++ b/llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll
@@ -0,0 +1,35 @@
+;; Ensure __llvm_gcov_(writeout|reset|init) have the correct !kcfi_type
+;; with integer normalization.
+; RUN: mkdir -p %t && cd %t
+; RUN: opt < %s -S -passes=insert-gcov-profiling | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local void @empty() !dbg !5 {
+entry:
+  ret void, !dbg !8
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !9, !10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "a.c", directory: "")
+!2 = !{}
+!3 = !{i32 7, !"Dwarf Version", i32 5}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = distinct !DISubprogram(name: "empty", scope: !1, file: !1, line: 1, type: !6, scopeLine: 1, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!6 = !DISubroutineType(types: !7)
+!7 = !{null}
+!8 = !DILocation(line: 2, column: 1, scope: !5)
+!9 = !{i32 4, !"kcfi", i32 1}
+!10 = !{i32 4, !"cfi-normalize-integers", i32 1}
+
+; CHECK: define internal void @__llvm_gcov_writeout()
+; CHECK-SAME: !kcfi_type ![[#TYPE:]]
+; CHECK: define internal void @__llvm_gcov_reset()
+; CHECK-SAME: !kcfi_type ![[#TYPE]]
+; CHECK: define internal void @__llvm_gcov_init()
+; CHECK-SAME: !kcfi_type ![[#TYPE]]
+
+; CHECK: ![[#TYPE]] = !{i32 -440107680}
diff --git a/llvm/test/Transforms/GCOVProfiling/kcfi.ll b/llvm/test/Transforms/GCOVProfiling/kcfi.ll
index b25f40f05d5bc..1b97d25294cd6 100644
--- a/llvm/test/Transforms/GCOVProfiling/kcfi.ll
+++ b/llvm/test/Transforms/GCOVProfiling/kcfi.ll
@@ -24,8 +24,10 @@ entry:
 !9 = !{i32 4, !"kcfi", i32 1}
 
 ; CHECK: define internal void @__llvm_gcov_writeout()
-; CHECK-SAME: !kcfi_type
+; CHECK-SAME: !kcfi_type ![[#TYPE:]]
 ; CHECK: define internal void @__llvm_gcov_reset()
-; CHECK-SAME: !kcfi_type
+; CHECK-SAME: !kcfi_type ![[#TYPE]]
 ; CHECK: define internal void @__llvm_gcov_init()
-; CHECK-SAME: !kcfi_type
+; CHECK-SAME: !kcfi_type ![[#TYPE]]
+
+; CHECK: ![[#TYPE]] = !{i32 -1522505972}

From 936710a0a18bdf8ac1655f6e3bb518b20bb41aea Mon Sep 17 00:00:00 2001
From: Ben Shi <2283975856@qq.com>
Date: Fri, 20 Sep 2024 11:40:07 +0800
Subject: [PATCH 376/427] [AVR][MC] Fix incorrect range of relative jumps
 (#109124)

'rjmp .+4094' is legal but rejected by llvm-mc since
86a60e7f1e8f361f84ccb6e656e848dd4fbaa713, and this patch fixed that
range issue.

(cherry picked from commit 8c3b94f420a20a45dd07f3e12d6a6d649858f452)
---
 .../Target/AVR/MCTargetDesc/AVRAsmBackend.cpp |  6 +--
 llvm/test/MC/AVR/inst-rjmp.s                  | 40 ++++++++++---------
 2 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index 388d58a82214d..c0bc1276967bf 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -88,15 +88,15 @@ static void adjustBranch(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
 /// Adjusts the value of a relative branch target before fixup application.
 static void adjustRelativeBranch(unsigned Size, const MCFixup &Fixup,
                                  uint64_t &Value, MCContext *Ctx = nullptr) {
+  // Jumps are relative to the current instruction.
+  Value -= 2;
+
   // We have one extra bit of precision because the value is rightshifted by
   // one.
   signed_width(Size + 1, Value, std::string("branch target"), Fixup, Ctx);
 
   // Rightshifts the value by one.
   AVR::fixups::adjustBranchTarget(Value);
-
-  // Jumps are relative to the current instruction.
-  Value -= 1;
 }
 
 /// 22-bit absolute fixup.
diff --git a/llvm/test/MC/AVR/inst-rjmp.s b/llvm/test/MC/AVR/inst-rjmp.s
index cc843a58b55d2..2d7aa401feacf 100644
--- a/llvm/test/MC/AVR/inst-rjmp.s
+++ b/llvm/test/MC/AVR/inst-rjmp.s
@@ -19,25 +19,28 @@ end:
 x:
   rjmp x
   .short 0xc00f
+  rjmp .+4094
 
-; CHECK: rjmp (.Ltmp0+2)+2  ; encoding: [A,0b1100AAAA]
-; CHECK-NEXT:               ;   fixup A - offset: 0, value: (.Ltmp0+2)+2, kind: fixup_13_pcrel
-; CHECK: rjmp (.Ltmp1-2)+2  ; encoding: [A,0b1100AAAA]
-; CHECK-NEXT:               ;   fixup A - offset: 0, value: (.Ltmp1-2)+2, kind: fixup_13_pcrel
-; CHECK: rjmp foo           ; encoding: [A,0b1100AAAA]
-; CHECK-NEXT:               ;   fixup A - offset: 0, value: foo, kind: fixup_13_pcrel
-; CHECK: rjmp (.Ltmp2+8)+2  ; encoding: [A,0b1100AAAA]
-; CHECK-NEXT:               ;   fixup A - offset: 0, value: (.Ltmp2+8)+2, kind: fixup_13_pcrel
-; CHECK: rjmp end           ; encoding: [A,0b1100AAAA]
-; CHECK-NEXT:               ;   fixup A - offset: 0, value: end, kind: fixup_13_pcrel
-; CHECK: rjmp (.Ltmp3+0)+2  ; encoding: [A,0b1100AAAA]
-; CHECK-NEXT:               ;   fixup A - offset: 0, value: (.Ltmp3+0)+2, kind: fixup_13_pcrel
-; CHECK: rjmp (.Ltmp4-4)+2  ; encoding: [A,0b1100AAAA]
-; CHECK-NEXT:               ;   fixup A - offset: 0, value: (.Ltmp4-4)+2, kind: fixup_13_pcrel
-; CHECK: rjmp (.Ltmp5-6)+2  ; encoding: [A,0b1100AAAA]
-; CHECK-NEXT:               ;   fixup A - offset: 0, value: (.Ltmp5-6)+2, kind: fixup_13_pcrel
-; CHECK: rjmp x             ; encoding: [A,0b1100AAAA]
-; CHECK-NEXT:               ;   fixup A - offset: 0, value: x, kind: fixup_13_pcrel
+; CHECK: rjmp (.Ltmp0+2)+2    ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:                 ;   fixup A - offset: 0, value: (.Ltmp0+2)+2, kind: fixup_13_pcrel
+; CHECK: rjmp (.Ltmp1-2)+2    ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:                 ;   fixup A - offset: 0, value: (.Ltmp1-2)+2, kind: fixup_13_pcrel
+; CHECK: rjmp foo             ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:                 ;   fixup A - offset: 0, value: foo, kind: fixup_13_pcrel
+; CHECK: rjmp (.Ltmp2+8)+2    ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:                 ;   fixup A - offset: 0, value: (.Ltmp2+8)+2, kind: fixup_13_pcrel
+; CHECK: rjmp end             ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:                 ;   fixup A - offset: 0, value: end, kind: fixup_13_pcrel
+; CHECK: rjmp (.Ltmp3+0)+2    ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:                 ;   fixup A - offset: 0, value: (.Ltmp3+0)+2, kind: fixup_13_pcrel
+; CHECK: rjmp (.Ltmp4-4)+2    ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:                 ;   fixup A - offset: 0, value: (.Ltmp4-4)+2, kind: fixup_13_pcrel
+; CHECK: rjmp (.Ltmp5-6)+2    ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:                 ;   fixup A - offset: 0, value: (.Ltmp5-6)+2, kind: fixup_13_pcrel
+; CHECK: rjmp x               ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:                 ;   fixup A - offset: 0, value: x, kind: fixup_13_pcrel
+; CHECK: rjmp (.Ltmp6+4094)+2 ; encoding: [A,0b1100AAAA]
+; CHECK-NEXT:                 ;   fixup A - offset: 0, value: (.Ltmp6+4094)+2, kind: fixup_13_pcrel
 
 ; INST-LABEL: <foo>:
 ; INST-NEXT: 01 c0      rjmp  .+2
@@ -54,3 +57,4 @@ x:
 ; INST-LABEL: <x>:
 ; INST-NEXT: ff cf      rjmp  .-2
 ; INST-NEXT: 0f c0      rjmp  .+30
+; INST-NEXT: ff c7      rjmp  .+4094

From e541aa5ff994a370bacb9b039f70efb0672c4a1b Mon Sep 17 00:00:00 2001
From: Jessica Clarke <jrtc27@jrtc27.com>
Date: Fri, 18 Oct 2024 21:49:23 +0100
Subject: [PATCH 377/427] [clang] Make LazyOffsetPtr more portable (#112927)

LazyOffsetPtr currently relies on uint64_t being able to store a pointer
and, unless sizeof(uint64_t) == sizeof(void *), little endianness, since
getAddressOfPointer reinterprets the memory as a pointer. This also
doesn't properly respect the C++ object model.

As removing getAddressOfPointer would have wide-reaching implications,
improve the implementation to account for these problems by using
placement new and a suitably sized-and-aligned buffer, "right"-aligning
the objects on big-endian platforms so the LSBs are in the same place
for use as the discriminator.

Fixes: bc73ef0031b50f7443615fef614fb4ecaaa4bd11
Fixes: https://github.com/llvm/llvm-project/issues/111993
(cherry picked from commit 76196998e25b98d81abc437708622261810782ca)
---
 clang/include/clang/AST/ExternalASTSource.h | 48 +++++++++++++++------
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/clang/include/clang/AST/ExternalASTSource.h b/clang/include/clang/AST/ExternalASTSource.h
index 385c32edbae0f..582ed7c65f58c 100644
--- a/clang/include/clang/AST/ExternalASTSource.h
+++ b/clang/include/clang/AST/ExternalASTSource.h
@@ -25,10 +25,12 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
+#include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
 #include <iterator>
+#include <new>
 #include <optional>
 #include <utility>
 
@@ -326,29 +328,49 @@ struct LazyOffsetPtr {
   ///
   /// If the low bit is clear, a pointer to the AST node. If the low
   /// bit is set, the upper 63 bits are the offset.
-  mutable uint64_t Ptr = 0;
+  static constexpr size_t DataSize = std::max(sizeof(uint64_t), sizeof(T *));
+  alignas(uint64_t) alignas(T *) mutable unsigned char Data[DataSize] = {};
+
+  unsigned char GetLSB() const {
+    return Data[llvm::sys::IsBigEndianHost ? DataSize - 1 : 0];
+  }
+
+  template <typename U> U &As(bool New) const {
+    unsigned char *Obj =
+        Data + (llvm::sys::IsBigEndianHost ? DataSize - sizeof(U) : 0);
+    if (New)
+      return *new (Obj) U;
+    return *std::launder(reinterpret_cast<U *>(Obj));
+  }
+
+  T *&GetPtr() const { return As<T *>(false); }
+  uint64_t &GetU64() const { return As<uint64_t>(false); }
+  void SetPtr(T *Ptr) const { As<T *>(true) = Ptr; }
+  void SetU64(uint64_t U64) const { As<uint64_t>(true) = U64; }
 
 public:
   LazyOffsetPtr() = default;
-  explicit LazyOffsetPtr(T *Ptr) : Ptr(reinterpret_cast<uint64_t>(Ptr)) {}
+  explicit LazyOffsetPtr(T *Ptr) : Data() { SetPtr(Ptr); }
 
-  explicit LazyOffsetPtr(uint64_t Offset) : Ptr((Offset << 1) | 0x01) {
+  explicit LazyOffsetPtr(uint64_t Offset) : Data() {
     assert((Offset << 1 >> 1) == Offset && "Offsets must require < 63 bits");
     if (Offset == 0)
-      Ptr = 0;
+      SetPtr(nullptr);
+    else
+      SetU64((Offset << 1) | 0x01);
   }
 
   LazyOffsetPtr &operator=(T *Ptr) {
-    this->Ptr = reinterpret_cast<uint64_t>(Ptr);
+    SetPtr(Ptr);
     return *this;
   }
 
   LazyOffsetPtr &operator=(uint64_t Offset) {
     assert((Offset << 1 >> 1) == Offset && "Offsets must require < 63 bits");
     if (Offset == 0)
-      Ptr = 0;
+      SetPtr(nullptr);
     else
-      Ptr = (Offset << 1) | 0x01;
+      SetU64((Offset << 1) | 0x01);
 
     return *this;
   }
@@ -356,15 +378,15 @@ struct LazyOffsetPtr {
   /// Whether this pointer is non-NULL.
   ///
   /// This operation does not require the AST node to be deserialized.
-  explicit operator bool() const { return Ptr != 0; }
+  explicit operator bool() const { return isOffset() || GetPtr() != nullptr; }
 
   /// Whether this pointer is non-NULL.
   ///
   /// This operation does not require the AST node to be deserialized.
-  bool isValid() const { return Ptr != 0; }
+  bool isValid() const { return isOffset() || GetPtr() != nullptr; }
 
   /// Whether this pointer is currently stored as an offset.
-  bool isOffset() const { return Ptr & 0x01; }
+  bool isOffset() const { return GetLSB() & 0x01; }
 
   /// Retrieve the pointer to the AST node that this lazy pointer points to.
   ///
@@ -375,9 +397,9 @@ struct LazyOffsetPtr {
     if (isOffset()) {
       assert(Source &&
              "Cannot deserialize a lazy pointer without an AST source");
-      Ptr = reinterpret_cast<uint64_t>((Source->*Get)(OffsT(Ptr >> 1)));
+      SetPtr((Source->*Get)(OffsT(GetU64() >> 1)));
     }
-    return reinterpret_cast<T*>(Ptr);
+    return GetPtr();
   }
 
   /// Retrieve the address of the AST node pointer. Deserializes the pointee if
@@ -385,7 +407,7 @@ struct LazyOffsetPtr {
   T **getAddressOfPointer(ExternalASTSource *Source) const {
     // Ensure the integer is in pointer form.
     (void)get(Source);
-    return reinterpret_cast<T**>(&Ptr);
+    return &GetPtr();
   }
 };
 

From ab51eccf88f5321e7c60591c5546b254b6afab99 Mon Sep 17 00:00:00 2001
From: Florian Albrechtskirchinger <falbrechtskirchinger@gmail.com>
Date: Mon, 21 Oct 2024 12:24:58 +0200
Subject: [PATCH 378/427] [Serialization] Handle uninitialized type constraints

The ASTWriter currently assumes template type constraints to be
initialized ((bool)getTypeConstraint() == hasTypeConstraint()). Issues
#99036 and #109354 identified a scenario where this assertion is
violated.

This patch removes the assumption and adds another boolean to the
serialization, to explicitly encode whether the type constraint has been
initialized.

The same issue was incidentally fixed on the main branch by #111179.
This solution avoids backporting #111179 and its dependencies.
---
 clang/lib/Serialization/ASTReaderDecl.cpp  |  3 ++-
 clang/lib/Serialization/ASTWriterDecl.cpp  |  5 +++--
 clang/test/PCH/cxx2a-constraints-crash.cpp | 15 +++++++++++----
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index c118f3818467d..154acdfbe0327 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -2665,7 +2665,8 @@ void ASTDeclReader::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) {
 
   D->setDeclaredWithTypename(Record.readInt());
 
-  if (D->hasTypeConstraint()) {
+  const bool TypeConstraintInitialized = Record.readBool();
+  if (TypeConstraintInitialized && D->hasTypeConstraint()) {
     ConceptReference *CR = nullptr;
     if (Record.readBool())
       CR = Record.readConceptReference();
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index 8a4ca54349e38..ff1334340874b 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -1880,7 +1880,7 @@ void ASTDeclWriter::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) {
   Record.push_back(D->wasDeclaredWithTypename());
 
   const TypeConstraint *TC = D->getTypeConstraint();
-  assert((bool)TC == D->hasTypeConstraint());
+  Record.push_back(/*TypeConstraintInitialized=*/TC != nullptr);
   if (TC) {
     auto *CR = TC->getConceptReference();
     Record.push_back(CR != nullptr);
@@ -1898,7 +1898,7 @@ void ASTDeclWriter::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) {
   if (OwnsDefaultArg)
     Record.AddTemplateArgumentLoc(D->getDefaultArgument());
 
-  if (!TC && !OwnsDefaultArg &&
+  if (!D->hasTypeConstraint() && !OwnsDefaultArg &&
       D->getDeclContext() == D->getLexicalDeclContext() &&
       !D->isInvalidDecl() && !D->hasAttrs() &&
       !D->isTopLevelDeclInObjCContainer() && !D->isImplicit() &&
@@ -2561,6 +2561,7 @@ void ASTWriter::WriteDeclAbbrevs() {
   // TemplateTypeParmDecl
   Abv->Add(
       BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // wasDeclaredWithTypename
+  Abv->Add(BitCodeAbbrevOp(0));                    // TypeConstraintInitialized
   Abv->Add(BitCodeAbbrevOp(0));                    // OwnsDefaultArg
   DeclTemplateTypeParmAbbrev = Stream.EmitAbbrev(std::move(Abv));
 
diff --git a/clang/test/PCH/cxx2a-constraints-crash.cpp b/clang/test/PCH/cxx2a-constraints-crash.cpp
index 637c55f0c879c..6126a0509fa4a 100644
--- a/clang/test/PCH/cxx2a-constraints-crash.cpp
+++ b/clang/test/PCH/cxx2a-constraints-crash.cpp
@@ -1,7 +1,5 @@
-// RUN: %clang_cc1 -std=c++2a -emit-pch %s -o %t
-// RUN: %clang_cc1 -std=c++2a -include-pch %t -verify %s
-
-// expected-no-diagnostics
+// RUN: %clang_cc1 -std=c++2a -fallow-pch-with-compiler-errors -emit-pch -o %t %s -verify
+// RUN: %clang_cc1 -std=c++2a -fallow-pch-with-compiler-errors -include-pch %t %s -verify
 
 #ifndef HEADER
 #define HEADER
@@ -27,3 +25,12 @@ int main() {
 }
 
 #endif
+
+namespace GH99036 {
+
+template <typename T>
+concept C; // expected-error {{expected '='}}
+
+template <C U> void f(); // expected-error {{unknown type name 'C'}}
+
+} // namespace GH99036

From a3f0f1d004a61ef94c115e7e28863ce0b476aa99 Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@hieta.se>
Date: Mon, 11 Nov 2024 14:12:23 +0100
Subject: [PATCH 379/427] Bump version to 19.1.4

---
 cmake/Modules/LLVMVersion.cmake          | 2 +-
 libcxx/include/__config                  | 2 +-
 llvm/utils/gn/secondary/llvm/version.gni | 2 +-
 llvm/utils/lit/lit/__init__.py           | 2 +-
 llvm/utils/mlgo-utils/mlgo/__init__.py   | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake
index d57561fcd1748..6ccb934aef436 100644
--- a/cmake/Modules/LLVMVersion.cmake
+++ b/cmake/Modules/LLVMVersion.cmake
@@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR)
   set(LLVM_VERSION_MINOR 1)
 endif()
 if(NOT DEFINED LLVM_VERSION_PATCH)
-  set(LLVM_VERSION_PATCH 3)
+  set(LLVM_VERSION_PATCH 4)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
   set(LLVM_VERSION_SUFFIX)
diff --git a/libcxx/include/__config b/libcxx/include/__config
index ecb21a705c515..a929db5d0f2d1 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -27,7 +27,7 @@
 // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM.
 // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is
 // defined to XXYYZZ.
-#  define _LIBCPP_VERSION 190103
+#  define _LIBCPP_VERSION 190104
 
 #  define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
 #  define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni
index fa570bad3b1be..0c2804f70a147 100644
--- a/llvm/utils/gn/secondary/llvm/version.gni
+++ b/llvm/utils/gn/secondary/llvm/version.gni
@@ -1,4 +1,4 @@
 llvm_version_major = 19
 llvm_version_minor = 1
-llvm_version_patch = 3
+llvm_version_patch = 4
 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch"
diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py
index 9c7b53d82aae2..81b74db977b08 100644
--- a/llvm/utils/lit/lit/__init__.py
+++ b/llvm/utils/lit/lit/__init__.py
@@ -2,7 +2,7 @@
 
 __author__ = "Daniel Dunbar"
 __email__ = "daniel@minormatter.com"
-__versioninfo__ = (19, 1, 3)
+__versioninfo__ = (19, 1, 4)
 __version__ = ".".join(str(v) for v in __versioninfo__) + "dev"
 
 __all__ = []
diff --git a/llvm/utils/mlgo-utils/mlgo/__init__.py b/llvm/utils/mlgo-utils/mlgo/__init__.py
index 95aa8e2684d06..77fe60a0b1590 100644
--- a/llvm/utils/mlgo-utils/mlgo/__init__.py
+++ b/llvm/utils/mlgo-utils/mlgo/__init__.py
@@ -4,7 +4,7 @@
 
 from datetime import timezone, datetime
 
-__versioninfo__ = (19, 1, 3)
+__versioninfo__ = (19, 1, 4)
 __version__ = (
     ".".join(str(v) for v in __versioninfo__)
     + "dev"

From d174e2a5538984bca3d31ac41aff71528bb28747 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Thu, 7 Nov 2024 00:18:57 +0200
Subject: [PATCH 380/427] [compiler-rt] [fuzzer] Skip trying to set the thread
 name on MinGW (#115167)

Since b4130bee6bfd34d8045f02fc9f951bcb5db9d85c, we check for
_LIBCPP_HAS_THREAD_API_PTHREAD to decide between using
SetThreadDescription or pthread_setname_np for setting the thread name.

c6f3b7bcd0596d30f8dabecdfb9e44f9a07b6e4c changed how libcxx defines
their configuration macros - now they are always defined, but defined to
0 or 1, while they previously were either defined or undefined.

As these libcxx defines used to be defined to an empty string (rather
than expanding to 1) if enabled, we can't easily produce an expression
that works both with older and newer libcxx. Additionally, these defines
are libcxx internal config macros that aren't a detail that isn't
supported and isn't meant to be relied upon.

Simply skip trying to set thread name on MinGW as we can't easily know
which kind of thread native handle we have. Setting the thread name is
only a nice to have, quality of life improvement - things should work
the same even without it.

Additionally, libfuzzer isn't generally usable on MinGW targets yet
(Clang doesn't include it in the getSupportedSanitizers() method for the
MinGW target), so this shouldn't make any difference in practice anyway.

(cherry picked from commit 87f4bc0acad65b1d20160d4160c7778b187125fc)
---
 compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp
index 73eea07cf869f..da3eb3cfb3406 100644
--- a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp
@@ -239,10 +239,11 @@ size_t PageSize() {
 }
 
 void SetThreadName(std::thread &thread, const std::string &name) {
-#if defined(_LIBCPP_HAS_THREAD_API_PTHREAD) ||                                 \
-    defined(_GLIBCXX_GCC_GTHR_POSIX_H)
-  (void)pthread_setname_np(thread.native_handle(), name.c_str());
-#else
+#ifndef __MINGW32__
+  // Not setting the thread name in MinGW environments. MinGW C++ standard
+  // libraries can either use native Windows threads or pthreads, so we
+  // don't know with certainty what kind of thread handle we're getting
+  // from thread.native_handle() here.
   typedef HRESULT(WINAPI * proc)(HANDLE, PCWSTR);
   HMODULE kbase = GetModuleHandleA("KernelBase.dll");
   proc ThreadNameProc =

From 2d8ee3a2d800b218a6c11de738e702148bca4f27 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Sun, 22 Sep 2024 22:31:36 -0700
Subject: [PATCH 381/427] [RISCV] Don't delete all fixups in
 RISCVMCCodeEmitter::expandLongCondBr. (#109513)

The Fixups vector passed into this function may already have fixups in
it from earlier instructions. We should not erase those. We just want to
erase fixups added by this function.

Fixes #108612.

(cherry picked from commit c3d3cef8d58377b02032b07b5f094a402a70435a)
---
 llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 7 ++++++-
 llvm/test/MC/RISCV/rv64-relax-all.s                       | 6 ++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 0863345b0c6dc..c9636b2c70250 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -283,13 +283,18 @@ void RISCVMCCodeEmitter::expandLongCondBr(const MCInst &MI,
     Offset = 4;
   }
 
+  // Save the number fixups.
+  size_t FixupStartIndex = Fixups.size();
+
   // Emit an unconditional jump to the destination.
   MCInst TmpInst =
       MCInstBuilder(RISCV::JAL).addReg(RISCV::X0).addOperand(SrcSymbol);
   uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
   support::endian::write(CB, Binary, llvm::endianness::little);
 
-  Fixups.clear();
+  // Drop any fixup added so we can add the correct one.
+  Fixups.resize(FixupStartIndex);
+
   if (SrcSymbol.isExpr()) {
     Fixups.push_back(MCFixup::create(Offset, SrcSymbol.getExpr(),
                                      MCFixupKind(RISCV::fixup_riscv_jal),
diff --git a/llvm/test/MC/RISCV/rv64-relax-all.s b/llvm/test/MC/RISCV/rv64-relax-all.s
index 70a3f77540c99..6705d6ecfb5b6 100644
--- a/llvm/test/MC/RISCV/rv64-relax-all.s
+++ b/llvm/test/MC/RISCV/rv64-relax-all.s
@@ -14,3 +14,9 @@ c.beqz a0, NEAR
 # INSTR:           c.j    0x0 <NEAR>
 # RELAX-INSTR:     jal    zero, 0x0 <NEAR>
 c.j NEAR
+
+bnez s0, .foo
+j    .foo
+beqz s0, .foo
+.foo:
+ret

From 430c4952286f11a19dea9ad1881792442370470d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Thu, 24 Oct 2024 23:45:14 +0300
Subject: [PATCH 382/427] [compiler-rt] [test] Fix using toolchains that rely
 on Clang default configs (#113491)

The use of CLANG_NO_DEFAULT_CONFIG in the tests was added because some
Linux distributions had a global default config file, that added flags
relating to hardening, which interfere with the sanitizer tests. By
setting CLANG_NO_DEFAULT_CONFIG, the global default config files that
are found are ignored, and the sanitizers get the expected default
compiler behaviour.

(This was https://github.com/llvm/llvm-project/issues/60394, which was
fixed in 8ab762557fb057af1a3015211ee116a975027e78.)

However, some toolchains may rely on default config files for mandatory
parts required for functioning at all - setting things like sysroots,
-rtlib, -unwindlib, -stdlib, -fuse-ld etc. In such a case we can't
forcibly disable any default config, because it will break the otherwise
working toolchain.

Add a test for whether the compiler works while passing
--no-default-config to it. If the option is accepted and the toolchain
still works while that is set, set CLANG_NO_DEFAULT_CONFIG while running
tests.

(This adds a little bit of inconsistency, as we're testing for the
command line option, while using the environment variable. However doing
compile testing with an environment variable isn't quite as easily
doable, and passing an extra command line flag to all compile commands
while testing, is a bit clumsy - therefore this inconsistency.)

(cherry picked from commit a14a83d9a102253eca7c02ff4c35a2ce3f7de6e5)
---
 compiler-rt/CMakeLists.txt                | 16 ++++++++++++++++
 compiler-rt/test/CMakeLists.txt           |  2 ++
 compiler-rt/test/lit.common.cfg.py        |  6 +++++-
 compiler-rt/test/lit.common.configured.in |  1 +
 4 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
index 2207555b03a03..6cf20ab7c183c 100644
--- a/compiler-rt/CMakeLists.txt
+++ b/compiler-rt/CMakeLists.txt
@@ -39,6 +39,22 @@ include(CompilerRTUtils)
 include(CMakeDependentOption)
 include(GetDarwinLinkerVersion)
 
+include(CheckCXXCompilerFlag)
+
+# Check if we can compile with --no-default-config, or if that omits a config
+# file that is essential for the toolchain to work properly.
+#
+# Using CMAKE_REQUIRED_FLAGS to make sure the flag is used both for compilation
+# and for linking.
+#
+# Doing this test early on, to see if the flag works on the toolchain
+# out of the box. Later on, we end up adding -nostdlib and similar flags
+# to all test compiles, which easily can give false positives on this test.
+set(OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
+set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} --no-default-config")
+check_cxx_compiler_flag("" COMPILER_RT_HAS_NO_DEFAULT_CONFIG_FLAG)
+set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
+
 option(COMPILER_RT_BUILD_BUILTINS "Build builtins" ON)
 mark_as_advanced(COMPILER_RT_BUILD_BUILTINS)
 option(COMPILER_RT_DISABLE_AARCH64_FMV "Disable AArch64 Function Multi Versioning support" OFF)
diff --git a/compiler-rt/test/CMakeLists.txt b/compiler-rt/test/CMakeLists.txt
index 84a98f3674749..f9e23710d3e4f 100644
--- a/compiler-rt/test/CMakeLists.txt
+++ b/compiler-rt/test/CMakeLists.txt
@@ -12,6 +12,8 @@ pythonize_bool(COMPILER_RT_ENABLE_INTERNAL_SYMBOLIZER)
 
 pythonize_bool(COMPILER_RT_HAS_AARCH64_SME)
 
+pythonize_bool(COMPILER_RT_HAS_NO_DEFAULT_CONFIG_FLAG)
+
 configure_compiler_rt_lit_site_cfg(
   ${CMAKE_CURRENT_SOURCE_DIR}/lit.common.configured.in
   ${CMAKE_CURRENT_BINARY_DIR}/lit.common.configured)
diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py
index 0690c3a18efdb..d4b1e1d71d3c5 100644
--- a/compiler-rt/test/lit.common.cfg.py
+++ b/compiler-rt/test/lit.common.cfg.py
@@ -980,7 +980,11 @@ def is_windows_lto_supported():
 # default configs for the test runs. In particular, anything hardening
 # related is likely to cause issues with sanitizer tests, because it may
 # preempt something we're looking to trap (e.g. _FORTIFY_SOURCE vs our ASAN).
-config.environment["CLANG_NO_DEFAULT_CONFIG"] = "1"
+#
+# Only set this if we know we can still build for the target while disabling
+# default configs.
+if config.has_no_default_config_flag:
+    config.environment["CLANG_NO_DEFAULT_CONFIG"] = "1"
 
 if config.has_compiler_rt_libatomic:
   base_lib = os.path.join(config.compiler_rt_libdir, "libclang_rt.atomic%s.so"
diff --git a/compiler-rt/test/lit.common.configured.in b/compiler-rt/test/lit.common.configured.in
index 8889b816b149f..f727662799552 100644
--- a/compiler-rt/test/lit.common.configured.in
+++ b/compiler-rt/test/lit.common.configured.in
@@ -53,6 +53,7 @@ set_default("test_standalone_build_libs", @COMPILER_RT_TEST_STANDALONE_BUILD_LIB
 set_default("has_compiler_rt_libatomic", @COMPILER_RT_BUILD_STANDALONE_LIBATOMIC_PYBOOL@)
 set_default("aarch64_sme", @COMPILER_RT_HAS_AARCH64_SME_PYBOOL@)
 set_default("darwin_linker_version", "@COMPILER_RT_DARWIN_LINKER_VERSION@")
+set_default("has_no_default_config_flag", @COMPILER_RT_HAS_NO_DEFAULT_CONFIG_FLAG_PYBOOL@)
 # True iff the test suite supports ignoring the test compiler's runtime library path
 # and using `config.compiler_rt_libdir` instead. This only matters when the runtime
 # library paths differ.

From 7b6ee6e053fe24da790deccc0dd8fa49c6d3543d Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Fri, 1 Nov 2024 18:47:50 -0700
Subject: [PATCH 383/427] [clang-format] Fix a regression in parsing `switch`
 in macro call (#114506)

Fixes #114408.

(cherry picked from commit 6ca816f88d5f0f2032d1610207023133eaf40a1e)
---
 clang/lib/Format/UnwrappedLineParser.cpp      | 8 ++++++--
 clang/unittests/Format/TokenAnnotatorTest.cpp | 7 +++++++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index a5268e153bcc5..bfb592ae07493 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -2086,7 +2086,8 @@ void UnwrappedLineParser::parseStructuralElement(
     case tok::kw_switch:
       if (Style.Language == FormatStyle::LK_Java)
         parseSwitch(/*IsExpr=*/true);
-      nextToken();
+      else
+        nextToken();
       break;
     case tok::kw_case:
       // Proto: there are no switch/case statements.
@@ -2637,7 +2638,10 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
         nextToken();
       break;
     case tok::kw_switch:
-      parseSwitch(/*IsExpr=*/true);
+      if (Style.Language == FormatStyle::LK_Java)
+        parseSwitch(/*IsExpr=*/true);
+      else
+        nextToken();
       break;
     case tok::kw_requires: {
       auto RequiresToken = FormatTok;
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 4acd900ff061f..07999116ab0cf 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -3412,6 +3412,13 @@ TEST_F(TokenAnnotatorTest, TemplateInstantiation) {
   EXPECT_TOKEN(Tokens[18], tok::greater, TT_TemplateCloser);
 }
 
+TEST_F(TokenAnnotatorTest, SwitchInMacroArgument) {
+  auto Tokens = annotate("FOOBAR(switch);\n"
+                         "void f() {}");
+  ASSERT_EQ(Tokens.size(), 12u) << Tokens;
+  EXPECT_TOKEN(Tokens[9], tok::l_brace, TT_FunctionLBrace);
+}
+
 } // namespace
 } // namespace format
 } // namespace clang

From 33c4723541d7de5399e07c1fb48c5e779c3fb564 Mon Sep 17 00:00:00 2001
From: Younan Zhang <zyn7109@gmail.com>
Date: Tue, 5 Nov 2024 16:25:35 +0800
Subject: [PATCH 384/427] release/19.x: [Clang] Consider outer instantiation
 scopes for constraint normalization

Backport 227afac3
---
 clang/lib/Sema/SemaConcept.cpp                   |  2 +-
 .../SemaTemplate/concepts-out-of-line-def.cpp    | 16 ++++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index 244f6ef2f53fa..c45443d76e6ba 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -967,7 +967,7 @@ static const Expr *SubstituteConstraintExpressionWithoutSatisfaction(
   // parameters that the surrounding function hasn't been instantiated yet. Note
   // this may happen while we're comparing two templates' constraint
   // equivalence.
-  LocalInstantiationScope ScopeForParameters(S);
+  LocalInstantiationScope ScopeForParameters(S, /*CombineWithOuterScope=*/true);
   if (auto *FD = DeclInfo.getDecl()->getAsFunction())
     for (auto *PVD : FD->parameters()) {
       if (!PVD->isParameterPack()) {
diff --git a/clang/test/SemaTemplate/concepts-out-of-line-def.cpp b/clang/test/SemaTemplate/concepts-out-of-line-def.cpp
index 333187b0d74ad..c5dd855f0c000 100644
--- a/clang/test/SemaTemplate/concepts-out-of-line-def.cpp
+++ b/clang/test/SemaTemplate/concepts-out-of-line-def.cpp
@@ -622,3 +622,19 @@ void A<T>::method(Ts&... ts)
   } {}
 
 }
+
+namespace GH114685 {
+
+template <typename T> struct ptr {
+  template <typename U>
+  friend ptr<U> make_item(auto &&args)
+    requires(sizeof(args) > 1);
+};
+
+template <typename U>
+ptr<U> make_item(auto &&args)
+  requires(sizeof(args) > 1) {}
+
+ptr<char> p;
+
+} // namespace GH114685

From 76ca2e0566c8589c0b4f91faea5ce6ed3536a384 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sat, 2 Nov 2024 15:39:15 +0800
Subject: [PATCH 385/427] [LoongArch][Clang] Add tests for #110834 (#114509)

(cherry picked from commit 96d2196f6f73e5712f1df8cd26de8a12c7f24de4)
---
 clang/test/Headers/lasxintrin.c | 6 ++++++
 clang/test/Headers/lsxintrin.c  | 6 ++++++
 2 files changed, 12 insertions(+)
 create mode 100644 clang/test/Headers/lasxintrin.c
 create mode 100644 clang/test/Headers/lsxintrin.c

diff --git a/clang/test/Headers/lasxintrin.c b/clang/test/Headers/lasxintrin.c
new file mode 100644
index 0000000000000..6126c6350455c
--- /dev/null
+++ b/clang/test/Headers/lasxintrin.c
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 %s -fsyntax-only -triple loongarch64 -target-feature +lasx
+// RUN: not %clang_cc1 %s -fsyntax-only -triple loongarch64 -target-feature +lasx -flax-vector-conversions=none
+// RUN: not %clang_cc1 %s -fsyntax-only -triple loongarch64 -target-feature +lasx -flax-vector-conversions=none -fno-signed-char
+// FIXME: "not" should be removed once we fix GH#110834.
+
+#include <lasxintrin.h>
diff --git a/clang/test/Headers/lsxintrin.c b/clang/test/Headers/lsxintrin.c
new file mode 100644
index 0000000000000..930d3efe62e54
--- /dev/null
+++ b/clang/test/Headers/lsxintrin.c
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 %s -fsyntax-only -triple loongarch64 -target-feature +lsx
+// RUN: not %clang_cc1 %s -fsyntax-only -triple loongarch64 -target-feature +lsx -flax-vector-conversions=none
+// RUN: not %clang_cc1 %s -fsyntax-only -triple loongarch64 -target-feature +lsx -flax-vector-conversions=none -fno-signed-char
+// FIXME: "not" should be removed once we fix GH#110834.
+
+#include <lsxintrin.h>

From 6a668bce7b50807e861d4769ad1a253a0579d4ad Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sat, 2 Nov 2024 15:49:40 +0800
Subject: [PATCH 386/427] [LoongArch][clang] Use `signed char` vectors instead
 of `char` vectors for LSX and LASX builtins (#114510)

`-flax-vector-conversions=none` does not allow an implicit conversion
from `signed char` vector to `char` vector, and we cannot remove
`signed`
from `v16i8` or `v32i8` because doing so will break our expectation with
`-fno-signed-char`.  So to make lsxintrin.h and lasxintrin.h work fine
with `-flax-vector-conversions=none`, we must use `signed char` instead
of `char`.

The change is just done via

    sed 's/V16c/V16Sc/g' -i BuiltinsLoongArchLSX.def
    sed 's/V32c/V32Sc/g' -i BuiltinsLoongArchLASX.def

Depends on #114509.  Part of #110834 fix.

(cherry picked from commit b88505414d47ca267f4df8823309264f78935686)
---
 .../clang/Basic/BuiltinsLoongArchLASX.def     | 142 +++++++++---------
 .../clang/Basic/BuiltinsLoongArchLSX.def      | 128 ++++++++--------
 2 files changed, 135 insertions(+), 135 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
index 4cf51cc000f6f..0d7c2df5c5c50 100644
--- a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
+++ b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
@@ -12,29 +12,29 @@
 //
 //===----------------------------------------------------------------------===//
 
-TARGET_BUILTIN(__builtin_lasx_xvadd_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvadd_b, "V32ScV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvadd_h, "V16sV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvadd_w, "V8iV8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvadd_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvadd_q, "V4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvsub_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsub_b, "V32ScV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsub_h, "V16sV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsub_w, "V8iV8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsub_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsub_q, "V4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvaddi_bu, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvaddi_bu, "V32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvaddi_hu, "V16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvaddi_wu, "V8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvaddi_du, "V4LLiV4LLiIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvsubi_bu, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsubi_bu, "V32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsubi_hu, "V16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsubi_wu, "V8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsubi_du, "V4LLiV4LLiIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvneg_b, "V32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvneg_b, "V32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvneg_h, "V16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvneg_w, "V8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvneg_d, "V4LLiV4LLi", "nc", "lasx")
@@ -79,22 +79,22 @@ TARGET_BUILTIN(__builtin_lasx_xvhsubw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvhsubw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvhsubw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_h, "V8SiV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_h, "V8SiV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_h, "V8SiV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_h, "V8SiV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
@@ -119,12 +119,12 @@ TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu_h, "V8SiV16UsV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, "V16sV32UcV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, "V16sV32UcV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx")
@@ -209,7 +209,7 @@ TARGET_BUILTIN(__builtin_lasx_xvmul_h, "V16SsV16SsV16Ss", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmul_w, "V8SiV8SiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmul_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvmuh_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmuh_b, "V32ScV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmuh_h, "V16sV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmuh_w, "V8iV8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmuh_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
@@ -219,12 +219,12 @@ TARGET_BUILTIN(__builtin_lasx_xvmuh_hu, "V16UsV16UsV16Us", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmuh_wu, "V8UiV8UiV8Ui", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmuh_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_h, "V8SiV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_h, "V8SiV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
@@ -239,12 +239,12 @@ TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, "V16sV32UcV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu_h, "V8SiV16UsV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, "V16sV32UcV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, "V16sV32UcV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx")
@@ -259,12 +259,12 @@ TARGET_BUILTIN(__builtin_lasx_xvmsub_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmsub_w, "V8SiV8SiV8SiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmsub_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_h, "V8SiV8SiV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_h, "V8SiV8SiV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx")
@@ -279,12 +279,12 @@ TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx"
 TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx")
@@ -320,7 +320,7 @@ TARGET_BUILTIN(__builtin_lasx_xvsat_hu, "V16UsV16UsIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsat_wu, "V8UiV8UiIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsat_du, "V4ULLiV4ULLiIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvexth_w_h, "V8SiV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvexth_d_w, "V4LLiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvexth_q_d, "V4LLiV4LLi", "nc", "lasx")
@@ -330,17 +330,17 @@ TARGET_BUILTIN(__builtin_lasx_xvexth_wu_hu, "V8UiV16Us", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvexth_du_wu, "V4ULLiV8Ui", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvexth_qu_du, "V4ULLiV4ULLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32c", "nc", "lasx")
-TARGET_BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32c", "nc", "lasx")
-TARGET_BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32Sc", "nc", "lasx")
 
 TARGET_BUILTIN(__builtin_lasx_vext2xv_w_h, "V8SiV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_vext2xv_d_h, "V4LLiV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_vext2xv_d_w, "V4LLiV8Si", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32c", "nc", "lasx")
-TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32c", "nc", "lasx")
-TARGET_BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32Sc", "nc", "lasx")
 
 TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_hu, "V8SiV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_vext2xv_du_hu, "V4LLiV16s", "nc", "lasx")
@@ -351,16 +351,16 @@ TARGET_BUILTIN(__builtin_lasx_xvsigncov_h, "V16SsV16SsV16Ss", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsigncov_w, "V8SiV8SiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsigncov_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvmskltz_b, "V32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmskltz_b, "V32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmskltz_h, "V16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmskltz_w, "V8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmskltz_d, "V4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvmskgez_b, "V32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmskgez_b, "V32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmsknz_b, "V16sV16s", "nc", "lasx")
 
 TARGET_BUILTIN(__builtin_lasx_xvldi, "V4LLiIi", "nc", "lasx")
-TARGET_BUILTIN(__builtin_lasx_xvrepli_b, "V32cIi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvrepli_b, "V32ScIi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvrepli_h, "V16sIi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvrepli_w, "V8iIi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvrepli_d, "V4LLiIi", "nc", "lasx")
@@ -368,7 +368,7 @@ TARGET_BUILTIN(__builtin_lasx_xvrepli_d, "V4LLiIi", "nc", "lasx")
 
 TARGET_BUILTIN(__builtin_lasx_xvand_v, "V32UcV32UcV32Uc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvor_v, "V32UcV32UcV32Uc", "nc", "lasx")
-TARGET_BUILTIN(__builtin_lasx_xvxor_v, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvxor_v, "V32ScV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvnor_v, "V32UcV32UcV32Uc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvandn_v, "V32UcV32UcV32Uc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvorn_v, "V32ScV32ScV32Sc", "nc", "lasx")
@@ -378,47 +378,47 @@ TARGET_BUILTIN(__builtin_lasx_xvori_b, "V32UcV32UcIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvxori_b, "V32UcV32UcIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvnori_b, "V32UcV32UcIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvsll_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsll_b, "V32ScV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsll_h, "V16sV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsll_w, "V8iV8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsll_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvslli_b, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvslli_b, "V32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvslli_h, "V16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvslli_w, "V8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvslli_d, "V4LLiV4LLiIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvsrl_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrl_b, "V32ScV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrl_h, "V16sV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrl_w, "V8iV8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrl_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvsrli_b, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrli_b, "V32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrli_h, "V16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrli_w, "V8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrli_d, "V4LLiV4LLiIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvsra_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsra_b, "V32ScV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsra_h, "V16sV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsra_w, "V8iV8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsra_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvsrai_b, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrai_b, "V32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrai_h, "V16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrai_w, "V8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrai_d, "V4LLiV4LLiIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvrotr_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvrotr_b, "V32ScV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvrotr_h, "V16sV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvrotr_w, "V8iV8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvrotr_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvrotri_b, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvrotri_b, "V32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvrotri_h, "V16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvrotri_w, "V8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvrotri_d, "V4LLiV4LLiIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsllwil_w_h, "V8SiV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsllwil_d_w, "V4LLiV8SiIUi", "nc", "lasx")
 
@@ -430,22 +430,22 @@ TARGET_BUILTIN(__builtin_lasx_xvsllwil_du_wu, "V4ULLiV8UiIUi", "nc", "lasx")
 
 TARGET_BUILTIN(__builtin_lasx_xvextl_qu_du, "V4LLiV4ULLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvsrlr_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrlr_b, "V32ScV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrlr_h, "V16sV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrlr_w, "V8iV8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrlr_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvsrlri_b, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrlri_b, "V32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrlri_h, "V16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrlri_w, "V8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrlri_d, "V4LLiV4LLiIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvsrar_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrar_b, "V32ScV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrar_h, "V16sV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrar_w, "V8iV8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrar_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvsrari_b, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrari_b, "V32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrari_h, "V16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrari_w, "V8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrari_d, "V4LLiV4LLiIUi", "nc", "lasx")
@@ -458,12 +458,12 @@ TARGET_BUILTIN(__builtin_lasx_xvsran_b_h, "V32ScV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsran_h_w, "V16sV8SiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32ScV32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvsrani_b_h, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrani_b_h, "V32ScV32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrani_h_w, "V16sV16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrani_w_d, "V8iV8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
@@ -476,12 +476,12 @@ TARGET_BUILTIN(__builtin_lasx_xvsrarn_b_h, "V32ScV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrarn_h_w, "V16sV8SiV8Si", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrarn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32ScV32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrlrni_w_d, "V8iV8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32ScV32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvsrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
@@ -502,22 +502,22 @@ TARGET_BUILTIN(__builtin_lasx_xvssran_bu_h, "V32UcV16UsV16Us", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssran_hu_w, "V16UsV8UiV8Ui", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssran_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32ScV32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvssrani_b_h, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrani_b_h, "V32ScV32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrani_h_w, "V16sV16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrani_w_d, "V8iV8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32ScV32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrlrni_hu_w, "V16sV16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrlrni_wu_d, "V8iV8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrlrni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32ScV32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrani_hu_w, "V16sV16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrani_wu_d, "V8iV8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrani_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
@@ -538,22 +538,22 @@ TARGET_BUILTIN(__builtin_lasx_xvssrarn_bu_h, "V32UcV16UsV16Us", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrarn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrarn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32ScV32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrlrni_w_d, "V8iV8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32ScV32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32ScV32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrlni_hu_w, "V16sV16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrlni_wu_d, "V8iV8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrlni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32ScV32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrarni_hu_w, "V16sV16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrarni_wu_d, "V8iV8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvssrarni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
@@ -606,7 +606,7 @@ TARGET_BUILTIN(__builtin_lasx_xvbitrevi_d, "V4ULLiV4ULLiIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvfrstp_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvfrstp_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvfrstpi_b, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvfrstpi_b, "V32ScV32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvfrstpi_h, "V16sV16sV16sIUi", "nc", "lasx")
 
 TARGET_BUILTIN(__builtin_lasx_xvfadd_s, "V8fV8fV8f", "nc", "lasx")
@@ -877,12 +877,12 @@ TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_d, "LLiV4SLLiIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_wu, "iV8UiIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_du, "LLiV4ULLiIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvreplve_b, "V32cV32cUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvreplve_b, "V32ScV32ScUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvreplve_h, "V16sV16sUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvreplve_w, "V8iV8iUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvreplve_d, "V4LLiV4LLiUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_h, "V16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_w, "V8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_d, "V4LLiV4LLiIUi", "nc", "lasx")
@@ -902,35 +902,35 @@ TARGET_BUILTIN(__builtin_lasx_xvpickve_d, "V4LLiV4LLiIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvpickve_w_f, "V8fV8fIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvpickve_d_f, "V4dV4dIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvbsll_v, "V32cV32cIUi", "nc", "lasx")
-TARGET_BUILTIN(__builtin_lasx_xvbsrl_v, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbsll_v, "V32ScV32ScIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvbsrl_v, "V32ScV32ScIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvpackev_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvpackev_b, "V32ScV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvpackev_h, "V16sV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvpackev_w, "V8iV8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvpackev_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvpackod_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvpackod_b, "V32ScV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvpackod_h, "V16sV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvpackod_w, "V8iV8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvpackod_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvpickev_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvpickev_b, "V32ScV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvpickev_h, "V16sV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvpickev_w, "V8iV8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvpickev_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvpickod_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvpickod_b, "V32ScV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvpickod_h, "V16sV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvpickod_w, "V8iV8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvpickod_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvilvl_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvilvl_b, "V32ScV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvilvl_h, "V16sV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvilvl_w, "V8iV8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvilvl_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvilvh_b, "V32cV32cV32c", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvilvh_b, "V32ScV32ScV32Sc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvilvh_h, "V16sV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvilvh_w, "V8iV8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvilvh_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
@@ -943,16 +943,16 @@ TARGET_BUILTIN(__builtin_lasx_xvshuf_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx")
 
 TARGET_BUILTIN(__builtin_lasx_xvperm_w, "V8iV8iV8i", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvshuf4i_b, "V32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvshuf4i_b, "V32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvshuf4i_h, "V16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvshuf4i_w, "V8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvshuf4i_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
 
 TARGET_BUILTIN(__builtin_lasx_xvpermi_w, "V8iV8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvpermi_d, "V4LLiV4LLiIUi", "nc", "lasx")
-TARGET_BUILTIN(__builtin_lasx_xvpermi_q, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvpermi_q, "V32ScV32ScV32ScIUi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvextrins_b, "V32cV32cV32cIUi", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvextrins_b, "V32ScV32ScV32ScIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvextrins_h, "V16sV16sV16sIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvextrins_w, "V8iV8iV8iIUi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvextrins_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx")
@@ -963,7 +963,7 @@ TARGET_BUILTIN(__builtin_lasx_xvst, "vV32Scv*Ii", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvldx, "V32ScvC*LLi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvstx, "vV32Scv*LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvldrepl_b, "V32cvC*Ii", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvldrepl_b, "V32ScvC*Ii", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvldrepl_h, "V16svC*Ii", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvldrepl_w, "V8ivC*Ii", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvldrepl_d, "V4LLivC*Ii", "nc", "lasx")
diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def
index c90f4dc5458fa..25a178e1ca98a 100644
--- a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def
+++ b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def
@@ -12,29 +12,29 @@
 //
 //===----------------------------------------------------------------------===//
 
-TARGET_BUILTIN(__builtin_lsx_vadd_b, "V16cV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vadd_b, "V16ScV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vadd_h, "V8sV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vadd_w, "V4iV4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vadd_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vadd_q, "V2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vsub_b, "V16cV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vsub_b, "V16ScV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsub_h, "V8sV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsub_w, "V4iV4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsub_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsub_q, "V2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vaddi_bu, "V16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vaddi_bu, "V16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vaddi_hu, "V8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vaddi_wu, "V4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vaddi_du, "V2LLiV2LLiIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vsubi_bu, "V16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vsubi_bu, "V16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsubi_hu, "V8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsubi_wu, "V4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsubi_du, "V2LLiV2LLiIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vneg_b, "V16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vneg_b, "V16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vneg_h, "V8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vneg_w, "V4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vneg_d, "V2LLiV2LLi", "nc", "lsx")
@@ -79,22 +79,22 @@ TARGET_BUILTIN(__builtin_lsx_vhsubw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vhsubw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vhsubw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vaddwev_w_h, "V4SiV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vaddwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vaddwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vaddwod_w_h, "V4SiV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vaddwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vaddwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsubwev_w_h, "V4SiV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsubwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsubwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsubwod_w_h, "V4SiV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsubwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsubwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
@@ -119,12 +119,12 @@ TARGET_BUILTIN(__builtin_lsx_vsubwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsubwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsubwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx")
@@ -209,7 +209,7 @@ TARGET_BUILTIN(__builtin_lsx_vmul_h, "V8SsV8SsV8Ss", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmul_w, "V4SiV4SiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmul_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vmuh_b, "V16cV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vmuh_b, "V16ScV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmuh_h, "V8sV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmuh_w, "V4iV4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmuh_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
@@ -219,12 +219,12 @@ TARGET_BUILTIN(__builtin_lsx_vmuh_hu, "V8UsV8UsV8Us", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmuh_wu, "V4UiV4UiV4Ui", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmuh_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmulwev_w_h, "V4SiV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmulwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmulwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmulwod_w_h, "V4SiV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmulwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmulwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
@@ -239,12 +239,12 @@ TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx")
@@ -259,12 +259,12 @@ TARGET_BUILTIN(__builtin_lsx_vmsub_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmsub_w, "V4SiV4SiV4SiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmsub_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_h, "V4SiV4SiV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_h, "V4SiV4SiV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx")
@@ -279,12 +279,12 @@ TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx")
@@ -320,7 +320,7 @@ TARGET_BUILTIN(__builtin_lsx_vsat_hu, "V8UsV8UsIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsat_wu, "V4UiV4UiIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsat_du, "V2ULLiV2ULLiIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vexth_w_h, "V4SiV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vexth_d_w, "V2LLiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vexth_q_d, "V2LLiV2LLi", "nc", "lsx")
@@ -335,16 +335,16 @@ TARGET_BUILTIN(__builtin_lsx_vsigncov_h, "V8SsV8SsV8Ss", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsigncov_w, "V4SiV4SiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsigncov_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vmskltz_b, "V16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vmskltz_b, "V16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmskltz_h, "V8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmskltz_w, "V4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmskltz_d, "V2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vmskgez_b, "V16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vmskgez_b, "V16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmsknz_b, "V8sV8s", "nc", "lsx")
 
 TARGET_BUILTIN(__builtin_lsx_vldi, "V2LLiIi", "nc", "lsx")
-TARGET_BUILTIN(__builtin_lsx_vrepli_b, "V16cIi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vrepli_b, "V16ScIi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vrepli_h, "V8sIi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vrepli_w, "V4iIi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vrepli_d, "V2LLiIi", "nc", "lsx")
@@ -352,7 +352,7 @@ TARGET_BUILTIN(__builtin_lsx_vrepli_d, "V2LLiIi", "nc", "lsx")
 
 TARGET_BUILTIN(__builtin_lsx_vand_v, "V16UcV16UcV16Uc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vor_v, "V16UcV16UcV16Uc", "nc", "lsx")
-TARGET_BUILTIN(__builtin_lsx_vxor_v, "V16cV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vxor_v, "V16ScV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vnor_v, "V16UcV16UcV16Uc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vandn_v, "V16UcV16UcV16Uc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vorn_v, "V16ScV16ScV16Sc", "nc", "lsx")
@@ -362,47 +362,47 @@ TARGET_BUILTIN(__builtin_lsx_vori_b, "V16UcV16UcIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vxori_b, "V16UcV16UcIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vnori_b, "V16UcV16UcIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vsll_b, "V16cV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vsll_b, "V16ScV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsll_h, "V8sV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsll_w, "V4iV4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsll_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vslli_b, "V16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vslli_b, "V16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vslli_h, "V8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vslli_w, "V4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vslli_d, "V2LLiV2LLiIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vsrl_b, "V16cV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vsrl_b, "V16ScV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrl_h, "V8sV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrl_w, "V4iV4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrl_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vsrli_b, "V16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vsrli_b, "V16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrli_h, "V8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrli_w, "V4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrli_d, "V2LLiV2LLiIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vsra_b, "V16cV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vsra_b, "V16ScV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsra_h, "V8sV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsra_w, "V4iV4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsra_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vsrai_b, "V16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vsrai_b, "V16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrai_h, "V8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrai_w, "V4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrai_d, "V2LLiV2LLiIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vrotr_b, "V16cV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vrotr_b, "V16ScV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vrotr_h, "V8sV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vrotr_w, "V4iV4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vrotr_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vrotri_b, "V16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vrotri_b, "V16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vrotri_h, "V8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vrotri_w, "V4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vrotri_d, "V2LLiV2LLiIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsllwil_w_h, "V4SiV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsllwil_d_w, "V2LLiV4SiIUi", "nc", "lsx")
 
@@ -414,22 +414,22 @@ TARGET_BUILTIN(__builtin_lsx_vsllwil_du_wu, "V2ULLiV4UiIUi", "nc", "lsx")
 
 TARGET_BUILTIN(__builtin_lsx_vextl_qu_du, "V2LLiV2ULLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vsrlr_b, "V16cV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vsrlr_b, "V16ScV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrlr_h, "V8sV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrlr_w, "V4iV4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrlr_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vsrlri_b, "V16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vsrlri_b, "V16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrlri_h, "V8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrlri_w, "V4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrlri_d, "V2LLiV2LLiIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vsrar_b, "V16cV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vsrar_b, "V16ScV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrar_h, "V8sV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrar_w, "V4iV4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrar_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vsrari_b, "V16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vsrari_b, "V16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrari_h, "V8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrari_w, "V4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrari_d, "V2LLiV2LLiIUi", "nc", "lsx")
@@ -442,12 +442,12 @@ TARGET_BUILTIN(__builtin_lsx_vsran_b_h, "V16ScV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsran_h_w, "V8sV4SiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsran_w_d, "V4SiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vsrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vsrlni_b_h, "V16ScV16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vsrani_b_h, "V16cV16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vsrani_b_h, "V16ScV16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrani_h_w, "V8sV8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrani_w_d, "V4iV4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx")
@@ -460,12 +460,12 @@ TARGET_BUILTIN(__builtin_lsx_vsrarn_b_h, "V16ScV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrarn_h_w, "V8sV4SiV4Si", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vsrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vsrlrni_b_h, "V16ScV16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vsrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vsrarni_b_h, "V16ScV16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vsrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx")
@@ -486,22 +486,22 @@ TARGET_BUILTIN(__builtin_lsx_vssran_bu_h, "V16UcV8UsV8Us", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssran_hu_w, "V8UsV4UiV4Ui", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssran_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vssrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vssrlni_b_h, "V16ScV16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vssrani_b_h, "V16cV16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vssrani_b_h, "V16ScV16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrani_h_w, "V8sV8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrani_w_d, "V4iV4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16cV16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16ScV16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrlrni_hu_w, "V8sV8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrlrni_wu_d, "V4iV4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrlrni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vssrani_bu_h, "V16cV16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vssrani_bu_h, "V16ScV16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrani_hu_w, "V8sV8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrani_wu_d, "V4iV4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrani_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx")
@@ -522,22 +522,22 @@ TARGET_BUILTIN(__builtin_lsx_vssrarn_bu_h, "V16UcV8UsV8Us", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrarn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrarn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16ScV16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vssrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vssrarni_b_h, "V16ScV16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16cV16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16ScV16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrlni_hu_w, "V8sV8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrlni_wu_d, "V4iV4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrlni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16cV16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16ScV16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrarni_hu_w, "V8sV8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrarni_wu_d, "V4iV4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vssrarni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx")
@@ -590,7 +590,7 @@ TARGET_BUILTIN(__builtin_lsx_vbitrevi_d, "V2ULLiV2ULLiIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vfrstp_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vfrstp_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vfrstpi_b, "V16cV16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vfrstpi_b, "V16ScV16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vfrstpi_h, "V8sV8sV8sIUi", "nc", "lsx")
 
 TARGET_BUILTIN(__builtin_lsx_vfadd_s, "V4fV4fV4f", "nc", "lsx")
@@ -867,45 +867,45 @@ TARGET_BUILTIN(__builtin_lsx_vpickve2gr_hu, "iV8UsIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vpickve2gr_wu, "iV4UiIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vpickve2gr_du, "LLiV2ULLiIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vreplve_b, "V16cV16cUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vreplve_b, "V16ScV16ScUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vreplve_h, "V8sV8sUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vreplve_w, "V4iV4iUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vreplve_d, "V2LLiV2LLiUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vreplvei_b, "V16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vreplvei_b, "V16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vreplvei_h, "V8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vreplvei_w, "V4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vreplvei_d, "V2LLiV2LLiIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vbsll_v, "V16cV16cIUi", "nc", "lsx")
-TARGET_BUILTIN(__builtin_lsx_vbsrl_v, "V16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vbsll_v, "V16ScV16ScIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vbsrl_v, "V16ScV16ScIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vpackev_b, "V16cV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vpackev_b, "V16ScV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vpackev_h, "V8sV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vpackev_w, "V4iV4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vpackev_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vpackod_b, "V16cV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vpackod_b, "V16ScV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vpackod_h, "V8sV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vpackod_w, "V4iV4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vpackod_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vpickev_b, "V16cV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vpickev_b, "V16ScV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vpickev_h, "V8sV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vpickev_w, "V4iV4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vpickev_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vpickod_b, "V16cV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vpickod_b, "V16ScV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vpickod_h, "V8sV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vpickod_w, "V4iV4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vpickod_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vilvl_b, "V16cV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vilvl_b, "V16ScV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vilvl_h, "V8sV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vilvl_w, "V4iV4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vilvl_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vilvh_b, "V16cV16cV16c", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vilvh_b, "V16ScV16ScV16Sc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vilvh_h, "V8sV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vilvh_w, "V4iV4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vilvh_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
@@ -916,14 +916,14 @@ TARGET_BUILTIN(__builtin_lsx_vshuf_h, "V8sV8sV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vshuf_w, "V4iV4iV4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vshuf_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vshuf4i_b, "V16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vshuf4i_b, "V16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vshuf4i_h, "V8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vshuf4i_w, "V4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vshuf4i_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx")
 
 TARGET_BUILTIN(__builtin_lsx_vpermi_w, "V4iV4iV4iIUi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vextrins_b, "V16cV16cV16cIUi", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vextrins_b, "V16ScV16ScV16ScIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vextrins_h, "V8sV8sV8sIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vextrins_w, "V4iV4iV4iIUi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vextrins_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx")
@@ -934,7 +934,7 @@ TARGET_BUILTIN(__builtin_lsx_vst, "vV16Scv*Ii", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vldx, "V16ScvC*LLi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vstx, "vV16Scv*LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vldrepl_b, "V16cvC*Ii", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vldrepl_b, "V16ScvC*Ii", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vldrepl_h, "V8svC*Ii", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vldrepl_w, "V4ivC*Ii", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vldrepl_d, "V2LLivC*Ii", "nc", "lsx")

From 3f9b36d195927e7aa0c133cd2d1efcc489b5fb7c Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sat, 2 Nov 2024 16:19:28 +0800
Subject: [PATCH 387/427] [LoongArch][Clang] Make the parameter and return
 value of {x,}vmsknz.b builtins `signed char` vector (#114511)

These builtins operate on int8 vectors, not int16 vectors.  So the old
definition does not make any sense.

Depends on #114510.  Part of #110834 fix.

(cherry picked from commit 92daad2eac587cb0592de019cd5f6cbb7c42bb78)
---
 clang/include/clang/Basic/BuiltinsLoongArchLASX.def | 2 +-
 clang/include/clang/Basic/BuiltinsLoongArchLSX.def  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
index 0d7c2df5c5c50..477f704860873 100644
--- a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
+++ b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
@@ -357,7 +357,7 @@ TARGET_BUILTIN(__builtin_lasx_xvmskltz_w, "V8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvmskltz_d, "V4LLiV4LLi", "nc", "lasx")
 
 TARGET_BUILTIN(__builtin_lasx_xvmskgez_b, "V32ScV32Sc", "nc", "lasx")
-TARGET_BUILTIN(__builtin_lasx_xvmsknz_b, "V16sV16s", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvmsknz_b, "V32ScV32Sc", "nc", "lasx")
 
 TARGET_BUILTIN(__builtin_lasx_xvldi, "V4LLiIi", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvrepli_b, "V32ScIi", "nc", "lasx")
diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def
index 25a178e1ca98a..e847985f39036 100644
--- a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def
+++ b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def
@@ -341,7 +341,7 @@ TARGET_BUILTIN(__builtin_lsx_vmskltz_w, "V4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vmskltz_d, "V2LLiV2LLi", "nc", "lsx")
 
 TARGET_BUILTIN(__builtin_lsx_vmskgez_b, "V16ScV16Sc", "nc", "lsx")
-TARGET_BUILTIN(__builtin_lsx_vmsknz_b, "V8sV8s", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vmsknz_b, "V16ScV16Sc", "nc", "lsx")
 
 TARGET_BUILTIN(__builtin_lsx_vldi, "V2LLiIi", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vrepli_b, "V16ScIi", "nc", "lsx")

From 0c5f639d8e61957b5ea31af8a3ecf588ce9864af Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Mon, 4 Nov 2024 08:57:55 +0800
Subject: [PATCH 388/427] [LoongArch][Clang] Make the parameters and return
 value of {x,}vshuf.b builtins `signed char` vectors (#114512)

The lsxintrin.h and and lasxintrin.h headers uses `signed char` vectors
instead of `unsigned char` vectors.  GCC also uses `signed char` for
them, so align their definition with the headers and GCC.

Depends on #114511.  Part of #110834 fix.

(cherry picked from commit 4006b28d102b09f4c736ef0f2664873305fedcd3)
---
 clang/include/clang/Basic/BuiltinsLoongArchLASX.def | 2 +-
 clang/include/clang/Basic/BuiltinsLoongArchLSX.def  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
index 477f704860873..e6c41dd097494 100644
--- a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
+++ b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
@@ -935,7 +935,7 @@ TARGET_BUILTIN(__builtin_lasx_xvilvh_h, "V16sV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvilvh_w, "V8iV8iV8i", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvilvh_d, "V4LLiV4LLiV4LLi", "nc", "lasx")
 
-TARGET_BUILTIN(__builtin_lasx_xvshuf_b, "V32UcV32UcV32UcV32Uc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvshuf_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx")
 
 TARGET_BUILTIN(__builtin_lasx_xvshuf_h, "V16sV16sV16sV16s", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvshuf_w, "V8iV8iV8iV8i", "nc", "lasx")
diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def
index e847985f39036..ded6519f3ef16 100644
--- a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def
+++ b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def
@@ -910,7 +910,7 @@ TARGET_BUILTIN(__builtin_lsx_vilvh_h, "V8sV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vilvh_w, "V4iV4iV4i", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vilvh_d, "V2LLiV2LLiV2LLi", "nc", "lsx")
 
-TARGET_BUILTIN(__builtin_lsx_vshuf_b, "V16UcV16UcV16UcV16Uc", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vshuf_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx")
 
 TARGET_BUILTIN(__builtin_lsx_vshuf_h, "V8sV8sV8sV8s", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vshuf_w, "V4iV4iV4iV4i", "nc", "lsx")

From 499eae983bf433fc569231d36bc3dbabd0940bb5 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Mon, 4 Nov 2024 17:52:52 +0800
Subject: [PATCH 389/427] [LoongArch][Clang] Make the parameters and return
 value of {x,}vxor.v builti ns `unsigned char` vectors (#114513)

The lsxintrin.h and and lasxintrin.h headers uses `unsigned char`
vectors instead of `signed char` vectors.  GCC also uses `unsigned char`
for them, so align their definition with the headers and GCC.

Fixes #110834.

Depends on #114512.

(cherry picked from commit 4f740f9d77cd038c8e55195fa189748e58ea6476)
---
 clang/include/clang/Basic/BuiltinsLoongArchLASX.def | 2 +-
 clang/include/clang/Basic/BuiltinsLoongArchLSX.def  | 2 +-
 clang/test/Headers/lasxintrin.c                     | 5 ++---
 clang/test/Headers/lsxintrin.c                      | 5 ++---
 4 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
index e6c41dd097494..f644b820a6189 100644
--- a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
+++ b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def
@@ -368,7 +368,7 @@ TARGET_BUILTIN(__builtin_lasx_xvrepli_d, "V4LLiIi", "nc", "lasx")
 
 TARGET_BUILTIN(__builtin_lasx_xvand_v, "V32UcV32UcV32Uc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvor_v, "V32UcV32UcV32Uc", "nc", "lasx")
-TARGET_BUILTIN(__builtin_lasx_xvxor_v, "V32ScV32ScV32Sc", "nc", "lasx")
+TARGET_BUILTIN(__builtin_lasx_xvxor_v, "V32UcV32UcV32Uc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvnor_v, "V32UcV32UcV32Uc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvandn_v, "V32UcV32UcV32Uc", "nc", "lasx")
 TARGET_BUILTIN(__builtin_lasx_xvorn_v, "V32ScV32ScV32Sc", "nc", "lasx")
diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def
index ded6519f3ef16..b3056971986d1 100644
--- a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def
+++ b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def
@@ -352,7 +352,7 @@ TARGET_BUILTIN(__builtin_lsx_vrepli_d, "V2LLiIi", "nc", "lsx")
 
 TARGET_BUILTIN(__builtin_lsx_vand_v, "V16UcV16UcV16Uc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vor_v, "V16UcV16UcV16Uc", "nc", "lsx")
-TARGET_BUILTIN(__builtin_lsx_vxor_v, "V16ScV16ScV16Sc", "nc", "lsx")
+TARGET_BUILTIN(__builtin_lsx_vxor_v, "V16UcV16UcV16Uc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vnor_v, "V16UcV16UcV16Uc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vandn_v, "V16UcV16UcV16Uc", "nc", "lsx")
 TARGET_BUILTIN(__builtin_lsx_vorn_v, "V16ScV16ScV16Sc", "nc", "lsx")
diff --git a/clang/test/Headers/lasxintrin.c b/clang/test/Headers/lasxintrin.c
index 6126c6350455c..08f71791bdf36 100644
--- a/clang/test/Headers/lasxintrin.c
+++ b/clang/test/Headers/lasxintrin.c
@@ -1,6 +1,5 @@
 // RUN: %clang_cc1 %s -fsyntax-only -triple loongarch64 -target-feature +lasx
-// RUN: not %clang_cc1 %s -fsyntax-only -triple loongarch64 -target-feature +lasx -flax-vector-conversions=none
-// RUN: not %clang_cc1 %s -fsyntax-only -triple loongarch64 -target-feature +lasx -flax-vector-conversions=none -fno-signed-char
-// FIXME: "not" should be removed once we fix GH#110834.
+// RUN: %clang_cc1 %s -fsyntax-only -triple loongarch64 -target-feature +lasx -flax-vector-conversions=none
+// RUN: %clang_cc1 %s -fsyntax-only -triple loongarch64 -target-feature +lasx -flax-vector-conversions=none -fno-signed-char
 
 #include <lasxintrin.h>
diff --git a/clang/test/Headers/lsxintrin.c b/clang/test/Headers/lsxintrin.c
index 930d3efe62e54..83c9879eea967 100644
--- a/clang/test/Headers/lsxintrin.c
+++ b/clang/test/Headers/lsxintrin.c
@@ -1,6 +1,5 @@
 // RUN: %clang_cc1 %s -fsyntax-only -triple loongarch64 -target-feature +lsx
-// RUN: not %clang_cc1 %s -fsyntax-only -triple loongarch64 -target-feature +lsx -flax-vector-conversions=none
-// RUN: not %clang_cc1 %s -fsyntax-only -triple loongarch64 -target-feature +lsx -flax-vector-conversions=none -fno-signed-char
-// FIXME: "not" should be removed once we fix GH#110834.
+// RUN: %clang_cc1 %s -fsyntax-only -triple loongarch64 -target-feature +lsx -flax-vector-conversions=none
+// RUN: %clang_cc1 %s -fsyntax-only -triple loongarch64 -target-feature +lsx -flax-vector-conversions=none -fno-signed-char
 
 #include <lsxintrin.h>

From 19026a48df57f101169e2898a798244fbbd383fc Mon Sep 17 00:00:00 2001
From: Alan Wu <XrXr@users.noreply.github.com>
Date: Fri, 16 Aug 2024 08:16:17 -0400
Subject: [PATCH 390/427] [compiler-rt] Stop using x86 builtin on AArch64 with
 GCC (#93890)

Previously, building `multc3.c` on A64 with GCC 7 or up but 9 and lower
will attempt to reference `__builtin_copysignq`, an [x86-specific
intrinsic][1]:

```
$ gcc -c multc3.c
In file included from fp_lib.h:24,
                 from multc3.c:14:
multc3.c: In function '__multc3':
int_math.h:71:32: warning: implicit declaration of function '__builtin_copysignq'; did you mean '__builtin_copysign'? [-Wimplicit-function-declaration]
 #define crt_copysignf128(x, y) __builtin_copysignq((x), (y))
                                ^~~~~~~~~~~~~~~~~~~
```

This is because `__has_builtin` is from GCC 10, and defined to 0 at the
top of int_math.h for affected GCC versions, so the fallback definition
is used. But `__builtin_copysignq` is unavailable on A64.

Use version detection to find `__builtin_copysignf128` instead. It's
available since GCC 7 and [available][2] on both x86 and A64, given this
macro is only used when `CRT_HAS_IEEE_TF`.

---

I realize this is fixing a problem for an out-of-tree build
configuration, but help would be greatly appreciated. Rust
[builds](https://github.com/rust-lang/compiler-builtins) `multc3.c` with
GCC 8 and this mis-selection is causing [build
issues](https://github.com/rust-lang/rust/issues/125619) way downstream.

ref: d2ce3e9621411f3391def327f89e3a650918989f

[1]: https://gcc.gnu.org/onlinedocs/gcc/x86-Built-in-Functions.html
[2]: https://gcc.gnu.org/gcc-7/changes.html

(cherry picked from commit 8aa9d6206ce55bdaaf422839c351fbd63f033b89)
---
 compiler-rt/lib/builtins/int_math.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/compiler-rt/lib/builtins/int_math.h b/compiler-rt/lib/builtins/int_math.h
index 74d3e311db5e7..08bfe922ffa13 100644
--- a/compiler-rt/lib/builtins/int_math.h
+++ b/compiler-rt/lib/builtins/int_math.h
@@ -65,9 +65,12 @@
 #define crt_copysign(x, y) __builtin_copysign((x), (y))
 #define crt_copysignf(x, y) __builtin_copysignf((x), (y))
 #define crt_copysignl(x, y) __builtin_copysignl((x), (y))
-#if __has_builtin(__builtin_copysignf128)
+// We define __has_builtin to always return 0 for GCC versions below 10,
+// but __builtin_copysignf128 is available since version 7.
+#if __has_builtin(__builtin_copysignf128) ||                                   \
+    (defined(__GNUC__) && __GNUC__ >= 7)
 #define crt_copysignf128(x, y) __builtin_copysignf128((x), (y))
-#elif __has_builtin(__builtin_copysignq) || (defined(__GNUC__) && __GNUC__ >= 7)
+#elif __has_builtin(__builtin_copysignq)
 #define crt_copysignf128(x, y) __builtin_copysignq((x), (y))
 #endif
 #endif
@@ -80,9 +83,11 @@
 #define crt_fabs(x) __builtin_fabs((x))
 #define crt_fabsf(x) __builtin_fabsf((x))
 #define crt_fabsl(x) __builtin_fabsl((x))
-#if __has_builtin(__builtin_fabsf128)
+// We define __has_builtin to always return 0 for GCC versions below 10,
+// but __builtin_fabsf128 is available since version 7.
+#if __has_builtin(__builtin_fabsf128) || (defined(__GNUC__) && __GNUC__ >= 7)
 #define crt_fabsf128(x) __builtin_fabsf128((x))
-#elif __has_builtin(__builtin_fabsq) || (defined(__GNUC__) && __GNUC__ >= 7)
+#elif __has_builtin(__builtin_fabsq)
 #define crt_fabsf128(x) __builtin_fabsq((x))
 #endif
 #endif

From ec947f9d3315bd6654709b143ed4b5b98899524e Mon Sep 17 00:00:00 2001
From: Anutosh Bhat <87052487+anutosh491@users.noreply.github.com>
Date: Fri, 25 Oct 2024 11:39:14 +0530
Subject: [PATCH 391/427] [clang-repl] Fix undefined lld::wasm::link symbol
 while building clangInterpreter for wasm (#113446)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While building llvm (clang, lld) for wasm using emscripten (recipe
hosted on emscripten-forge
https://github.com/emscripten-forge/recipes/tree/main/recipes/recipes_emscripten/llvm)
I ended up with this error
```
 │ │ wasm-ld: error: ../../../../lib/libclangInterpreter.a(Wasm.cpp.o): undefined symbol: lld::wasm::link(llvm::ArrayRef<char const*>, llvm::raw_ostream&, llvm:
 │ │ :raw_ostream&, bool, bool)
 ```
 This is due to the link function here
 https://github.com/llvm/llvm-project/blob/a4819bd46d8baebc3aaa8b38f78065de33593199/clang/lib/Interpreter/Wasm.cpp#L25-L30

 This was added through this PR (https://github.com/llvm/llvm-project/pull/86402) as an attempt to support running clang-repl and executing C++ code interactively inside a Javascript engine using WebAssembly when built with Emscripten.

 The definition for link is present in lldwasm and when building for the emscripten platform we should be linking against it.

(cherry picked from commit 075581f34035c01659cc883d0d69336c279ef0d5)
---
 clang/lib/Interpreter/CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/lib/Interpreter/CMakeLists.txt b/clang/lib/Interpreter/CMakeLists.txt
index 6a069659ebb8d..0a2d60757c216 100644
--- a/clang/lib/Interpreter/CMakeLists.txt
+++ b/clang/lib/Interpreter/CMakeLists.txt
@@ -14,6 +14,7 @@ set(LLVM_LINK_COMPONENTS
 
 if (EMSCRIPTEN AND "lld" IN_LIST LLVM_ENABLE_PROJECTS)
   set(WASM_SRC Wasm.cpp)
+  set(WASM_LINK lldWasm)
 endif()
 
 add_clang_library(clangInterpreter
@@ -43,6 +44,7 @@ add_clang_library(clangInterpreter
   clangParse
   clangSema
   clangSerialization
+  ${WASM_LINK}
   )
 
 if ((MINGW OR CYGWIN) AND BUILD_SHARED_LIBS)

From ffc48250bca5128795084f4af721d310f49f3e27 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me@antoniofrighetto.com>
Date: Tue, 12 Nov 2024 10:45:46 +0100
Subject: [PATCH 392/427] [InstCombine] Intersect nowrap flags between geps
 while folding into phi

A miscompilation issue has been addressed with refined checking.
---
 .../Transforms/InstCombine/InstCombinePHI.cpp |  3 +-
 .../test/Transforms/InstCombine/opaque-ptr.ll |  2 +-
 llvm/test/Transforms/InstCombine/phi.ll       | 28 +++++++++++++++++++
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 86411320ab248..b05a33c688890 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -513,7 +513,8 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
   // especially bad when the PHIs are in the header of a loop.
   bool NeededPhi = false;
 
-  GEPNoWrapFlags NW = GEPNoWrapFlags::all();
+  // Remember flags of the first phi-operand getelementptr.
+  GEPNoWrapFlags NW = FirstInst->getNoWrapFlags();
 
   // Scan to see if all operands are the same opcode, and all have one user.
   for (Value *V : drop_begin(PN.incoming_values())) {
diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
index df85547f56d74..1fd8281b53816 100644
--- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll
+++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
@@ -549,7 +549,7 @@ define ptr @phi_of_gep_flags_1(i1 %c, ptr %p) {
 ; CHECK:       else:
 ; CHECK-NEXT:    br label [[JOIN]]
 ; CHECK:       join:
-; CHECK-NEXT:    [[PHI:%.*]] = getelementptr nusw nuw i8, ptr [[P:%.*]], i64 4
+; CHECK-NEXT:    [[PHI:%.*]] = getelementptr nusw i8, ptr [[P:%.*]], i64 4
 ; CHECK-NEXT:    ret ptr [[PHI]]
 ;
   br i1 %c, label %if, label %else
diff --git a/llvm/test/Transforms/InstCombine/phi.ll b/llvm/test/Transforms/InstCombine/phi.ll
index b12982dd27e40..82ea9bb439b0b 100644
--- a/llvm/test/Transforms/InstCombine/phi.ll
+++ b/llvm/test/Transforms/InstCombine/phi.ll
@@ -2714,3 +2714,31 @@ join:
   %cmp = icmp slt i32 %13, 0
   ret i1 %cmp
 }
+
+define i64 @wrong_gep_arg_into_phi(ptr noundef %ptr) {
+; CHECK-LABEL: @wrong_gep_arg_into_phi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[PTR_PN:%.*]] = phi ptr [ [[PTR:%.*]], [[ENTRY:%.*]] ], [ [[DOTPN:%.*]], [[FOR_COND]] ]
+; CHECK-NEXT:    [[DOTPN]] = getelementptr i8, ptr [[PTR_PN]], i64 1
+; CHECK-NEXT:    [[VAL:%.*]] = load i8, ptr [[DOTPN]], align 1
+; CHECK-NEXT:    [[COND_NOT:%.*]] = icmp eq i8 [[VAL]], 0
+; CHECK-NEXT:    br i1 [[COND_NOT]], label [[EXIT:%.*]], label [[FOR_COND]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i64 0
+;
+entry:
+  %add.ptr = getelementptr i8, ptr %ptr, i64 1
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry
+  %.pn = phi ptr [ %add.ptr, %entry ], [ %incdec.ptr, %for.cond ]
+  %val = load i8, ptr %.pn, align 1
+  %cond = icmp ne i8 %val, 0
+  %incdec.ptr = getelementptr inbounds nuw i8, ptr %.pn, i64 1
+  br i1 %cond, label %for.cond, label %exit
+
+exit:                                             ; preds = %for.cond
+  ret i64 0
+}

From 81005af65fa40638d8766b1b10bdb0dd705bf782 Mon Sep 17 00:00:00 2001
From: Daniel Paoliello <danpao@microsoft.com>
Date: Wed, 13 Nov 2024 15:35:03 -0800
Subject: [PATCH 393/427] [llvm][aarch64] Fix Arm64EC name mangling algorithm
 (#115567)

Arm64EC uses a special name mangling mode that adds `$$h` between the
symbol name and its type. In MSVC's name mangling `@` is used to
separate the name and type BUT it is also used for other purposes, such
as the separator between paths in a fully qualified name.

The original algorithm was quite fragile and made assumptions that
didn't hold true for all MSVC mangled symbols, so instead of trying to
improve this algorithm we are now using the demangler to indicate where
the insertion point should be (i.e., to parse the fully-qualified name
and return the current string offset).

Also fixed `isArm64ECMangledFunctionName` to search for `@$$h` since the
`$$h` must always be after a `@`.

Fixes #115231
---
 llvm/include/llvm/Demangle/Demangle.h         |  4 +
 .../include/llvm/Demangle/MicrosoftDemangle.h |  4 +
 llvm/include/llvm/IR/Mangler.h                |  6 ++
 llvm/lib/Demangle/MicrosoftDemangle.cpp       | 19 +++++
 llvm/lib/IR/Mangler.cpp                       | 36 ++++-----
 llvm/unittests/IR/ManglerTest.cpp             | 77 +++++++++++++++++++
 6 files changed, 126 insertions(+), 20 deletions(-)

diff --git a/llvm/include/llvm/Demangle/Demangle.h b/llvm/include/llvm/Demangle/Demangle.h
index fe129603c0785..132e5088b5514 100644
--- a/llvm/include/llvm/Demangle/Demangle.h
+++ b/llvm/include/llvm/Demangle/Demangle.h
@@ -10,6 +10,7 @@
 #define LLVM_DEMANGLE_DEMANGLE_H
 
 #include <cstddef>
+#include <optional>
 #include <string>
 #include <string_view>
 
@@ -54,6 +55,9 @@ enum MSDemangleFlags {
 char *microsoftDemangle(std::string_view mangled_name, size_t *n_read,
                         int *status, MSDemangleFlags Flags = MSDF_None);
 
+std::optional<size_t>
+getArm64ECInsertionPointInMangledName(std::string_view MangledName);
+
 // Demangles a Rust v0 mangled symbol.
 char *rustDemangle(std::string_view MangledName);
 
diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangle.h b/llvm/include/llvm/Demangle/MicrosoftDemangle.h
index 6891185a28e57..276efa7603690 100644
--- a/llvm/include/llvm/Demangle/MicrosoftDemangle.h
+++ b/llvm/include/llvm/Demangle/MicrosoftDemangle.h
@@ -9,6 +9,7 @@
 #ifndef LLVM_DEMANGLE_MICROSOFTDEMANGLE_H
 #define LLVM_DEMANGLE_MICROSOFTDEMANGLE_H
 
+#include "llvm/Demangle/Demangle.h"
 #include "llvm/Demangle/MicrosoftDemangleNodes.h"
 
 #include <cassert>
@@ -141,6 +142,9 @@ enum class FunctionIdentifierCodeGroup { Basic, Under, DoubleUnder };
 // It has a set of functions to parse mangled symbols into Type instances.
 // It also has a set of functions to convert Type instances to strings.
 class Demangler {
+  friend std::optional<size_t>
+  llvm::getArm64ECInsertionPointInMangledName(std::string_view MangledName);
+
 public:
   Demangler() = default;
   virtual ~Demangler() = default;
diff --git a/llvm/include/llvm/IR/Mangler.h b/llvm/include/llvm/IR/Mangler.h
index f28ffc961b6db..33af40c5ae98d 100644
--- a/llvm/include/llvm/IR/Mangler.h
+++ b/llvm/include/llvm/IR/Mangler.h
@@ -56,6 +56,12 @@ void emitLinkerFlagsForUsedCOFF(raw_ostream &OS, const GlobalValue *GV,
 std::optional<std::string> getArm64ECMangledFunctionName(StringRef Name);
 std::optional<std::string> getArm64ECDemangledFunctionName(StringRef Name);
 
+/// Check if an ARM64EC function name is mangled.
+bool inline isArm64ECMangledFunctionName(StringRef Name) {
+  return Name[0] == '#' ||
+         (Name[0] == '?' && Name.find("@$$h") != StringRef::npos);
+}
+
 } // End llvm namespace
 
 #endif
diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index c5835e8c2e989..d35902a333767 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -24,6 +24,7 @@
 #include <array>
 #include <cctype>
 #include <cstdio>
+#include <optional>
 #include <string_view>
 #include <tuple>
 
@@ -2424,6 +2425,24 @@ void Demangler::dumpBackReferences() {
     std::printf("\n");
 }
 
+std::optional<size_t>
+llvm::getArm64ECInsertionPointInMangledName(std::string_view MangledName) {
+  std::string_view ProcessedName{MangledName};
+
+  // We only support this for MSVC-style C++ symbols.
+  if (!consumeFront(ProcessedName, '?'))
+    return std::nullopt;
+
+  // The insertion point is just after the name of the symbol, so parse that to
+  // remove it from the processed name.
+  Demangler D;
+  D.demangleFullyQualifiedSymbolName(ProcessedName);
+  if (D.Error)
+    return std::nullopt;
+
+  return MangledName.length() - ProcessedName.length();
+}
+
 char *llvm::microsoftDemangle(std::string_view MangledName, size_t *NMangled,
                               int *Status, MSDemangleFlags Flags) {
   Demangler D;
diff --git a/llvm/lib/IR/Mangler.cpp b/llvm/lib/IR/Mangler.cpp
index e6c3ea9d56883..884739b3212c6 100644
--- a/llvm/lib/IR/Mangler.cpp
+++ b/llvm/lib/IR/Mangler.cpp
@@ -14,6 +14,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Demangle/Demangle.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
@@ -291,30 +292,25 @@ void llvm::emitLinkerFlagsForUsedCOFF(raw_ostream &OS, const GlobalValue *GV,
 }
 
 std::optional<std::string> llvm::getArm64ECMangledFunctionName(StringRef Name) {
-  bool IsCppFn = Name[0] == '?';
-  if (IsCppFn && Name.contains("$$h"))
-    return std::nullopt;
-  if (!IsCppFn && Name[0] == '#')
+  if (Name[0] != '?') {
+    // For non-C++ symbols, prefix the name with "#" unless it's already
+    // mangled.
+    if (Name[0] == '#')
+      return std::nullopt;
+    return std::optional<std::string>(("#" + Name).str());
+  }
+
+  // If the name contains $$h, then it is already mangled.
+  if (Name.contains("$$h"))
     return std::nullopt;
 
-  StringRef Prefix = "$$h";
-  size_t InsertIdx = 0;
-  if (IsCppFn) {
-    InsertIdx = Name.find("@@");
-    size_t ThreeAtSignsIdx = Name.find("@@@");
-    if (InsertIdx != std::string::npos && InsertIdx != ThreeAtSignsIdx) {
-      InsertIdx += 2;
-    } else {
-      InsertIdx = Name.find("@");
-      if (InsertIdx != std::string::npos)
-        InsertIdx++;
-    }
-  } else {
-    Prefix = "#";
-  }
+  // Ask the demangler where we should insert "$$h".
+  auto InsertIdx = getArm64ECInsertionPointInMangledName(Name);
+  if (!InsertIdx)
+    return std::nullopt;
 
   return std::optional<std::string>(
-      (Name.substr(0, InsertIdx) + Prefix + Name.substr(InsertIdx)).str());
+      (Name.substr(0, *InsertIdx) + "$$h" + Name.substr(*InsertIdx)).str());
 }
 
 std::optional<std::string>
diff --git a/llvm/unittests/IR/ManglerTest.cpp b/llvm/unittests/IR/ManglerTest.cpp
index f2b78a1f98769..f8a3152564fd9 100644
--- a/llvm/unittests/IR/ManglerTest.cpp
+++ b/llvm/unittests/IR/ManglerTest.cpp
@@ -174,4 +174,81 @@ TEST(ManglerTest, GOFF) {
             "L#foo");
 }
 
+TEST(ManglerTest, Arm64EC) {
+  constexpr std::string_view Arm64ECNames[] = {
+      // Basic C name.
+      "#Foo",
+
+      // Basic C++ name.
+      "?foo@@$$hYAHXZ",
+
+      // Regression test: https://github.com/llvm/llvm-project/issues/115231
+      "?GetValue@?$Wrapper@UA@@@@$$hQEBAHXZ",
+
+      // Symbols from:
+      // ```
+      // namespace A::B::C::D {
+      // struct Base {
+      //   virtual int f() { return 0; }
+      // };
+      // }
+      // struct Derived : public A::B::C::D::Base {
+      //   virtual int f() override { return 1; }
+      // };
+      // A::B::C::D::Base* MakeObj() { return new Derived(); }
+      // ```
+      // void * __cdecl operator new(unsigned __int64)
+      "??2@$$hYAPEAX_K@Z",
+      // public: virtual int __cdecl A::B::C::D::Base::f(void)
+      "?f@Base@D@C@B@A@@$$hUEAAHXZ",
+      // public: __cdecl A::B::C::D::Base::Base(void)
+      "??0Base@D@C@B@A@@$$hQEAA@XZ",
+      // public: virtual int __cdecl Derived::f(void)
+      "?f@Derived@@$$hUEAAHXZ",
+      // public: __cdecl Derived::Derived(void)
+      "??0Derived@@$$hQEAA@XZ",
+      // struct A::B::C::D::Base * __cdecl MakeObj(void)
+      "?MakeObj@@$$hYAPEAUBase@D@C@B@A@@XZ",
+
+      // Symbols from:
+      // ```
+      // template <typename T> struct WW { struct Z{}; };
+      // template <typename X> struct Wrapper {
+      //   int GetValue(typename WW<X>::Z) const;
+      // };
+      // struct A { };
+      // template <typename X> int Wrapper<X>::GetValue(typename WW<X>::Z) const
+      // { return 3; }
+      // template class Wrapper<A>;
+      // ```
+      // public: int __cdecl Wrapper<struct A>::GetValue(struct WW<struct
+      // A>::Z)const
+      "?GetValue@?$Wrapper@UA@@@@$$hQEBAHUZ@?$WW@UA@@@@@Z",
+  };
+
+  for (const auto &Arm64ECName : Arm64ECNames) {
+    // Check that this is a mangled name.
+    EXPECT_TRUE(isArm64ECMangledFunctionName(Arm64ECName))
+        << "Test case: " << Arm64ECName;
+    // Refuse to mangle it again.
+    EXPECT_FALSE(getArm64ECMangledFunctionName(Arm64ECName).has_value())
+        << "Test case: " << Arm64ECName;
+
+    // Demangle.
+    auto Arm64Name = getArm64ECDemangledFunctionName(Arm64ECName);
+    EXPECT_TRUE(Arm64Name.has_value()) << "Test case: " << Arm64ECName;
+    // Check that it is not mangled.
+    EXPECT_FALSE(isArm64ECMangledFunctionName(Arm64Name.value()))
+        << "Test case: " << Arm64ECName;
+    // Refuse to demangle it again.
+    EXPECT_FALSE(getArm64ECDemangledFunctionName(Arm64Name.value()).has_value())
+        << "Test case: " << Arm64ECName;
+
+    // Round-trip.
+    auto RoundTripArm64ECName =
+        getArm64ECMangledFunctionName(Arm64Name.value());
+    EXPECT_EQ(RoundTripArm64ECName, Arm64ECName);
+  }
+}
+
 } // end anonymous namespace

From c9e8540d6c87ecd5a5057a9b0fe47d63affe3062 Mon Sep 17 00:00:00 2001
From: Mariya Podchishchaeva <mariya.podchishchaeva@intel.com>
Date: Fri, 18 Oct 2024 10:18:34 +0200
Subject: [PATCH 394/427] [clang] Fix C23 constexpr crashes (#112708)

Before using a constexpr variable that is not properly initialized check
that it is valid.

Fixes https://github.com/llvm/llvm-project/issues/109095
Fixes https://github.com/llvm/llvm-project/issues/112516
---
 clang/lib/AST/Decl.cpp      | 10 +++++++---
 clang/test/Sema/constexpr.c | 17 +++++++++++++++++
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 490c4a2fc525c..bc7cce0bcd7fc 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -2503,7 +2503,8 @@ bool VarDecl::isUsableInConstantExpressions(const ASTContext &Context) const {
   if (!DefVD->mightBeUsableInConstantExpressions(Context))
     return false;
   //   ... and its initializer is a constant initializer.
-  if (Context.getLangOpts().CPlusPlus && !DefVD->hasConstantInitialization())
+  if ((Context.getLangOpts().CPlusPlus || getLangOpts().C23) &&
+      !DefVD->hasConstantInitialization())
     return false;
   // C++98 [expr.const]p1:
   //   An integral constant-expression can involve only [...] const variables
@@ -2610,8 +2611,11 @@ bool VarDecl::hasICEInitializer(const ASTContext &Context) const {
 }
 
 bool VarDecl::hasConstantInitialization() const {
-  // In C, all globals (and only globals) have constant initialization.
-  if (hasGlobalStorage() && !getASTContext().getLangOpts().CPlusPlus)
+  // In C, all globals and constexpr variables should have constant
+  // initialization. For constexpr variables in C check that initializer is a
+  // constant initializer because they can be used in constant expressions.
+  if (hasGlobalStorage() && !getASTContext().getLangOpts().CPlusPlus &&
+      !isConstexpr())
     return true;
 
   // In C++, it depends on whether the evaluation at the point of definition
diff --git a/clang/test/Sema/constexpr.c b/clang/test/Sema/constexpr.c
index a874fd6480840..ad2c2ba98a1e3 100644
--- a/clang/test/Sema/constexpr.c
+++ b/clang/test/Sema/constexpr.c
@@ -364,3 +364,20 @@ void constexprif() {
 void constevalif() {
   if consteval (300) {} //expected-error {{expected '(' after 'if'}}
 }
+
+struct S11 {
+  int len;
+};
+void ghissue112516() {
+  struct S11 *s11 = 0;
+  constexpr int num = s11->len; // expected-error {{constexpr variable 'num' must be initialized by a constant expression}}
+  void *Arr[num];
+}
+
+void ghissue109095() {
+  constexpr char c[] = { 'a' };
+  constexpr int i = c[1]; // expected-error {{constexpr variable 'i' must be initialized by a constant expression}}\
+                          // expected-note {{declared here}}
+  _Static_assert(i == c[0]); // expected-error {{static assertion expression is not an integral constant expression}}\
+                             // expected-note {{initializer of 'i' is not a constant expression}}
+}

From 863b2e599016ada8b2f4cc4d8c59eb7d8f61ee7f Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Wed, 30 Oct 2024 17:27:04 +0800
Subject: [PATCH 395/427] [C++20] [Modules] Fix the duplicated static
 initializer problem (#114193)

Reproducer:

```
//--- a.cppm
export module a;
int func();
static int a = func();

//--- a.cpp
import a;
```

The `func()` should only execute once. However, before this patch we
will somehow import `static int a` from a.cppm incorrectly and
initialize that again.

This is super bad and can introduce serious runtime behaviors.

And also surprisingly, it looks like the root cause of the problem is
simply some oversight choosing APIs.

(cherry picked from commit 259eaa6878ead1e2e7ef572a874dc3d885c1899b)
---
 clang/lib/CodeGen/CodeGenModule.cpp        |  4 ++--
 clang/test/Modules/static-initializer.cppm | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/Modules/static-initializer.cppm

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 151505baf38db..2a5d5f9083ae6 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -7080,8 +7080,8 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
     // For C++ standard modules we are done - we will call the module
     // initializer for imported modules, and that will likewise call those for
     // any imports it has.
-    if (CXX20ModuleInits && Import->getImportedOwningModule() &&
-        !Import->getImportedOwningModule()->isModuleMapModule())
+    if (CXX20ModuleInits && Import->getImportedModule() &&
+        Import->getImportedModule()->isNamedModule())
       break;
 
     // For clang C++ module map modules the initializers for sub-modules are
diff --git a/clang/test/Modules/static-initializer.cppm b/clang/test/Modules/static-initializer.cppm
new file mode 100644
index 0000000000000..10d4854ee67fa
--- /dev/null
+++ b/clang/test/Modules/static-initializer.cppm
@@ -0,0 +1,18 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm
+// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/a.cpp -fmodule-file=a=%t/a.pcm -emit-llvm -o - | FileCheck %t/a.cpp
+
+//--- a.cppm
+export module a;
+int func();
+static int a = func();
+
+//--- a.cpp
+import a;
+
+// CHECK-NOT: internal global
+// CHECK-NOT: __cxx_global_var_init
+

From ec2e1cae2c6330cd1e3b7f580702f1949827899a Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Wed, 9 Oct 2024 08:46:59 -0400
Subject: [PATCH 396/427] [libc++] Fix broken configuration system-libcxxabi on
 Apple (#110920)

On Apple platforms, using system-libcxxabi as an ABI library wouldn't
work because we'd try to re-export symbols from libc++abi that the
system libc++abi.dylib might not have. Instead, only re-export those
symbols when we're using the in-tree libc++abi.

This does mean that libc++.dylib won't re-export any libc++abi symbols
when building against the system libc++abi, which could be fixed in
various ways. However, the best solution really depends on the intended
use case, so this patch doesn't try to solve that problem.

As a drive-by, also improve the diagnostic message when the user forgets
to set the LIBCXX_CXX_ABI_INCLUDE_PATHS variable, which would previously
lead to a confusing error.

Closes #104672

(cherry picked from commit 21da4e7f51c7adfd0b1c5defc8bd0d16ea1ce759)
---
 libcxx/cmake/Modules/HandleLibCXXABI.cmake | 25 +++++++++++++++++++++-
 libcxx/src/CMakeLists.txt                  |  8 ++-----
 libcxxabi/src/CMakeLists.txt               |  2 ++
 3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/libcxx/cmake/Modules/HandleLibCXXABI.cmake b/libcxx/cmake/Modules/HandleLibCXXABI.cmake
index 34e9a672a960f..52236f473f35d 100644
--- a/libcxx/cmake/Modules/HandleLibCXXABI.cmake
+++ b/libcxx/cmake/Modules/HandleLibCXXABI.cmake
@@ -83,6 +83,10 @@ endfunction()
 
 # Link against a system-provided libstdc++
 if ("${LIBCXX_CXX_ABI}" STREQUAL "libstdc++")
+  if(NOT LIBCXX_CXX_ABI_INCLUDE_PATHS)
+    message(FATAL_ERROR "LIBCXX_CXX_ABI_INCLUDE_PATHS must be set when selecting libstdc++ as an ABI library")
+  endif()
+
   add_library(libcxx-abi-headers INTERFACE)
   import_private_headers(libcxx-abi-headers "${LIBCXX_CXX_ABI_INCLUDE_PATHS}"
     "cxxabi.h;bits/c++config.h;bits/os_defines.h;bits/cpu_defines.h;bits/cxxabi_tweaks.h;bits/cxxabi_forced.h")
@@ -96,6 +100,10 @@ if ("${LIBCXX_CXX_ABI}" STREQUAL "libstdc++")
 
 # Link against a system-provided libsupc++
 elseif ("${LIBCXX_CXX_ABI}" STREQUAL "libsupc++")
+  if(NOT LIBCXX_CXX_ABI_INCLUDE_PATHS)
+    message(FATAL_ERROR "LIBCXX_CXX_ABI_INCLUDE_PATHS must be set when selecting libsupc++ as an ABI library")
+  endif()
+
   add_library(libcxx-abi-headers INTERFACE)
   import_private_headers(libcxx-abi-headers "${LIBCXX_CXX_ABI_INCLUDE_PATHS}"
     "cxxabi.h;bits/c++config.h;bits/os_defines.h;bits/cpu_defines.h;bits/cxxabi_tweaks.h;bits/cxxabi_forced.h")
@@ -114,7 +122,18 @@ elseif ("${LIBCXX_CXX_ABI}" STREQUAL "libcxxabi")
   target_compile_definitions(libcxx-abi-headers INTERFACE "-DLIBCXX_BUILDING_LIBCXXABI")
 
   if (TARGET cxxabi_shared)
-    add_library(libcxx-abi-shared ALIAS cxxabi_shared)
+    add_library(libcxx-abi-shared INTERFACE)
+    target_link_libraries(libcxx-abi-shared INTERFACE cxxabi_shared)
+
+    # When using the in-tree libc++abi as an ABI library, libc++ re-exports the
+    # libc++abi symbols (on platforms where it can) because libc++abi is only an
+    # implementation detail of libc++.
+    target_link_libraries(libcxx-abi-shared INTERFACE cxxabi-reexports)
+
+    # Populate the OUTPUT_NAME property of libcxx-abi-shared because that is used when
+    # generating a linker script.
+    get_target_property(_output_name cxxabi_shared OUTPUT_NAME)
+    set_target_properties(libcxx-abi-shared PROPERTIES "OUTPUT_NAME" "${_output_name}")
   endif()
 
   if (TARGET cxxabi_static)
@@ -131,6 +150,10 @@ elseif ("${LIBCXX_CXX_ABI}" STREQUAL "libcxxabi")
 
 # Link against a system-provided libc++abi
 elseif ("${LIBCXX_CXX_ABI}" STREQUAL "system-libcxxabi")
+  if(NOT LIBCXX_CXX_ABI_INCLUDE_PATHS)
+    message(FATAL_ERROR "LIBCXX_CXX_ABI_INCLUDE_PATHS must be set when selecting system-libcxxabi as an ABI library")
+  endif()
+
   add_library(libcxx-abi-headers INTERFACE)
   import_private_headers(libcxx-abi-headers "${LIBCXX_CXX_ABI_INCLUDE_PATHS}" "cxxabi.h;__cxxabi_config.h")
   target_compile_definitions(libcxx-abi-headers INTERFACE "-DLIBCXX_BUILDING_LIBCXXABI")
diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt
index 0dfc9647558d4..b9ecbb196694a 100644
--- a/libcxx/src/CMakeLists.txt
+++ b/libcxx/src/CMakeLists.txt
@@ -229,14 +229,10 @@ if (LIBCXX_ENABLE_SHARED)
     target_link_libraries(cxx_shared PUBLIC libcxx-abi-shared)
   endif()
 
-  # Maybe re-export symbols from libc++abi
-  # In particular, we don't re-export the symbols if libc++abi is merged statically
-  # into libc++ because in that case there's no dylib to re-export from.
+  # Maybe force some symbols to be weak, not weak or not exported.
+  # TODO: This shouldn't depend on the platform, and ideally it should be done in the sources.
   if (APPLE AND LIBCXX_CXX_ABI MATCHES "libcxxabi$"
             AND NOT LIBCXX_STATICALLY_LINK_ABI_IN_SHARED_LIBRARY)
-    target_link_libraries(cxx_shared PRIVATE cxxabi-reexports)
-
-    # TODO: These exports controls should not be tied to whether we re-export libc++abi symbols
     target_link_libraries(cxx_shared PRIVATE
       "-Wl,-unexported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/libc++unexp.exp"
       "-Wl,-force_symbols_not_weak_list,${CMAKE_CURRENT_SOURCE_DIR}/../lib/notweak.exp"
diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt
index f95a8c471fd39..1e06b8cbf084d 100644
--- a/libcxxabi/src/CMakeLists.txt
+++ b/libcxxabi/src/CMakeLists.txt
@@ -213,6 +213,8 @@ if (LIBCXXABI_ENABLE_SHARED)
     list(APPEND LIBCXXABI_INSTALL_TARGETS "cxxabi_shared")
   endif()
 
+  # TODO: Move this to libc++'s HandleLibCXXABI.cmake since this is effectively trying to control
+  #       what libc++ re-exports.
   add_library(cxxabi-reexports INTERFACE)
   function(export_symbols file)
     # -exported_symbols_list is only available on Apple platforms

From c9b952e0e0b8f90c8d2b99f07263c43bced043f5 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333@gmail.com>
Date: Fri, 8 Nov 2024 16:20:04 +0800
Subject: [PATCH 397/427] [InstCombine] Drop nsw in negation of select
 (#112893)

Closes https://github.com/llvm/llvm-project/issues/112666 and
https://github.com/llvm/llvm-project/issues/114181.

(cherry picked from commit ff07df6620c32571c7e13ff96ec7976c63ed0ab8)
---
 .../InstCombine/InstCombineNegator.cpp        | 11 +++++
 .../InstCombine/sub-of-negatible.ll           | 42 +++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
index e4895b59f4b4a..cb052da79bb3c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
@@ -334,6 +334,17 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
       NewSelect->swapValues();
       // Don't swap prof metadata, we didn't change the branch behavior.
       NewSelect->setName(I->getName() + ".neg");
+      // Poison-generating flags should be dropped
+      Value *TV = NewSelect->getTrueValue();
+      Value *FV = NewSelect->getFalseValue();
+      if (match(TV, m_Neg(m_Specific(FV))))
+        cast<Instruction>(TV)->dropPoisonGeneratingFlags();
+      else if (match(FV, m_Neg(m_Specific(TV))))
+        cast<Instruction>(FV)->dropPoisonGeneratingFlags();
+      else {
+        cast<Instruction>(TV)->dropPoisonGeneratingFlags();
+        cast<Instruction>(FV)->dropPoisonGeneratingFlags();
+      }
       Builder.Insert(NewSelect);
       return NewSelect;
     }
diff --git a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll
index b2e14ceaca1b0..f9549881aa313 100644
--- a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll
+++ b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll
@@ -1374,6 +1374,48 @@ define i8 @negate_select_of_op_vs_negated_op(i8 %x, i8 %y, i1 %c) {
   %t2 = sub i8 %y, %t1
   ret i8 %t2
 }
+
+define i8 @negate_select_of_op_vs_negated_op_nsw(i8 %x, i8 %y, i1 %c) {
+; CHECK-LABEL: @negate_select_of_op_vs_negated_op_nsw(
+; CHECK-NEXT:    [[T0:%.*]] = sub i8 0, [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[C:%.*]], i8 [[X]], i8 [[T0]]
+; CHECK-NEXT:    [[T2:%.*]] = add i8 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    ret i8 [[T2]]
+;
+  %t0 = sub nsw i8 0, %x
+  %t1 = select i1 %c, i8 %t0, i8 %x
+  %t2 = sub i8 %y, %t1
+  ret i8 %t2
+}
+
+define i8 @negate_select_of_op_vs_negated_op_nsw_commuted(i8 %x, i8 %y, i1 %c) {
+; CHECK-LABEL: @negate_select_of_op_vs_negated_op_nsw_commuted(
+; CHECK-NEXT:    [[T0:%.*]] = sub i8 0, [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[C:%.*]], i8 [[T0]], i8 [[X]]
+; CHECK-NEXT:    [[T2:%.*]] = add i8 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    ret i8 [[T2]]
+;
+  %t0 = sub nsw i8 0, %x
+  %t1 = select i1 %c, i8 %x, i8 %t0
+  %t2 = sub i8 %y, %t1
+  ret i8 %t2
+}
+
+define i8 @negate_select_of_op_vs_negated_op_nsw_xyyx(i8 %x, i8 %y, i8 %z, i1 %c) {
+; CHECK-LABEL: @negate_select_of_op_vs_negated_op_nsw_xyyx(
+; CHECK-NEXT:    [[SUB1:%.*]] = sub i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[SUB2:%.*]] = sub i8 [[Y]], [[X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[C:%.*]], i8 [[SUB2]], i8 [[SUB1]]
+; CHECK-NEXT:    [[T2:%.*]] = add i8 [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT:    ret i8 [[T2]]
+;
+  %sub1 = sub nsw i8 %x, %y
+  %sub2 = sub nsw i8 %y, %x
+  %t1 = select i1 %c, i8 %sub1, i8 %sub2
+  %t2 = sub i8 %z, %t1
+  ret i8 %t2
+}
+
 define i8 @dont_negate_ordinary_select(i8 %x, i8 %y, i8 %z, i1 %c) {
 ; CHECK-LABEL: @dont_negate_ordinary_select(
 ; CHECK-NEXT:    [[T0:%.*]] = select i1 [[C:%.*]], i8 [[X:%.*]], i8 [[Y:%.*]]

From 54bc386e169635184619874d365c5ecfad61721a Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1@linux.ibm.com>
Date: Fri, 26 Jul 2024 14:33:40 +0200
Subject: [PATCH 398/427] [SystemZ] Use the EVT version of getVectorVT() in
 combineTruncateExtract(). (#100150)

A test case showed up where the new vector type is v24i16, which is not a simple
MVT. In order to get an extended value type for cases like this, EVT::getVectorVT()
needs to be called instead of MVT::getVectorVT(), otherwise the following call
to getVectorElementType() in combineExtract() will fail.

(cherry picked from commit 22bc9db92b46965882b1c77aebc86430149b0912)
---
 llvm/lib/Target/SystemZ/SystemZISelLowering.cpp |  3 ++-
 llvm/test/CodeGen/SystemZ/vec-combine-01.ll     | 10 ++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index b2b88143354a5..383393914a169 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -6653,7 +6653,8 @@ SDValue SystemZTargetLowering::combineTruncateExtract(
 
           // Defer the creation of the bitcast from X to combineExtract,
           // which might be able to optimize the extraction.
-          VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
+          VecVT = EVT::getVectorVT(*DCI.DAG.getContext(),
+                                   MVT::getIntegerVT(TruncBytes * 8),
                                    VecVT.getStoreSize() / TruncBytes);
           EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
           return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
diff --git a/llvm/test/CodeGen/SystemZ/vec-combine-01.ll b/llvm/test/CodeGen/SystemZ/vec-combine-01.ll
index 6f0abd6ea5baf..16231b2d89526 100644
--- a/llvm/test/CodeGen/SystemZ/vec-combine-01.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-combine-01.ll
@@ -153,3 +153,13 @@ define void @f7(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) {
   store i8 %trunc3, ptr %ptr4
   ret void
 }
+
+; Test that a truncating store with a non-simple VT can be handled.
+define void @f8(ptr %src, ptr %dst) {
+; CHECK-LABEL: f8:
+  %1 = load <12 x i32>, ptr %src, align 64
+  %2 = extractelement <12 x i32> %1, i64 11
+  %3 = trunc i32 %2 to i16
+  store i16 %3, ptr %dst, align 2
+  ret void
+}

From 9fdf91271a9ab72090a6d4839d6a96d017218e3c Mon Sep 17 00:00:00 2001
From: bd1976bris <bd1976llvm@gmail.com>
Date: Sun, 6 Oct 2024 22:47:02 +0100
Subject: [PATCH 399/427] [MSVC] work-around for compile time issue 102513

Manual cherry-pick of #110986 to the LLVM 19 release branch.
---
 clang/lib/AST/Interp/Interp.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp
index 6fcd90e5f5849..0f9eedc3f38ea 100644
--- a/clang/lib/AST/Interp/Interp.cpp
+++ b/clang/lib/AST/Interp/Interp.cpp
@@ -925,6 +925,10 @@ void diagnoseEnumValue(InterpState &S, CodePtr OpPC, const EnumDecl *ED,
   }
 }
 
+// https://github.com/llvm/llvm-project/issues/102513
+#if defined(_WIN32) && !defined(__clang__) && !defined(NDEBUG)
+#pragma optimize("", off)
+#endif
 bool Interpret(InterpState &S, APValue &Result) {
   // The current stack frame when we started Interpret().
   // This is being used by the ops to determine wheter
@@ -949,6 +953,10 @@ bool Interpret(InterpState &S, APValue &Result) {
     }
   }
 }
+// https://github.com/llvm/llvm-project/issues/102513
+#if defined(_WIN32) && !defined(__clang__) && !defined(NDEBUG)
+#pragma optimize("", on)
+#endif
 
 } // namespace interp
 } // namespace clang

From 6925f3c7c7d8b83e2195cb8e473eccdecae42607 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston@google.com>
Date: Thu, 14 Nov 2024 10:35:35 -0800
Subject: [PATCH 400/427] Reapply "[sanitizer_common] AND signals in
 BlockSignals instead of deleting (#113443)" for non-Android Linux only
 (#115790)

The original patch (25fd366d6a7d40266ff27c134ed8beb0a90cc33b) was
reverted in 083a5cdbeab09517d8345868970d4f41170d7ed2 because it broke
some buildbots.

This revised patch makes two changes:
- Reverts to *pre-#98200* behavior for Android. This avoids a build
breakage on Android.
- Only define KeepUnblocked if SANITIZER_LINUX: this avoids a build
breakage on solaris, which does not support internal_sigdelset.
N.B. Other buildbot failures were non-sanitizer tests and are therefore
unrelated.

Original commit message:
    My earlier patch https://github.com/llvm/llvm-project/pull/98200
    caused a regression because it unconditionally unblocked synchronous
    signals, even if the user program had deliberately blocked them.
    This patch fixes the issue by checking the current signal mask, as
    suggested by Vitaly. It also adds tests.
    Fixes #113385

(cherry picked from commit 531acf9e2f24977d2556b39229b22f4518a1faa5)
---
 .../lib/sanitizer_common/sanitizer_linux.cpp  | 55 ++++++++++----
 .../lib/sanitizer_common/tests/CMakeLists.txt |  1 +
 .../tests/sanitizer_block_signals.cpp         | 76 +++++++++++++++++++
 3 files changed, 116 insertions(+), 16 deletions(-)
 create mode 100644 compiler-rt/lib/sanitizer_common/tests/sanitizer_block_signals.cpp

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index b9b1f496df7c9..be3b3bd94e2a5 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -160,33 +160,56 @@ void SetSigProcMask(__sanitizer_sigset_t *set, __sanitizer_sigset_t *oldset) {
   CHECK_EQ(0, internal_sigprocmask(SIG_SETMASK, set, oldset));
 }
 
+#  if SANITIZER_LINUX
+// Deletes the specified signal from newset, if it is not present in oldset
+// Equivalently: newset[signum] = newset[signum] & oldset[signum]
+static void KeepUnblocked(__sanitizer_sigset_t &newset,
+                          __sanitizer_sigset_t &oldset, int signum) {
+  // FIXME: https://github.com/google/sanitizers/issues/1816
+  if (SANITIZER_ANDROID || !internal_sigismember(&oldset, signum))
+    internal_sigdelset(&newset, signum);
+}
+#  endif
+
 // Block asynchronous signals
 void BlockSignals(__sanitizer_sigset_t *oldset) {
-  __sanitizer_sigset_t set;
-  internal_sigfillset(&set);
-#  if SANITIZER_LINUX && !SANITIZER_ANDROID
+  __sanitizer_sigset_t newset;
+  internal_sigfillset(&newset);
+
+#  if SANITIZER_LINUX
+  __sanitizer_sigset_t currentset;
+
+#    if !SANITIZER_ANDROID
+  // FIXME: https://github.com/google/sanitizers/issues/1816
+  SetSigProcMask(NULL, &currentset);
+
   // Glibc uses SIGSETXID signal during setuid call. If this signal is blocked
   // on any thread, setuid call hangs.
   // See test/sanitizer_common/TestCases/Linux/setuid.c.
-  internal_sigdelset(&set, 33);
-#  endif
-#  if SANITIZER_LINUX
+  KeepUnblocked(newset, currentset, 33);
+#    endif  // !SANITIZER_ANDROID
+
   // Seccomp-BPF-sandboxed processes rely on SIGSYS to handle trapped syscalls.
   // If this signal is blocked, such calls cannot be handled and the process may
   // hang.
-  internal_sigdelset(&set, 31);
+  KeepUnblocked(newset, currentset, 31);
 
+#    if !SANITIZER_ANDROID
   // Don't block synchronous signals
-  internal_sigdelset(&set, SIGSEGV);
-  internal_sigdelset(&set, SIGBUS);
-  internal_sigdelset(&set, SIGILL);
-  internal_sigdelset(&set, SIGTRAP);
-  internal_sigdelset(&set, SIGABRT);
-  internal_sigdelset(&set, SIGFPE);
-  internal_sigdelset(&set, SIGPIPE);
-#  endif
+  // but also don't unblock signals that the user had deliberately blocked.
+  // FIXME: https://github.com/google/sanitizers/issues/1816
+  KeepUnblocked(newset, currentset, SIGSEGV);
+  KeepUnblocked(newset, currentset, SIGBUS);
+  KeepUnblocked(newset, currentset, SIGILL);
+  KeepUnblocked(newset, currentset, SIGTRAP);
+  KeepUnblocked(newset, currentset, SIGABRT);
+  KeepUnblocked(newset, currentset, SIGFPE);
+  KeepUnblocked(newset, currentset, SIGPIPE);
+#    endif  //! SANITIZER_ANDROID
+
+#  endif  // SANITIZER_LINUX
 
-  SetSigProcMask(&set, oldset);
+  SetSigProcMask(&newset, oldset);
 }
 
 ScopedBlockSignals::ScopedBlockSignals(__sanitizer_sigset_t *copy) {
diff --git a/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt b/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt
index 2b4c15125263a..fef8bb772e0e0 100644
--- a/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt
+++ b/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt
@@ -15,6 +15,7 @@ set(SANITIZER_UNITTESTS
   sanitizer_array_ref_test.cpp
   sanitizer_atomic_test.cpp
   sanitizer_bitvector_test.cpp
+  sanitizer_block_signals.cpp
   sanitizer_bvgraph_test.cpp
   sanitizer_chained_origin_depot_test.cpp
   sanitizer_common_test.cpp
diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_block_signals.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_block_signals.cpp
new file mode 100644
index 0000000000000..b43648a8aef23
--- /dev/null
+++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_block_signals.cpp
@@ -0,0 +1,76 @@
+//===-- sanitizer_block_signals.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of sanitizer_common unit tests.
+//
+//===----------------------------------------------------------------------===//
+#include <signal.h>
+#include <stdio.h>
+
+#include "gtest/gtest.h"
+#include "sanitizer_common/sanitizer_linux.h"
+
+namespace __sanitizer {
+
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
+volatile int received_sig = -1;
+
+void signal_handler(int signum) { received_sig = signum; }
+
+TEST(SanitizerCommon, NoBlockSignals) {
+  // No signals blocked
+  signal(SIGUSR1, signal_handler);
+  raise(SIGUSR1);
+  EXPECT_EQ(received_sig, SIGUSR1);
+
+  received_sig = -1;
+  signal(SIGPIPE, signal_handler);
+  raise(SIGPIPE);
+  EXPECT_EQ(received_sig, SIGPIPE);
+}
+
+TEST(SanitizerCommon, BlockSignalsPlain) {
+  // ScopedBlockSignals; SIGUSR1 should be blocked but not SIGPIPE
+  {
+    __sanitizer_sigset_t sigset = {};
+    ScopedBlockSignals block(&sigset);
+
+    received_sig = -1;
+    signal(SIGUSR1, signal_handler);
+    raise(SIGUSR1);
+    EXPECT_EQ(received_sig, -1);
+
+    received_sig = -1;
+    signal(SIGPIPE, signal_handler);
+    raise(SIGPIPE);
+    EXPECT_EQ(received_sig, SIGPIPE);
+  }
+  EXPECT_EQ(received_sig, SIGUSR1);
+}
+
+TEST(SanitizerCommon, BlockSignalsExceptPipe) {
+  // Manually block SIGPIPE; ScopedBlockSignals should not unblock this
+  sigset_t block_sigset;
+  sigemptyset(&block_sigset);
+  sigaddset(&block_sigset, SIGPIPE);
+  sigprocmask(SIG_BLOCK, &block_sigset, NULL);
+  {
+    __sanitizer_sigset_t sigset = {};
+    ScopedBlockSignals block(&sigset);
+
+    received_sig = -1;
+    signal(SIGPIPE, signal_handler);
+    raise(SIGPIPE);
+    EXPECT_EQ(received_sig, -1);
+  }
+  sigprocmask(SIG_UNBLOCK, &block_sigset, NULL);
+  EXPECT_EQ(received_sig, SIGPIPE);
+}
+#endif  // SANITIZER_LINUX && !SANITIZER_ANDROID
+
+}  // namespace __sanitizer

From aadaa00de76ed0c4987b97450dd638f63a385bed Mon Sep 17 00:00:00 2001
From: Heejin Ahn <aheejin@gmail.com>
Date: Tue, 5 Nov 2024 21:45:13 -0800
Subject: [PATCH 401/427] [WebAssembly] Fix rethrow's index calculation
 (#114693)

So far we have assumed that we only rethrow the exception caught in the
innermost EH pad. This is true in code we directly generate, but after
inlining this may not be the case. For example, consider this code:
```ll
ehcleanup:
  %0 = cleanuppad ...
  call @destructor
  cleanupret from %0 unwind label %catch.dispatch
```

If `destructor` gets inlined into this function, the code can be like
```ll
ehcleanup:
  %0 = cleanuppad ...
  invoke @throwing_func
    to label %unreachale unwind label %catch.dispatch.i

catch.dispatch.i:
  catchswitch ... [ label %catch.start.i ]

catch.start.i:
  %1 = catchpad ...
  invoke @some_function
    to label %invoke.cont.i unwind label %terminate.i

invoke.cont.i:
  catchret from %1 to label %destructor.exit

destructor.exit:
  cleanupret from %0 unwind label %catch.dispatch
```

We lower a `cleanupret` into `rethrow`, which assumes it rethrows the
exception caught by the nearest dominating EH pad. But after the
inlining, the nearest dominating EH pad is not `ehcleanup` but
`catch.start.i`.

The problem exists in the same manner in the new (exnref) EH, because it
assumes the exception comes from the nearest EH pad and saves an exnref
from that EH pad and rethrows it (using `throw_ref`).

This problem can be fixed easily if `cleanupret` has the basic block
where its matching `cleanuppad` is. The bitcode instruction `cleanupret`
kind of has that info (it has a token from the `cleanuppad`), but that
info is lost when when we enter ISel, because `TargetSelectionDAG.td`'s
`cleanupret` node does not have any arguments:

https://github.com/llvm/llvm-project/blob/5091a359d9807db8f7d62375696f93fc34226969/llvm/include/llvm/Target/TargetSelectionDAG.td#L700
Note that `catchret` already has two basic block arguments, even though
neither of them means `catchpad`'s BB.

This PR adds the `cleanuppad`'s BB as an argument to `cleanupret` node
in ISel and uses it in the Wasm backend. Because this node is also used
in X86 backend we need to note its argument there too but nothing more
needs to change there as long as X86 doesn't need it.

---

- Details about changes in the Wasm backend:

After this PR, our pseudo `RETHROW` instruction takes a BB, which means
the EH pad whose exception it needs to rethrow. There are currently two
ways to generate a `RETHROW`: one is from `llvm.wasm.rethrow` intrinsic
and the other is from `CLEANUPRET` we discussed above. In case of
`llvm.wasm.rethrow`, we add a '0' as a placeholder argument when it is
lowered to a `RETHROW`, and change it to a BB in LateEHPrepare. As
written in the comments, this PR doesn't change how this BB is computed.
The BB argument will be converted to an immediate argument as with other
control flow instructions in CFGStackify.

In case of `CLEANUPRET`, it already has a BB argument pointing to an EH
pad, so it is just converted to a `RETHROW` with the same BB argument in
LateEHPrepare. This will also be lowered to an immediate in CFGStackify
with other control flow instructions.

---

Fixes #114600.
---
 .../include/llvm/Target/TargetSelectionDAG.td |   6 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |   6 +-
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |   2 +-
 .../WebAssembly/WebAssemblyCFGStackify.cpp    |  53 ++-------
 .../WebAssembly/WebAssemblyISelDAGToDAG.cpp   |  13 +++
 .../WebAssembly/WebAssemblyInstrControl.td    |   9 +-
 .../WebAssembly/WebAssemblyLateEHPrepare.cpp  |  34 +++++-
 llvm/lib/Target/X86/X86InstrCompiler.td       |   3 +-
 .../CodeGen/WebAssembly/cfg-stackify-eh.mir   |  11 +-
 .../CodeGen/WebAssembly/exception-legacy.ll   | 105 ++++++++++++++++++
 llvm/test/CodeGen/WebAssembly/exception.mir   |   2 +-
 11 files changed, 181 insertions(+), 63 deletions(-)

diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 46044aab79a83..e7895258438d2 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -231,6 +231,10 @@ def SDTCatchret : SDTypeProfile<0, 2, [     // catchret
   SDTCisVT<0, OtherVT>, SDTCisVT<1, OtherVT>
 ]>;
 
+def SDTCleanupret : SDTypeProfile<0, 1, [   // cleanupret
+  SDTCisVT<0, OtherVT>
+]>;
+
 def SDTNone : SDTypeProfile<0, 0, []>;      // ret, trap
 
 def SDTUBSANTrap : SDTypeProfile<0, 1, []>;      // ubsantrap
@@ -680,7 +684,7 @@ def brind      : SDNode<"ISD::BRIND"      , SDTBrind,  [SDNPHasChain]>;
 def br         : SDNode<"ISD::BR"         , SDTBr,     [SDNPHasChain]>;
 def catchret   : SDNode<"ISD::CATCHRET"   , SDTCatchret,
                         [SDNPHasChain, SDNPSideEffect]>;
-def cleanupret : SDNode<"ISD::CLEANUPRET" , SDTNone,   [SDNPHasChain]>;
+def cleanupret : SDNode<"ISD::CLEANUPRET" , SDTCleanupret, [SDNPHasChain]>;
 
 def trap       : SDNode<"ISD::TRAP"       , SDTNone,
                         [SDNPHasChain, SDNPSideEffect]>;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 37b1131d2f8a3..7fa3b8a73a419 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2155,8 +2155,10 @@ void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
   FuncInfo.MBB->normalizeSuccProbs();
 
   // Create the terminator node.
-  SDValue Ret =
-      DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
+  MachineBasicBlock *CleanupPadMBB =
+      FuncInfo.MBBMap[I.getCleanupPad()->getParent()];
+  SDValue Ret = DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other,
+                            getControlRoot(), DAG.getBasicBlock(CleanupPadMBB));
   DAG.setRoot(Ret);
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 1053ba9242768..95f2f91f82bd4 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5159,7 +5159,7 @@ let isPseudo = 1 in {
 //===----------------------------------------------------------------------===//
 let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
     isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
-   def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>;
+   def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret bb)]>, Sched<[]>;
    let usesCustomInserter = 1 in
      def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>,
                     Sched<[]>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index 70b91c266c497..cd0aea313da0d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -87,9 +87,8 @@ class WebAssemblyCFGStackify final : public MachineFunctionPass {
                           const MachineBasicBlock *MBB);
   unsigned getDelegateDepth(const SmallVectorImpl<EndMarkerInfo> &Stack,
                             const MachineBasicBlock *MBB);
-  unsigned
-  getRethrowDepth(const SmallVectorImpl<EndMarkerInfo> &Stack,
-                  const SmallVectorImpl<const MachineBasicBlock *> &EHPadStack);
+  unsigned getRethrowDepth(const SmallVectorImpl<EndMarkerInfo> &Stack,
+                           const MachineBasicBlock *EHPadToRethrow);
   void rewriteDepthImmediates(MachineFunction &MF);
   void fixEndsAtEndOfFunction(MachineFunction &MF);
   void cleanupFunctionData(MachineFunction &MF);
@@ -1612,34 +1611,13 @@ unsigned WebAssemblyCFGStackify::getDelegateDepth(
 
 unsigned WebAssemblyCFGStackify::getRethrowDepth(
     const SmallVectorImpl<EndMarkerInfo> &Stack,
-    const SmallVectorImpl<const MachineBasicBlock *> &EHPadStack) {
+    const MachineBasicBlock *EHPadToRethrow) {
   unsigned Depth = 0;
-  // In our current implementation, rethrows always rethrow the exception caught
-  // by the innermost enclosing catch. This means while traversing Stack in the
-  // reverse direction, when we encounter END_TRY, we should check if the
-  // END_TRY corresponds to the current innermost EH pad. For example:
-  // try
-  //   ...
-  // catch         ;; (a)
-  //   try
-  //     rethrow 1 ;; (b)
-  //   catch       ;; (c)
-  //     rethrow 0 ;; (d)
-  //   end         ;; (e)
-  // end           ;; (f)
-  //
-  // When we are at 'rethrow' (d), while reversely traversing Stack the first
-  // 'end' we encounter is the 'end' (e), which corresponds to the 'catch' (c).
-  // And 'rethrow' (d) rethrows the exception caught by 'catch' (c), so we stop
-  // there and the depth should be 0. But when we are at 'rethrow' (b), it
-  // rethrows the exception caught by 'catch' (a), so when traversing Stack
-  // reversely, we should skip the 'end' (e) and choose 'end' (f), which
-  // corresponds to 'catch' (a).
   for (auto X : reverse(Stack)) {
     const MachineInstr *End = X.second;
     if (End->getOpcode() == WebAssembly::END_TRY) {
       auto *EHPad = TryToEHPad[EndToBegin[End]];
-      if (EHPadStack.back() == EHPad)
+      if (EHPadToRethrow == EHPad)
         break;
     }
     ++Depth;
@@ -1651,7 +1629,6 @@ unsigned WebAssemblyCFGStackify::getRethrowDepth(
 void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) {
   // Now rewrite references to basic blocks to be depth immediates.
   SmallVector<EndMarkerInfo, 8> Stack;
-  SmallVector<const MachineBasicBlock *, 8> EHPadStack;
   for (auto &MBB : reverse(MF)) {
     for (MachineInstr &MI : llvm::reverse(MBB)) {
       switch (MI.getOpcode()) {
@@ -1669,31 +1646,14 @@ void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) {
         break;
 
       case WebAssembly::END_BLOCK:
+      case WebAssembly::END_TRY:
         Stack.push_back(std::make_pair(&MBB, &MI));
         break;
 
-      case WebAssembly::END_TRY: {
-        // We handle DELEGATE in the default level, because DELEGATE has
-        // immediate operands to rewrite.
-        Stack.push_back(std::make_pair(&MBB, &MI));
-        auto *EHPad = TryToEHPad[EndToBegin[&MI]];
-        EHPadStack.push_back(EHPad);
-        break;
-      }
-
       case WebAssembly::END_LOOP:
         Stack.push_back(std::make_pair(EndToBegin[&MI]->getParent(), &MI));
         break;
 
-      case WebAssembly::CATCH:
-      case WebAssembly::CATCH_ALL:
-        EHPadStack.pop_back();
-        break;
-
-      case WebAssembly::RETHROW:
-        MI.getOperand(0).setImm(getRethrowDepth(Stack, EHPadStack));
-        break;
-
       default:
         if (MI.isTerminator()) {
           // Rewrite MBB operands to be depth immediates.
@@ -1705,6 +1665,9 @@ void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) {
               if (MI.getOpcode() == WebAssembly::DELEGATE)
                 MO = MachineOperand::CreateImm(
                     getDelegateDepth(Stack, MO.getMBB()));
+              else if (MI.getOpcode() == WebAssembly::RETHROW)
+                MO = MachineOperand::CreateImm(
+                    getRethrowDepth(Stack, MO.getMBB()));
               else
                 MO = MachineOperand::CreateImm(
                     getBranchDepth(Stack, MO.getMBB()));
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index 0f06f54f219f9..18545e92886ac 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -245,6 +245,19 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
       ReplaceNode(Node, Throw);
       return;
     }
+    case Intrinsic::wasm_rethrow: {
+      // RETHROW's BB argument will be populated in LateEHPrepare. Just use a
+      // '0' as a placeholder for now.
+      MachineSDNode *Rethrow = CurDAG->getMachineNode(
+          WebAssembly::RETHROW, DL,
+          MVT::Other, // outchain type
+          {
+              CurDAG->getConstant(0, DL, MVT::i32), // placeholder
+              Node->getOperand(0)                   // inchain
+          });
+      ReplaceNode(Node, Rethrow);
+      return;
+    }
     }
     break;
   }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index be6547007aaf7..261277f8a02cf 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -132,11 +132,9 @@ let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
 defm THROW : I<(outs), (ins tag_op:$tag, variable_ops),
                (outs), (ins tag_op:$tag), [],
                "throw   \t$tag", "throw   \t$tag", 0x08>;
-defm RETHROW : NRI<(outs), (ins i32imm:$depth), [], "rethrow \t$depth", 0x09>;
+// $ehpad is the EH pad where the exception to rethrow has been caught.
+defm RETHROW : NRI<(outs), (ins bb_op:$ehpad), [], "rethrow \t$ehpad", 0x09>;
 } // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
-// The depth argument will be computed in CFGStackify. We set it to 0 here for
-// now.
-def : Pat<(int_wasm_rethrow), (RETHROW 0)>;
 
 // Region within which an exception is caught: try / end_try
 let Uses = [VALUE_STACK], Defs = [VALUE_STACK] in {
@@ -160,7 +158,8 @@ defm DELEGATE : NRI<(outs), (ins bb_op:$dst), [], "delegate \t $dst", 0x18>;
 // Pseudo instructions: cleanupret / catchret
 let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
     isPseudo = 1, isEHScopeReturn = 1 in {
-  defm CLEANUPRET : NRI<(outs), (ins), [(cleanupret)], "cleanupret", 0>;
+  defm CLEANUPRET : NRI<(outs), (ins bb_op:$ehpad), [(cleanupret bb:$ehpad)],
+                        "cleanupret", 0>;
   defm CATCHRET : NRI<(outs), (ins bb_op:$dst, bb_op:$from),
                       [(catchret bb:$dst, bb:$from)], "catchret", 0>;
 } // isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
index 94037b9ab189d..b8f3bcb57f6bf 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
@@ -245,11 +245,39 @@ bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
       Changed = true;
       break;
     }
+    case WebAssembly::RETHROW:
+      // These RETHROWs here were lowered from llvm.wasm.rethrow() intrinsics,
+      // generated in Clang for when an exception is not caught by the given
+      // type (e.g. catch (int)).
+      //
+      // RETHROW's BB argument is the EH pad where the exception to rethrow has
+      // been caught. (Until this point, RETHROW has just a '0' as a placeholder
+      // argument.) For these llvm.wasm.rethrow()s, we can safely assume the
+      // exception comes from the nearest dominating EH pad, because catch.start
+      // EH pad is structured like this:
+      //
+      // catch.start:
+      //   catchpad ...
+      //   %matches = compare ehselector with typeid
+      //   br i1 %matches, label %catch, label %rethrow
+      //
+      // rethrow:
+      //   ;; rethrows the exception caught in 'catch.start'
+      //   call @llvm.wasm.rethrow()
+      TI->removeOperand(0);
+      TI->addOperand(MachineOperand::CreateMBB(getMatchingEHPad(TI)));
+      Changed = true;
+      break;
     case WebAssembly::CLEANUPRET: {
-      // Replace a cleanupret with a rethrow. For C++ support, currently
-      // rethrow's immediate argument is always 0 (= the latest exception).
+      // CLEANUPRETs have the EH pad BB the exception to rethrow has been caught
+      // as an argument. Use it and change the instruction opcode to 'RETHROW'
+      // to make rethrowing instructions consistent.
+      //
+      // This is because we cannot safely assume that it is always the nearest
+      // dominating EH pad, in case there are code transformations such as
+      // inlining.
       BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW))
-          .addImm(0);
+          .addMBB(TI->getOperand(0).getMBB());
       TI->eraseFromParent();
       Changed = true;
       break;
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 5a8177e2b3607..9b13447754e4c 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -195,7 +195,8 @@ def EH_RETURN64   : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
 
 let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
     isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1 in {
-  def CLEANUPRET : I<0, Pseudo, (outs), (ins), "# CLEANUPRET", [(cleanupret)]>;
+  def CLEANUPRET : I<0, Pseudo, (outs), (ins), "# CLEANUPRET",
+                     [(cleanupret bb)]>;
 
   // CATCHRET needs a custom inserter for SEH.
   let usesCustomInserter = 1 in
diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.mir b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.mir
index 0386410d1b612..c434e14b30d15 100644
--- a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.mir
+++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.mir
@@ -39,15 +39,16 @@ body: |
     ; CHECK: RETHROW 1
     EH_LABEL <mcsymbol .Ltmp2>
     %0:i32 = CATCH &__cpp_exception, implicit-def dead $arguments
-    RETHROW 0, implicit-def dead $arguments
+    RETHROW %bb.1, implicit-def dead $arguments
 
   bb.2 (landing-pad):
+    successors: %bb.3
     ; CHECK: bb.2 (landing-pad):
     ; CHECK: CATCH
     ; CHECK: RETHROW 0
     EH_LABEL <mcsymbol .Ltmp3>
     %1:i32 = CATCH &__cpp_exception, implicit-def dead $arguments
-    RETHROW 0, implicit-def dead $arguments
+    RETHROW %bb.2, implicit-def dead $arguments
 
   bb.3:
     ; CHECK: bb.3:
@@ -104,12 +105,14 @@ body: |
     RETURN %0:i32, implicit-def dead $arguments
 
   bb.3 (landing-pad):
+    successors:
     EH_LABEL <mcsymbol .Ltmp4>
     %0:i32 = CATCH &__cpp_exception, implicit-def dead $arguments
-    RETHROW 0, implicit-def dead $arguments
+    RETHROW %bb.3, implicit-def dead $arguments
 
   bb.4 (landing-pad):
+    successors:
     EH_LABEL <mcsymbol .Ltmp5>
     %1:i32 = CATCH &__cpp_exception, implicit-def dead $arguments
-    RETHROW 0, implicit-def dead $arguments
+    RETHROW %bb.4, implicit-def dead $arguments
 ...
diff --git a/llvm/test/CodeGen/WebAssembly/exception-legacy.ll b/llvm/test/CodeGen/WebAssembly/exception-legacy.ll
index 3537baa425164..a0429f40b2540 100644
--- a/llvm/test/CodeGen/WebAssembly/exception-legacy.ll
+++ b/llvm/test/CodeGen/WebAssembly/exception-legacy.ll
@@ -400,6 +400,107 @@ unreachable:                                      ; preds = %rethrow
   unreachable
 }
 
+; The bitcode below is generated when the code below is compiled and
+; Temp::~Temp() is inlined into inlined_cleanupret():
+;
+; void inlined_cleanupret() {
+; try {
+;   Temp t;
+;   throw 2;
+; } catch (...)
+; }
+;
+; Temp::~Temp() {
+;   try {
+;     throw 1;
+;   } catch (...) {
+;   }
+; }
+;
+; ~Temp() generates cleanupret, which is lowered to a 'rethrow' later. That
+; rethrow's immediate argument should correctly target the top-level cleanuppad
+; (catch_all). This is a regression test for the bug where we did not compute
+; rethrow's argument correctly.
+
+; CHECK-LABEL: inlined_cleanupret:
+; CHECK: try
+; CHECK:   call  __cxa_throw
+; CHECK: catch_all
+; CHECK:   try
+; CHECK:     try
+; CHECK:       call  __cxa_throw
+; CHECK:       catch
+; CHECK:       call  __cxa_end_catch
+; CHECK:       try
+; CHECK:         try
+; Note that this rethrow targets the top-level catch_all
+; CHECK:           rethrow   4
+; CHECK:         catch
+; CHECK:           try
+; CHECK:             call  __cxa_end_catch
+; CHECK:           delegate    5
+; CHECK:           return
+; CHECK:         end_try
+; CHECK:       delegate    3
+; CHECK:     end_try
+; CHECK:   catch_all
+; CHECK:     call  _ZSt9terminatev
+; CHECK:   end_try
+; CHECK: end_try
+define void @inlined_cleanupret() personality ptr @__gxx_wasm_personality_v0 {
+entry:
+  %exception = tail call ptr @__cxa_allocate_exception(i32 4)
+  store i32 2, ptr %exception, align 16
+  invoke void @__cxa_throw(ptr nonnull %exception, ptr nonnull @_ZTIi, ptr null)
+          to label %unreachable unwind label %ehcleanup
+
+ehcleanup:                                        ; preds = %entry
+  %0 = cleanuppad within none []
+  %exception.i = call ptr @__cxa_allocate_exception(i32 4) [ "funclet"(token %0) ]
+  store i32 1, ptr %exception.i, align 16
+  invoke void @__cxa_throw(ptr nonnull %exception.i, ptr nonnull @_ZTIi, ptr null) [ "funclet"(token %0) ]
+          to label %unreachable unwind label %catch.dispatch.i
+
+catch.dispatch.i:                                 ; preds = %ehcleanup
+  %1 = catchswitch within %0 [label %catch.start.i] unwind label %terminate.i
+
+catch.start.i:                                    ; preds = %catch.dispatch.i
+  %2 = catchpad within %1 [ptr null]
+  %3 = tail call ptr @llvm.wasm.get.exception(token %2)
+  %4 = tail call i32 @llvm.wasm.get.ehselector(token %2)
+  %5 = call ptr @__cxa_begin_catch(ptr %3) [ "funclet"(token %2) ]
+  invoke void @__cxa_end_catch() [ "funclet"(token %2) ]
+          to label %invoke.cont.i unwind label %terminate.i
+
+invoke.cont.i:                                    ; preds = %catch.start.i
+  catchret from %2 to label %_ZN4TempD2Ev.exit
+
+terminate.i:                                      ; preds = %catch.start.i, %catch.dispatch.i
+  %6 = cleanuppad within %0 []
+  call void @_ZSt9terminatev() [ "funclet"(token %6) ]
+  unreachable
+
+_ZN4TempD2Ev.exit:                                ; preds = %invoke.cont.i
+  cleanupret from %0 unwind label %catch.dispatch
+
+catch.dispatch:                                   ; preds = %_ZN4TempD2Ev.exit
+  %7 = catchswitch within none [label %catch.start] unwind to caller
+
+catch.start:                                      ; preds = %catch.dispatch
+  %8 = catchpad within %7 [ptr null]
+  %9 = tail call ptr @llvm.wasm.get.exception(token %8)
+  %10 = tail call i32 @llvm.wasm.get.ehselector(token %8)
+  %11 = call ptr @__cxa_begin_catch(ptr %9) #8 [ "funclet"(token %8) ]
+  call void @__cxa_end_catch() [ "funclet"(token %8) ]
+  catchret from %8 to label %try.cont
+
+try.cont:                                         ; preds = %catch.start
+  ret void
+
+unreachable:                                      ; preds = %entry
+  unreachable
+}
+
 
 declare void @foo()
 declare void @bar(ptr)
@@ -415,8 +516,12 @@ declare i32 @llvm.wasm.get.ehselector(token) #0
 declare void @llvm.wasm.rethrow() #1
 ; Function Attrs: nounwind
 declare i32 @llvm.eh.typeid.for(ptr) #0
+; Function Attrs: nounwind
+declare ptr @__cxa_allocate_exception(i32) #0
 declare ptr @__cxa_begin_catch(ptr)
 declare void @__cxa_end_catch()
+; Function Attrs: noreturn
+declare void @__cxa_throw(ptr, ptr, ptr) #1
 declare void @_ZSt9terminatev()
 declare ptr @_ZN4TempD2Ev(ptr returned)
 
diff --git a/llvm/test/CodeGen/WebAssembly/exception.mir b/llvm/test/CodeGen/WebAssembly/exception.mir
index 895e8d8864ea2..a5f78c18db16a 100644
--- a/llvm/test/CodeGen/WebAssembly/exception.mir
+++ b/llvm/test/CodeGen/WebAssembly/exception.mir
@@ -105,7 +105,7 @@ body: |
 
   bb.2:
     successors: %bb.3
-    CLEANUPRET implicit-def dead $arguments
+    CLEANUPRET %bb.1, implicit-def dead $arguments
 
   bb.3:
     RETURN implicit-def dead $arguments

From 51dee6b64fda7c49a68af78555135c8f51a308cf Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@hieta.se>
Date: Tue, 19 Nov 2024 13:43:39 +0100
Subject: [PATCH 402/427] Bump version to 19.1.5

---
 cmake/Modules/LLVMVersion.cmake          | 2 +-
 libcxx/include/__config                  | 2 +-
 llvm/utils/gn/secondary/llvm/version.gni | 2 +-
 llvm/utils/lit/lit/__init__.py           | 2 +-
 llvm/utils/mlgo-utils/mlgo/__init__.py   | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake
index 6ccb934aef436..9b39550118c49 100644
--- a/cmake/Modules/LLVMVersion.cmake
+++ b/cmake/Modules/LLVMVersion.cmake
@@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR)
   set(LLVM_VERSION_MINOR 1)
 endif()
 if(NOT DEFINED LLVM_VERSION_PATCH)
-  set(LLVM_VERSION_PATCH 4)
+  set(LLVM_VERSION_PATCH 5)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
   set(LLVM_VERSION_SUFFIX)
diff --git a/libcxx/include/__config b/libcxx/include/__config
index a929db5d0f2d1..33e0043136fee 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -27,7 +27,7 @@
 // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM.
 // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is
 // defined to XXYYZZ.
-#  define _LIBCPP_VERSION 190104
+#  define _LIBCPP_VERSION 190105
 
 #  define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
 #  define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni
index 0c2804f70a147..c32a040dcba67 100644
--- a/llvm/utils/gn/secondary/llvm/version.gni
+++ b/llvm/utils/gn/secondary/llvm/version.gni
@@ -1,4 +1,4 @@
 llvm_version_major = 19
 llvm_version_minor = 1
-llvm_version_patch = 4
+llvm_version_patch = 5
 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch"
diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py
index 81b74db977b08..8557e5a061d1d 100644
--- a/llvm/utils/lit/lit/__init__.py
+++ b/llvm/utils/lit/lit/__init__.py
@@ -2,7 +2,7 @@
 
 __author__ = "Daniel Dunbar"
 __email__ = "daniel@minormatter.com"
-__versioninfo__ = (19, 1, 4)
+__versioninfo__ = (19, 1, 5)
 __version__ = ".".join(str(v) for v in __versioninfo__) + "dev"
 
 __all__ = []
diff --git a/llvm/utils/mlgo-utils/mlgo/__init__.py b/llvm/utils/mlgo-utils/mlgo/__init__.py
index 77fe60a0b1590..e2f8ca88d91ec 100644
--- a/llvm/utils/mlgo-utils/mlgo/__init__.py
+++ b/llvm/utils/mlgo-utils/mlgo/__init__.py
@@ -4,7 +4,7 @@
 
 from datetime import timezone, datetime
 
-__versioninfo__ = (19, 1, 4)
+__versioninfo__ = (19, 1, 5)
 __version__ = (
     ".".join(str(v) for v in __versioninfo__)
     + "dev"

From 02930b87faeb490505b22f588757a18744248b6f Mon Sep 17 00:00:00 2001
From: Manasij Mukherjee <manasij7479@gmail.com>
Date: Fri, 4 Oct 2024 15:15:30 -0600
Subject: [PATCH 403/427] [NVPTX] Promote v2i8 to v2i16 (#111189)

Promote v2i8 to v2i16, fixes a crash.
Re-enable a test in NVPTX/vector-returns.ll

Partial cherry-pick of fda2fea w/o the test which does not exist in release/19.x
https://github.com/llvm/llvm-project/issues/104864
---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 6975412ce5d35..b2153a7afe736 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -229,6 +229,10 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
         // v*i8 are formally lowered as v4i8
         EltVT = MVT::v4i8;
         NumElts = (NumElts + 3) / 4;
+      } else if (EltVT.getSimpleVT() == MVT::i8 && NumElts == 2) {
+        // v2i8 is promoted to v2i16
+        NumElts = 1;
+        EltVT = MVT::v2i16;
       }
       for (unsigned j = 0; j != NumElts; ++j) {
         ValueVTs.push_back(EltVT);

From e032d7ad80f8d185626b39751f672932f92dc033 Mon Sep 17 00:00:00 2001
From: Anutosh Bhat <andersonbhat491@gmail.com>
Date: Tue, 19 Nov 2024 13:37:40 +0530
Subject: [PATCH 404/427] [clang-repl] Improve flags responsible for generating
 shared wasm binaries (#116735)

There are a couple changes in this PR that help getting clang-repl to
run in the browser. Using a jupyterlite instance for the example pasted
below

1) Updating flags responsible for generating shared wasm binaries that
need to be dynamically loaded Most Importantly as can be seen in the
changes `shared` and `allow-undefined` are crucial.

![image](https://github.com/user-attachments/assets/1183fd44-8951-496a-899a-e4af39a48447)

2) While exiting we encounter this.

![image](https://github.com/user-attachments/assets/9487a3f4-7200-471d-ba88-09e98ccbc47a)

Now as can be seen here

https://github.com/llvm/llvm-project/blob/cd418030de7ae75750bc4e48d1238baf03c675e5/clang/lib/Interpreter/Interpreter.cpp#L421-L430

We call cleanUP in the destructor. Now cleanUP through
IncrementalExecutor tries to deinitialize the JIT which wasn't even
intialized as runCtors in wasm.cpp is a no-op

https://github.com/llvm/llvm-project/blob/cd418030de7ae75750bc4e48d1238baf03c675e5/clang/lib/Interpreter/IncrementalExecutor.cpp#L94-L101

https://github.com/llvm/llvm-project/blob/cd418030de7ae75750bc4e48d1238baf03c675e5/clang/lib/Interpreter/Wasm.cpp#L107-L109
(cherry picked from commit 752dbd6112affa418e33910ac08bf9921f9c270b)
---
 clang/lib/Interpreter/IncrementalExecutor.h |  2 +-
 clang/lib/Interpreter/Interpreter.cpp       |  1 -
 clang/lib/Interpreter/Wasm.cpp              | 10 ++++++++--
 clang/lib/Interpreter/Wasm.h                |  1 +
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Interpreter/IncrementalExecutor.h b/clang/lib/Interpreter/IncrementalExecutor.h
index 7954cde36588b..dbd61f0b8b1eb 100644
--- a/clang/lib/Interpreter/IncrementalExecutor.h
+++ b/clang/lib/Interpreter/IncrementalExecutor.h
@@ -56,7 +56,7 @@ class IncrementalExecutor {
   virtual llvm::Error addModule(PartialTranslationUnit &PTU);
   virtual llvm::Error removeModule(PartialTranslationUnit &PTU);
   virtual llvm::Error runCtors() const;
-  llvm::Error cleanUp();
+  virtual llvm::Error cleanUp();
   llvm::Expected<llvm::orc::ExecutorAddr>
   getSymbolAddress(llvm::StringRef Name, SymbolNameKind NameKind) const;
 
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index b4882ab5d2236..c0b8bfebb4343 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -192,7 +192,6 @@ IncrementalCompilerBuilder::CreateCpp() {
 #ifdef __EMSCRIPTEN__
   Argv.push_back("-target");
   Argv.push_back("wasm32-unknown-emscripten");
-  Argv.push_back("-pie");
   Argv.push_back("-shared");
 #endif
   Argv.insert(Argv.end(), UserArgs.begin(), UserArgs.end());
diff --git a/clang/lib/Interpreter/Wasm.cpp b/clang/lib/Interpreter/Wasm.cpp
index 1001410aa0f27..79efbaa03982d 100644
--- a/clang/lib/Interpreter/Wasm.cpp
+++ b/clang/lib/Interpreter/Wasm.cpp
@@ -72,13 +72,13 @@ llvm::Error WasmIncrementalExecutor::addModule(PartialTranslationUnit &PTU) {
   OutputFile.close();
 
   std::vector<const char *> LinkerArgs = {"wasm-ld",
-                                          "-pie",
+                                          "-shared",
                                           "--import-memory",
                                           "--no-entry",
                                           "--export-all",
                                           "--experimental-pic",
-                                          "--no-export-dynamic",
                                           "--stack-first",
+                                          "--allow-undefined",
                                           OutputFileName.c_str(),
                                           "-o",
                                           OutputFileName.c_str()};
@@ -109,6 +109,12 @@ llvm::Error WasmIncrementalExecutor::runCtors() const {
   return llvm::Error::success();
 }
 
+llvm::Error WasmIncrementalExecutor::cleanUp() const {
+  // Can't call cleanUp through IncrementalExecutor as it
+  // tries to deinitialize JIT which hasn't been initialized
+  return llvm::Error::success();
+}
+
 WasmIncrementalExecutor::~WasmIncrementalExecutor() = default;
 
 } // namespace clang
diff --git a/clang/lib/Interpreter/Wasm.h b/clang/lib/Interpreter/Wasm.h
index b1fd88024f14d..4632613326d39 100644
--- a/clang/lib/Interpreter/Wasm.h
+++ b/clang/lib/Interpreter/Wasm.h
@@ -28,6 +28,7 @@ class WasmIncrementalExecutor : public IncrementalExecutor {
   llvm::Error addModule(PartialTranslationUnit &PTU) override;
   llvm::Error removeModule(PartialTranslationUnit &PTU) override;
   llvm::Error runCtors() const override;
+  llvm::Error cleanUp() override;
 
   ~WasmIncrementalExecutor() override;
 };

From fb6b195cae03ba6e5b50870031d710ca6886c5bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sun, 20 Oct 2024 13:51:50 +0300
Subject: [PATCH 405/427] [compiler-rt] [test] Remove an unintended grep
 parameter

This parameter seems unintentional here; we're trying to grep
the input on stdin, from the earlier stage in the pipeline.

Since a recent update on Github Actions runners, the previous
form (grepping a file, while piping in data on stdin) would fail
running the test, with the test runner Python script throwing
an exception when evaluating it:

      File "D:\a\llvm-mingw\llvm-mingw\llvm-project\llvm\utils\lit\lit\TestRunner.py", line 935, in _executeShCmd
        out = procs[i].stdout.read()
              ^^^^^^^^^^^^^^^^^^^^^^
      File "C:\hostedtoolcache\windows\Python\3.12.7\x64\Lib\encodings\cp1252.py", line 23, in decode
        return codecs.charmap_decode(input,self.errors,decoding_table)[0]
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    TypeError: a bytes-like object is required, not 'NoneType'

(cherry picked from commit c2717a89b8437d041d532c7b2c535ca4f4b35872)
---
 compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp b/compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp
index 9277fe0b23516..38e99cf685945 100644
--- a/compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp
@@ -9,7 +9,7 @@
 // static build, there won't be any clang_rt DLLs.
 // RUN: not grep cl""ang_rt %t || \
 // RUN:   grep cl""ang_rt %t | xargs which | \
-// RUN:   xargs llvm-readobj --coff-imports | not grep dbghelp.dll %t
+// RUN:   xargs llvm-readobj --coff-imports | not grep dbghelp.dll
 
 extern "C" int puts(const char *);
 

From ea960400213ad6a619d40536ae8965fca70eac89 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Sat, 16 Nov 2024 20:55:33 -0800
Subject: [PATCH 406/427] [Mips] Change vsplat_imm_eq_1 to a ComplexPattern.
 (#116471)

Resolves a FIXME and avoids needing to workaround #116075.

Adding parentheses around the (vsplat_imm_eq_1) fixes the error cited in
the FIXME by changing the ComplexPattern from a leaf node to an
operator.

(cherry picked from commit 2f4572f5e7e2d7f4626e825404c11f07d191fb05)
---
 llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp   |  4 ++
 llvm/lib/Target/Mips/MipsISelDAGToDAG.h     |  3 ++
 llvm/lib/Target/Mips/MipsMSAInstrInfo.td    | 59 ++++++++-------------
 llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 12 +++++
 llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h   |  3 ++
 5 files changed, 43 insertions(+), 38 deletions(-)

diff --git a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
index f6f32fde3b777..a9ffd2bedf21e 100644
--- a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -220,6 +220,10 @@ bool MipsDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const {
   return false;
 }
 
+bool MipsDAGToDAGISel::selectVSplatImmEq1(SDValue N) const {
+  llvm_unreachable("Unimplemented function.");
+}
+
 /// Convert vector addition with vector subtraction if that allows to encode
 /// constant as an immediate and thus avoid extra 'ldi' instruction.
 /// add X, <-1, -1...> --> sub X, <1, 1...>
diff --git a/llvm/lib/Target/Mips/MipsISelDAGToDAG.h b/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
index 6135f96807854..3485300a782c9 100644
--- a/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -120,6 +120,9 @@ class MipsDAGToDAGISel : public SelectionDAGISel {
   /// starting at bit zero.
   virtual bool selectVSplatMaskR(SDValue N, SDValue &Imm) const;
 
+  /// Select constant vector splats whose value is 1.
+  virtual bool selectVSplatImmEq1(SDValue N) const;
+
   /// Convert vector addition with vector subtraction if that allows to encode
   /// constant as an immediate and thus avoid extra 'ldi' instruction.
   /// add X, <-1, -1...> --> sub X, <1, 1...>
diff --git a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
index c4abccb24c6f3..f4c32c9dcd421 100644
--- a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -198,14 +198,8 @@ def vsplati32 : PatFrag<(ops node:$e0),
                         (v4i32 (build_vector node:$e0, node:$e0,
                                              node:$e0, node:$e0))>;
 
-def vsplati64_imm_eq_1 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{
-  APInt Imm;
-  SDNode *BV = N->getOperand(0).getNode();
-  EVT EltTy = N->getValueType(0).getVectorElementType();
-
-  return selectVSplat(BV, Imm, EltTy.getSizeInBits()) &&
-         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
-}]>;
+// Any build_vector that is a constant splat with a value that equals 1
+def vsplat_imm_eq_1 : ComplexPattern<vAny, 0, "selectVSplatImmEq1">;
 
 def vsplati64 : PatFrag<(ops node:$e0),
                         (v2i64 (build_vector node:$e0, node:$e0))>;
@@ -217,7 +211,7 @@ def vsplati64_splat_d : PatFrag<(ops node:$e0),
                                                                 node:$e0,
                                                                 node:$e0,
                                                                 node:$e0)),
-                                           vsplati64_imm_eq_1))))>;
+                                           (vsplat_imm_eq_1)))))>;
 
 def vsplatf32 : PatFrag<(ops node:$e0),
                         (v4f32 (build_vector node:$e0, node:$e0,
@@ -352,46 +346,35 @@ def vsplat_maskr_bits_uimm6
     : SplatComplexPattern<vsplat_uimm6, vAny, 1, "selectVSplatMaskR",
                           [build_vector, bitconvert]>;
 
-// Any build_vector that is a constant splat with a value that equals 1
-// FIXME: These should be a ComplexPattern but we can't use them because the
-//        ISel generator requires the uses to have a name, but providing a name
-//        causes other errors ("used in pattern but not operand list")
-def vsplat_imm_eq_1 : PatLeaf<(build_vector), [{
-  APInt Imm;
-  EVT EltTy = N->getValueType(0).getVectorElementType();
-
-  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
-         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
-}]>;
 
 def vbclr_b : PatFrag<(ops node:$ws, node:$wt),
-                      (and node:$ws, (vnot (shl vsplat_imm_eq_1, node:$wt)))>;
+                      (and node:$ws, (vnot (shl (vsplat_imm_eq_1), node:$wt)))>;
 def vbclr_h : PatFrag<(ops node:$ws, node:$wt),
-                      (and node:$ws, (vnot (shl vsplat_imm_eq_1, node:$wt)))>;
+                      (and node:$ws, (vnot (shl (vsplat_imm_eq_1), node:$wt)))>;
 def vbclr_w : PatFrag<(ops node:$ws, node:$wt),
-                      (and node:$ws, (vnot (shl vsplat_imm_eq_1, node:$wt)))>;
+                      (and node:$ws, (vnot (shl (vsplat_imm_eq_1), node:$wt)))>;
 def vbclr_d : PatFrag<(ops node:$ws, node:$wt),
-                      (and node:$ws, (vnot (shl (v2i64 vsplati64_imm_eq_1),
+                      (and node:$ws, (vnot (shl (v2i64 (vsplat_imm_eq_1)),
                                                node:$wt)))>;
 
 def vbneg_b : PatFrag<(ops node:$ws, node:$wt),
-                      (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+                      (xor node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>;
 def vbneg_h : PatFrag<(ops node:$ws, node:$wt),
-                      (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+                      (xor node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>;
 def vbneg_w : PatFrag<(ops node:$ws, node:$wt),
-                      (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+                      (xor node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>;
 def vbneg_d : PatFrag<(ops node:$ws, node:$wt),
-                      (xor node:$ws, (shl (v2i64 vsplati64_imm_eq_1),
+                      (xor node:$ws, (shl (v2i64 (vsplat_imm_eq_1)),
                                           node:$wt))>;
 
 def vbset_b : PatFrag<(ops node:$ws, node:$wt),
-                      (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+                      (or node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>;
 def vbset_h : PatFrag<(ops node:$ws, node:$wt),
-                      (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+                      (or node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>;
 def vbset_w : PatFrag<(ops node:$ws, node:$wt),
-                      (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+                      (or node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>;
 def vbset_d : PatFrag<(ops node:$ws, node:$wt),
-                      (or node:$ws, (shl (v2i64 vsplati64_imm_eq_1),
+                      (or node:$ws, (shl (v2i64 (vsplat_imm_eq_1)),
                                          node:$wt))>;
 
 def muladd : PatFrag<(ops node:$wd, node:$ws, node:$wt),
@@ -3842,7 +3825,7 @@ class MSAShiftPat<SDNode Node, ValueType VT, MSAInst Insn, dag Vec> :
          (VT (Insn VT:$ws, VT:$wt))>;
 
 class MSABitPat<SDNode Node, ValueType VT, MSAInst Insn, PatFrag Frag> :
-  MSAPat<(VT (Node VT:$ws, (shl vsplat_imm_eq_1, (Frag VT:$wt)))),
+  MSAPat<(VT (Node VT:$ws, (shl (vsplat_imm_eq_1), (Frag VT:$wt)))),
          (VT (Insn VT:$ws, VT:$wt))>;
 
 multiclass MSAShiftPats<SDNode Node, string Insn> {
@@ -3861,7 +3844,7 @@ multiclass MSABitPats<SDNode Node, string Insn> {
   def : MSABitPat<Node, v16i8, !cast<MSAInst>(Insn#_B), vsplati8imm7>;
   def : MSABitPat<Node, v8i16, !cast<MSAInst>(Insn#_H), vsplati16imm15>;
   def : MSABitPat<Node, v4i32, !cast<MSAInst>(Insn#_W), vsplati32imm31>;
-  def : MSAPat<(Node v2i64:$ws, (shl (v2i64 vsplati64_imm_eq_1),
+  def : MSAPat<(Node v2i64:$ws, (shl (v2i64 (vsplat_imm_eq_1)),
                                      (vsplati64imm63 v2i64:$wt))),
                (v2i64 (!cast<MSAInst>(Insn#_D) v2i64:$ws, v2i64:$wt))>;
 }
@@ -3872,16 +3855,16 @@ defm : MSAShiftPats<sra, "SRA">;
 defm : MSABitPats<xor, "BNEG">;
 defm : MSABitPats<or, "BSET">;
 
-def : MSAPat<(and v16i8:$ws, (vnot (shl vsplat_imm_eq_1,
+def : MSAPat<(and v16i8:$ws, (vnot (shl (vsplat_imm_eq_1),
                                         (vsplati8imm7 v16i8:$wt)))),
              (v16i8 (BCLR_B v16i8:$ws, v16i8:$wt))>;
-def : MSAPat<(and v8i16:$ws, (vnot (shl vsplat_imm_eq_1,
+def : MSAPat<(and v8i16:$ws, (vnot (shl (vsplat_imm_eq_1),
                                         (vsplati16imm15 v8i16:$wt)))),
              (v8i16 (BCLR_H v8i16:$ws, v8i16:$wt))>;
-def : MSAPat<(and v4i32:$ws, (vnot (shl vsplat_imm_eq_1,
+def : MSAPat<(and v4i32:$ws, (vnot (shl (vsplat_imm_eq_1),
                                         (vsplati32imm31 v4i32:$wt)))),
              (v4i32 (BCLR_W v4i32:$ws, v4i32:$wt))>;
-def : MSAPat<(and v2i64:$ws, (vnot (shl (v2i64 vsplati64_imm_eq_1),
+def : MSAPat<(and v2i64:$ws, (vnot (shl (v2i64 (vsplat_imm_eq_1)),
                                         (vsplati64imm63 v2i64:$wt)))),
              (v2i64 (BCLR_D v2i64:$ws, v2i64:$wt))>;
 
diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 7ad300c6cccd4..66c034a889c60 100644
--- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -730,6 +730,18 @@ bool MipsSEDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N,
   return false;
 }
 
+// Select const vector splat of 1.
+bool MipsSEDAGToDAGISel::selectVSplatImmEq1(SDValue N) const {
+  APInt ImmValue;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  return selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
+         ImmValue.getBitWidth() == EltTy.getSizeInBits() && ImmValue == 1;
+}
+
 bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
   unsigned Opcode = Node->getOpcode();
   SDLoc DL(Node);
diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 7b843b0e0b255..22d8e924ac534 100644
--- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -124,6 +124,9 @@ class MipsSEDAGToDAGISel : public MipsDAGToDAGISel {
   /// starting at bit zero.
   bool selectVSplatMaskR(SDValue N, SDValue &Imm) const override;
 
+  /// Select constant vector splats whose value is 1.
+  bool selectVSplatImmEq1(SDValue N) const override;
+
   bool trySelect(SDNode *Node) override;
 
   // Emits proper ABI for _mcount profiling calls.

From 3d12f45e50b68ac908ef05571e5cc52f4b966d94 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333@gmail.com>
Date: Tue, 19 Nov 2024 21:24:40 +0800
Subject: [PATCH 407/427] [SDAG][ISel][TableGen][LoongArch] Report error for
 trivial bitcasts when there are predicate calls (#116075)

On loongarch64 with lsx extension, we select `VBITREV_W` for `v4i32 (xor
X, (shl splat(1), Y))`:

https://github.com/llvm/llvm-project/blob/8e6630391699116641cf390a10476295b7d4b95c/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td#L1583-L1584

And `vsplat_imm_eq_1` is defined as:

https://github.com/llvm/llvm-project/blob/8e6630391699116641cf390a10476295b7d4b95c/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td#L77-L87

For the `(bitconvert (v4i32 (build_vector)))` case, the pattern is
expected to be:
```
PATTERN: (xor:{ *:[v4i32] } v4i32:{ *:[v4i32] }:$vj, (shl:{ *:[v4i32] } (bitconvert:{ *:[v4i32] } (build_vector:{ *:[v4i32] }))<<P:Predicate_vsplat_imm_eq_1>>, v4i32:{ *:[v4i32] }:$vk))
RESULT:  (VBITREV_W:{ *:[v4i32] } v4i32:{ *:[v4i32] }:$vj, v4i32:{ *:[v4i32] }:$vk)
```

However, `simplifyTree` drops the `bitconvert` node and its predicates:

https://github.com/llvm/llvm-project/blob/8e6630391699116641cf390a10476295b7d4b95c/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp#L3036-L3062

Then llvm will match `vsplat_imm_eq_1` for any v4i32 splats and cause a
miscompilation:
```
PATTERN: (xor:{ *:[v4i32] } v4i32:{ *:[v4i32] }:$vj, (shl:{ *:[v4i32] } (build_vector:{ *:[v4i32] }), v4i32:{ *:[v4i32] }:$vk))
RESULT:  (VBITREV_W:{ *:[v4i32] } v4i32:{ *:[v4i32] }:$vj, v4i32:{ *:[v4i32] }:$vk)
```

This patch adds additional checks for predicates associated with the
trivial bitconvert node. Unused patterns in the LoongArch target are
also removed.

Fixes https://github.com/llvm/llvm-project/issues/116008.

(cherry picked from commit c727b48287cc96888f9e262f23d53cf635cf3b3d)
---
 .../Target/LoongArch/LoongArchLSXInstrInfo.td   |  6 ++----
 llvm/test/CodeGen/LoongArch/lsx/pr116008.ll     | 17 +++++++++++++++++
 .../TableGen/Common/CodeGenDAGPatterns.cpp      |  8 ++++++++
 3 files changed, 27 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/LoongArch/lsx/pr116008.ll

diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 0580683c3ce30..0233baecf6dd9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -67,8 +67,7 @@ class VecCond<SDPatternOperator OpNode, ValueType TyNode,
   let usesCustomInserter = 1;
 }
 
-def vsplat_imm_eq_1 : PatFrags<(ops), [(build_vector),
-                                       (bitconvert (v4i32 (build_vector)))], [{
+def vsplat_imm_eq_1 : PatFrags<(ops), [(build_vector)], [{
   APInt Imm;
   EVT EltTy = N->getValueType(0).getVectorElementType();
 
@@ -109,8 +108,7 @@ def vsplati32_imm_eq_31 : PatFrags<(ops), [(build_vector)], [{
   return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
          Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 31;
 }]>;
-def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector),
-                                           (bitconvert (v4i32 (build_vector)))], [{
+def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector)], [{
   APInt Imm;
   EVT EltTy = N->getValueType(0).getVectorElementType();
 
diff --git a/llvm/test/CodeGen/LoongArch/lsx/pr116008.ll b/llvm/test/CodeGen/LoongArch/lsx/pr116008.ll
new file mode 100644
index 0000000000000..ba8ffc3493189
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/pr116008.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define <4 x i32> @xor_shl_splat_vec_one(i32 %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: xor_shl_splat_vec_one:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vreplgr2vr.w $vr1, $a0
+; CHECK-NEXT:    vsll.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vbitrevi.w $vr0, $vr0, 0
+; CHECK-NEXT:    ret
+entry:
+  %ins = insertelement <4 x i32> poison, i32 %x, i64 0
+  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
+  %shl = shl <4 x i32> %splat, %y
+  %xor = xor <4 x i32> %shl, splat (i32 1)
+  ret <4 x i32> %xor
+}
diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp
index a8cecca0d4a54..ca71569008d5e 100644
--- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp
@@ -3042,6 +3042,14 @@ static bool SimplifyTree(TreePatternNodePtr &N) {
       !N->getExtType(0).empty() &&
       N->getExtType(0) == N->getChild(0).getExtType(0) &&
       N->getName().empty()) {
+    if (!N->getPredicateCalls().empty()) {
+      std::string Str;
+      raw_string_ostream OS(Str);
+      OS << *N
+         << "\n trivial bitconvert node should not have predicate calls\n";
+      PrintFatalError(Str);
+      return false;
+    }
     N = N->getChildShared(0);
     SimplifyTree(N);
     return true;

From f9ae37c670d4bcf4713278ac94d2c8991a326f9e Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333@gmail.com>
Date: Tue, 19 Nov 2024 22:17:24 +0800
Subject: [PATCH 408/427] [InstCombine] Handle constant GEP expr in
 `SimplifyDemandedUseBits` (#116794)

Closes https://github.com/llvm/llvm-project/issues/116775.

(cherry picked from commit 03d8831fa8ef5b7e32172c718b550a454645faea)
---
 .../InstCombine/InstCombineSimplifyDemanded.cpp     |  2 +-
 llvm/test/Transforms/InstCombine/ptrmask.ll         | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 8a6ec3076ac62..b9d06b5936850 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1004,7 +1004,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
             uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex;
 
             if (MaskedGEPIndex != GEPIndex) {
-              auto *GEP = cast<GetElementPtrInst>(II->getArgOperand(0));
+              auto *GEP = cast<GEPOperator>(II->getArgOperand(0));
               Builder.SetInsertPoint(I);
               Type *GEPIndexType =
                   DL.getIndexType(GEP->getPointerOperand()->getType());
diff --git a/llvm/test/Transforms/InstCombine/ptrmask.ll b/llvm/test/Transforms/InstCombine/ptrmask.ll
index 4631b81cd1ce1..cd998bac3f9f0 100644
--- a/llvm/test/Transforms/InstCombine/ptrmask.ll
+++ b/llvm/test/Transforms/InstCombine/ptrmask.ll
@@ -578,3 +578,16 @@ define ptr @ptrmask_is_useless_fail1(i64 %i, i64 %m) {
   %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0)
   ret ptr %r
 }
+
+@GC_arrays = external global { i8, i8, i64 }
+
+define ptr @ptrmask_demandedbits_constantexpr() {
+; CHECK-LABEL: define ptr @ptrmask_demandedbits_constantexpr() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ALIGNED_RESULT:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr nonnull @GC_arrays, i64 -8)
+; CHECK-NEXT:    ret ptr [[ALIGNED_RESULT]]
+;
+entry:
+  %aligned_result = call ptr @llvm.ptrmask.p0.i64(ptr getelementptr inbounds (i8, ptr @GC_arrays, i64 1), i64 -8)
+  ret ptr %aligned_result
+}

From e80925b5eb4c824fe97a055d49faa586de16c2b9 Mon Sep 17 00:00:00 2001
From: Alexey Karyakin <akaryaki@quicinc.com>
Date: Tue, 19 Nov 2024 09:27:01 -0600
Subject: [PATCH 409/427] [lld][Hexagon] Fix R_HEX_B22_PCREL range checks
 (#115925)

Range checks for R_HEX_B22_PCREL did not account for the fact that
offset is measured in instructions, not bytes.

Add a test for all range-checked relocations.
---
 lld/ELF/Arch/Hexagon.cpp          |  2 +-
 lld/test/ELF/hexagon-jump-error.s |  2 +-
 lld/test/ELF/hexagon.s            | 41 ++++++++++++++++++++++++++++++-
 3 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp
index 56cf96fd17704..8bcd28309f8b3 100644
--- a/lld/ELF/Arch/Hexagon.cpp
+++ b/lld/ELF/Arch/Hexagon.cpp
@@ -329,7 +329,7 @@ void Hexagon::relocate(uint8_t *loc, const Relocation &rel,
   case R_HEX_B22_PCREL:
   case R_HEX_GD_PLT_B22_PCREL:
   case R_HEX_PLT_B22_PCREL:
-    checkInt(loc, val, 22, rel);
+    checkInt(loc, val, 24, rel);
     or32le(loc, applyMask(0x1ff3ffe, val >> 2));
     break;
   case R_HEX_B22_PCREL_X:
diff --git a/lld/test/ELF/hexagon-jump-error.s b/lld/test/ELF/hexagon-jump-error.s
index fec873827e573..53860b5daf2b1 100644
--- a/lld/test/ELF/hexagon-jump-error.s
+++ b/lld/test/ELF/hexagon-jump-error.s
@@ -25,7 +25,7 @@ if (p0) jump #1f
 .section b15, "ax"
 1:
 
-# CHECK: relocation R_HEX_B22_PCREL out of range: 8388612 is not in [-2097152, 2097151]
+# CHECK: relocation R_HEX_B22_PCREL out of range: 8388612 is not in [-8388608, 8388607]
 jump #1f
 .space (1<<23)
 .section b22, "ax"
diff --git a/lld/test/ELF/hexagon.s b/lld/test/ELF/hexagon.s
index 8ef9b8eead8f1..b1576fb47d81a 100644
--- a/lld/test/ELF/hexagon.s
+++ b/lld/test/ELF/hexagon.s
@@ -1,7 +1,9 @@
 # REQUIRES: hexagon
 # RUN: llvm-mc -filetype=obj -triple=hexagon-unknown-elf %s -o %t.o
 # RUN: llvm-mc -filetype=obj -triple=hexagon-unknown-elf %S/Inputs/hexagon.s -o %t1.o
-# RUN: ld.lld %t.o %t1.o -o %t
+# RUN: ld.lld %t.o %t1.o -o %t --Ttext=0x200b4 --section-start=b_1000000=0x1000000 \
+# RUN:  --section-start=b_1000400=0x1000400 --section-start=b_1004000=0x1004000 \
+# RUN:  --section-start=b_1010000=0x1010000 --section-start=b_1800000=0x1800000
 # RUN: llvm-objdump --no-print-imm-hex -d %t | FileCheck %s
 
 # Note: 131584 == 0x20200
@@ -221,3 +223,40 @@ r0 = memw(r1+##_start)
 
 memw(r0+##_start) = r1
 # CHECK: memw(r0+##131644) = r1
+
+
+## Tests for maximum branch ranges reachable without trampolines.
+
+.section b_1000000, "ax"
+## The nop makes sure the first jump is within range.
+nop
+{ r0 = #0; jump #b_1000400 } // R_HEX_B9_PCREL
+if (r0==#0) jump:t #b_1004000 // R_HEX_B13_PCREL
+if (p0) jump #b_1010000 // R_HEX_B15_PCREL
+jump #b_1800000 // R_HEX_B22_PCREL
+
+.section b_1000400, "ax"
+nop
+
+.section b_1004000, "ax"
+nop
+
+.section b_1010000, "ax"
+nop
+
+.section b_1800000, "ax"
+nop
+
+## Make sure we got the right relocations.
+# RUN: llvm-readelf -r %t.o | FileCheck %s --check-prefix=REL
+# REL: R_HEX_B9_PCREL         00000000   b_1000400
+# REL: R_HEX_B13_PCREL        00000000   b_1004000
+# REL: R_HEX_B15_PCREL        00000000   b_1010000
+# REL: R_HEX_B22_PCREL        00000000   b_1800000
+
+# CHECK: 01000000 <b_1000000>:
+# CHECK-NEXT: 1000000: {{.*}} {  nop }
+# CHECK-NEXT: 1000004: {{.*}} {  r0 = #0 ; jump 0x1000400 }
+# CHECK-NEXT: 1000008: {{.*}} {  if (r0==#0) jump:t 0x1004000 }
+# CHECK-NEXT: 100000c: {{.*}} {  if (p0) jump:nt 0x1010000 }
+# CHECK-NEXT: 1000010: {{.*}} {  jump 0x1800000 }

From 5bd0474d1c45d58e472f25bf8292570ac78b5c15 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk@dianqk.net>
Date: Wed, 20 Nov 2024 19:52:51 +0800
Subject: [PATCH 410/427] [LICM] allow MemoryAccess creation failure (#116813)

Fixes #116809.

After running some passes (SimpleLoopUnswitch, LoopInstSimplify, etc.),
MemorySSA might be outdated, and the instruction `I` may have become a
non-memory touching instruction.

LICM has already handled this, but it does not pass
`CreationMustSucceed=false` to `createDefinedAccess`.

(cherry picked from commit 18b02bbf441660683df7f3925946984203d49bab)
---
 llvm/include/llvm/Analysis/MemorySSAUpdater.h |  5 ++
 llvm/lib/Analysis/MemorySSAUpdater.cpp        | 13 ++++-
 llvm/lib/Transforms/Scalar/LICM.cpp           |  5 +-
 .../LICM/PR116813-memoryssa-outdated.ll       | 50 +++++++++++++++++++
 4 files changed, 70 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/LICM/PR116813-memoryssa-outdated.ll

diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
index d4da3ef1146db..f598dedea75fd 100644
--- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
@@ -192,6 +192,11 @@ class MemorySSAUpdater {
                                        const BasicBlock *BB,
                                        MemorySSA::InsertionPlace Point);
 
+  MemoryAccess *createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition,
+                                       const BasicBlock *BB,
+                                       MemorySSA::InsertionPlace Point,
+                                       bool CreationMustSucceed);
+
   /// Create a MemoryAccess in MemorySSA before an existing MemoryAccess.
   ///
   /// See createMemoryAccessInBB() for usage details.
diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp
index aa550f0b6a7bf..94061c949b7f8 100644
--- a/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -1404,8 +1404,17 @@ void MemorySSAUpdater::changeToUnreachable(const Instruction *I) {
 MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB(
     Instruction *I, MemoryAccess *Definition, const BasicBlock *BB,
     MemorySSA::InsertionPlace Point) {
-  MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess(I, Definition);
-  MSSA->insertIntoListsForBlock(NewAccess, BB, Point);
+  return createMemoryAccessInBB(I, Definition, BB, Point,
+                                /*CreationMustSucceed=*/true);
+}
+
+MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB(
+    Instruction *I, MemoryAccess *Definition, const BasicBlock *BB,
+    MemorySSA::InsertionPlace Point, bool CreationMustSucceed) {
+  MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess(
+      I, Definition, /*Template=*/nullptr, CreationMustSucceed);
+  if (NewAccess)
+    MSSA->insertIntoListsForBlock(NewAccess, BB, Point);
   return NewAccess;
 }
 
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 91ef2b4b7c183..ca03eff7a4e25 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -1464,8 +1464,11 @@ static Instruction *cloneInstructionInExitBlock(
 
   if (MSSAU.getMemorySSA()->getMemoryAccess(&I)) {
     // Create a new MemoryAccess and let MemorySSA set its defining access.
+    // After running some passes, MemorySSA might be outdated, and the
+    // instruction `I` may have become a non-memory touching instruction.
     MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB(
-        New, nullptr, New->getParent(), MemorySSA::Beginning);
+        New, nullptr, New->getParent(), MemorySSA::Beginning,
+        /*CreationMustSucceed=*/false);
     if (NewMemAcc) {
       if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
         MSSAU.insertDef(MemDef, /*RenameUses=*/true);
diff --git a/llvm/test/Transforms/LICM/PR116813-memoryssa-outdated.ll b/llvm/test/Transforms/LICM/PR116813-memoryssa-outdated.ll
new file mode 100644
index 0000000000000..a040c3cc6947c
--- /dev/null
+++ b/llvm/test/Transforms/LICM/PR116813-memoryssa-outdated.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='loop-mssa(simple-loop-unswitch<nontrivial>,licm)' -verify-memoryssa -S < %s | FileCheck %s
+
+; Check that running LICM after SimpleLoopUnswitch does not result in a crash.
+
+define i32 @foo(i1 %arg, ptr %arg1) {
+; CHECK-LABEL: define i32 @foo(
+; CHECK-SAME: i1 [[ARG:%.*]], ptr [[ARG1:%.*]]) {
+; CHECK-NEXT:  [[START:.*:]]
+; CHECK-NEXT:    [[ARG_FR:%.*]] = freeze i1 [[ARG]]
+; CHECK-NEXT:    br i1 [[ARG_FR]], label %[[START_SPLIT_US:.*]], label %[[START_SPLIT:.*]]
+; CHECK:       [[START_SPLIT_US]]:
+; CHECK-NEXT:    br label %[[LOOP_US:.*]]
+; CHECK:       [[LOOP_US]]:
+; CHECK-NEXT:    br label %[[BB0:.*]]
+; CHECK:       [[BB0]]:
+; CHECK-NEXT:    br label %[[BB1:.*]]
+; CHECK:       [[BB1]]:
+; CHECK-NEXT:    [[UNSWITCHED_SELECT_US:%.*]] = phi ptr [ [[ARG1]], %[[BB0]] ]
+; CHECK-NEXT:    [[I3_US:%.*]] = call i32 [[UNSWITCHED_SELECT_US]]()
+; CHECK-NEXT:    br i1 true, label %[[LOOP_US]], label %[[RET_SPLIT_US:.*]]
+; CHECK:       [[RET_SPLIT_US]]:
+; CHECK-NEXT:    [[I3_LCSSA_US:%.*]] = phi i32 [ [[I3_US]], %[[BB1]] ]
+; CHECK-NEXT:    br label %[[RET:.*]]
+; CHECK:       [[START_SPLIT]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    br label %[[BB2:.*]]
+; CHECK:       [[BB2]]:
+; CHECK-NEXT:    br i1 false, label %[[LOOP]], label %[[RET_SPLIT:.*]]
+; CHECK:       [[RET_SPLIT]]:
+; CHECK-NEXT:    [[I3_LE:%.*]] = call i32 @bar()
+; CHECK-NEXT:    br label %[[RET]]
+; CHECK:       [[RET]]:
+; CHECK-NEXT:    [[DOTUS_PHI:%.*]] = phi i32 [ [[I3_LE]], %[[RET_SPLIT]] ], [ [[I3_LCSSA_US]], %[[RET_SPLIT_US]] ]
+; CHECK-NEXT:    ret i32 [[DOTUS_PHI]]
+;
+start:
+  br label %loop
+
+loop:                                              ; preds = %loop, %bb
+  %i = select i1 %arg, ptr %arg1, ptr @bar
+  %i3 = call i32 %i()
+  br i1 %arg, label %loop, label %ret
+
+ret:                                              ; preds = %loop
+  ret i32 %i3
+}
+
+declare i32 @bar() nounwind willreturn memory(none)

From 336f87753b510aed840daf87f8d3a4996e6c8f15 Mon Sep 17 00:00:00 2001
From: wanglei <wanglei@loongson.cn>
Date: Thu, 21 Nov 2024 09:31:12 +0800
Subject: [PATCH 411/427] [LoongArch] Fix GOT usage for `non-dso_local`
 function calls in large code model

This commit fixes an issue in the large code model where non-dso_local
function calls did not use the GOT as expected in PIC mode. Instead,
direct PC-relative access was incorrectly applied, leading to linker
errors when building shared libraries.

For `ExternalSymbol`, it is not possible to determine whether it is
dso_local during pseudo-instruction expansion. We use target flags to
differentiate whether GOT should be used.

Cherry-picked from #117099, used for fix linker errors when bulding
shared libraries with large code model.
---
 .../LoongArch/LoongArchExpandPseudoInsts.cpp  |  2 +-
 llvm/test/CodeGen/LoongArch/code-models.ll    | 10 ++---
 .../LoongArch/machinelicm-address-pseudos.ll  | 20 +++++-----
 .../LoongArch/psabi-restricted-scheduling.ll  | 40 +++++++++----------
 llvm/test/CodeGen/LoongArch/tls-models.ll     | 20 +++++-----
 5 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
index c136f5b3e515d..e680dda7374d0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
@@ -721,7 +721,7 @@ bool LoongArchExpandPseudo::expandFunctionCALL(
         IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL;
     Register AddrReg = IsTailCall ? LoongArch::R19 : LoongArch::R1;
 
-    bool UseGOT = Func.isGlobal() && !Func.getGlobal()->isDSOLocal();
+    bool UseGOT = Func.getTargetFlags() == LoongArchII::MO_CALL_PLT;
     unsigned MO = UseGOT ? LoongArchII::MO_GOT_PC_HI : LoongArchII::MO_PCREL_LO;
     unsigned LAOpcode = UseGOT ? LoongArch::LDX_D : LoongArch::ADD_D;
     expandLargeAddressLoad(MBB, MBBI, NextMBBI, LAOpcode, MO, Func, AddrReg,
diff --git a/llvm/test/CodeGen/LoongArch/code-models.ll b/llvm/test/CodeGen/LoongArch/code-models.ll
index 4b2b72afaee17..4eb1e5e596fd3 100644
--- a/llvm/test/CodeGen/LoongArch/code-models.ll
+++ b/llvm/test/CodeGen/LoongArch/code-models.ll
@@ -82,11 +82,11 @@ define void @call_external_sym(ptr %dst) {
 ; LARGE-NEXT:    .cfi_offset 1, -8
 ; LARGE-NEXT:    ori $a2, $zero, 1000
 ; LARGE-NEXT:    move $a1, $zero
-; LARGE-NEXT:    pcalau12i $ra, %pc_hi20(memset)
-; LARGE-NEXT:    addi.d $t8, $zero, %pc_lo12(memset)
-; LARGE-NEXT:    lu32i.d $t8, %pc64_lo20(memset)
-; LARGE-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(memset)
-; LARGE-NEXT:    add.d $ra, $t8, $ra
+; LARGE-NEXT:    pcalau12i $ra, %got_pc_hi20(memset)
+; LARGE-NEXT:    addi.d $t8, $zero, %got_pc_lo12(memset)
+; LARGE-NEXT:    lu32i.d $t8, %got64_pc_lo20(memset)
+; LARGE-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(memset)
+; LARGE-NEXT:    ldx.d $ra, $t8, $ra
 ; LARGE-NEXT:    jirl $ra, $ra, 0
 ; LARGE-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
 ; LARGE-NEXT:    addi.d $sp, $sp, 16
diff --git a/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll b/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll
index ed1a24e82b4e4..29348fe0d641e 100644
--- a/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll
+++ b/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll
@@ -282,11 +282,11 @@ define void @test_la_tls_ld(i32 signext %n) {
 ; LA64LARGE-NEXT:  .LBB3_1: # %loop
 ; LA64LARGE-NEXT:    # =>This Inner Loop Header: Depth=1
 ; LA64LARGE-NEXT:    move $a0, $s0
-; LA64LARGE-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LA64LARGE-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LA64LARGE-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LA64LARGE-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LA64LARGE-NEXT:    add.d $ra, $t8, $ra
+; LA64LARGE-NEXT:    pcalau12i $ra, %got_pc_hi20(__tls_get_addr)
+; LA64LARGE-NEXT:    addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr)
+; LA64LARGE-NEXT:    lu32i.d $t8, %got64_pc_lo20(__tls_get_addr)
+; LA64LARGE-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr)
+; LA64LARGE-NEXT:    ldx.d $ra, $t8, $ra
 ; LA64LARGE-NEXT:    jirl $ra, $ra, 0
 ; LA64LARGE-NEXT:    ld.w $zero, $a0, 0
 ; LA64LARGE-NEXT:    addi.w $s1, $s1, 1
@@ -448,11 +448,11 @@ define void @test_la_tls_gd(i32 signext %n) nounwind {
 ; LA64LARGE-NEXT:  .LBB5_1: # %loop
 ; LA64LARGE-NEXT:    # =>This Inner Loop Header: Depth=1
 ; LA64LARGE-NEXT:    move $a0, $s0
-; LA64LARGE-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LA64LARGE-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LA64LARGE-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LA64LARGE-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LA64LARGE-NEXT:    add.d $ra, $t8, $ra
+; LA64LARGE-NEXT:    pcalau12i $ra, %got_pc_hi20(__tls_get_addr)
+; LA64LARGE-NEXT:    addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr)
+; LA64LARGE-NEXT:    lu32i.d $t8, %got64_pc_lo20(__tls_get_addr)
+; LA64LARGE-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr)
+; LA64LARGE-NEXT:    ldx.d $ra, $t8, $ra
 ; LA64LARGE-NEXT:    jirl $ra, $ra, 0
 ; LA64LARGE-NEXT:    ld.w $zero, $a0, 0
 ; LA64LARGE-NEXT:    addi.w $s1, $s1, 1
diff --git a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll
index 6a15d3a9cda30..75f494f32e476 100644
--- a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll
+++ b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll
@@ -105,11 +105,11 @@ define void @foo() nounwind {
 ; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(gd)
 ; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(gd)
 ; LARGE_NO_SCH-NEXT:    add.d $a0, $t8, $a0
-; LARGE_NO_SCH-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    add.d $ra, $t8, $ra
+; LARGE_NO_SCH-NEXT:    pcalau12i $ra, %got_pc_hi20(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    ldx.d $ra, $t8, $ra
 ; LARGE_NO_SCH-NEXT:    jirl $ra, $ra, 0
 ; LARGE_NO_SCH-NEXT:    ld.d $zero, $a0, 0
 ; LARGE_NO_SCH-NEXT:    pcalau12i $a0, %ld_pc_hi20(ld)
@@ -117,11 +117,11 @@ define void @foo() nounwind {
 ; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(ld)
 ; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(ld)
 ; LARGE_NO_SCH-NEXT:    add.d $a0, $t8, $a0
-; LARGE_NO_SCH-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    add.d $ra, $t8, $ra
+; LARGE_NO_SCH-NEXT:    pcalau12i $ra, %got_pc_hi20(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    ldx.d $ra, $t8, $ra
 ; LARGE_NO_SCH-NEXT:    jirl $ra, $ra, 0
 ; LARGE_NO_SCH-NEXT:    pcalau12i $a1, %ie_pc_hi20(ie)
 ; LARGE_NO_SCH-NEXT:    addi.d $t8, $zero, %ie_pc_lo12(ie)
@@ -162,11 +162,11 @@ define void @foo() nounwind {
 ; LARGE_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(gd)
 ; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(gd)
 ; LARGE_SCH-NEXT:    add.d $a0, $t8, $a0
-; LARGE_SCH-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LARGE_SCH-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LARGE_SCH-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LARGE_SCH-NEXT:    add.d $ra, $t8, $ra
+; LARGE_SCH-NEXT:    pcalau12i $ra, %got_pc_hi20(__tls_get_addr)
+; LARGE_SCH-NEXT:    addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr)
+; LARGE_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(__tls_get_addr)
+; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr)
+; LARGE_SCH-NEXT:    ldx.d $ra, $t8, $ra
 ; LARGE_SCH-NEXT:    jirl $ra, $ra, 0
 ; LARGE_SCH-NEXT:    ld.d $zero, $a0, 0
 ; LARGE_SCH-NEXT:    pcalau12i $a0, %ld_pc_hi20(ld)
@@ -174,11 +174,11 @@ define void @foo() nounwind {
 ; LARGE_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(ld)
 ; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(ld)
 ; LARGE_SCH-NEXT:    add.d $a0, $t8, $a0
-; LARGE_SCH-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LARGE_SCH-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LARGE_SCH-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LARGE_SCH-NEXT:    add.d $ra, $t8, $ra
+; LARGE_SCH-NEXT:    pcalau12i $ra, %got_pc_hi20(__tls_get_addr)
+; LARGE_SCH-NEXT:    addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr)
+; LARGE_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(__tls_get_addr)
+; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr)
+; LARGE_SCH-NEXT:    ldx.d $ra, $t8, $ra
 ; LARGE_SCH-NEXT:    jirl $ra, $ra, 0
 ; LARGE_SCH-NEXT:    pcalau12i $a1, %ie_pc_hi20(ie)
 ; LARGE_SCH-NEXT:    addi.d $t8, $zero, %ie_pc_lo12(ie)
diff --git a/llvm/test/CodeGen/LoongArch/tls-models.ll b/llvm/test/CodeGen/LoongArch/tls-models.ll
index bb89794d1c843..04600ffeb37ee 100644
--- a/llvm/test/CodeGen/LoongArch/tls-models.ll
+++ b/llvm/test/CodeGen/LoongArch/tls-models.ll
@@ -55,11 +55,11 @@ define ptr @f1() nounwind {
 ; LA64LARGEPIC-NEXT:    lu32i.d $t8, %got64_pc_lo20(unspecified)
 ; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(unspecified)
 ; LA64LARGEPIC-NEXT:    add.d $a0, $t8, $a0
-; LA64LARGEPIC-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    add.d $ra, $t8, $ra
+; LA64LARGEPIC-NEXT:    pcalau12i $ra, %got_pc_hi20(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    lu32i.d $t8, %got64_pc_lo20(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    ldx.d $ra, $t8, $ra
 ; LA64LARGEPIC-NEXT:    jirl $ra, $ra, 0
 ; LA64LARGEPIC-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
 ; LA64LARGEPIC-NEXT:    addi.d $sp, $sp, 16
@@ -169,11 +169,11 @@ define ptr @f2() nounwind {
 ; LA64LARGEPIC-NEXT:    lu32i.d $t8, %got64_pc_lo20(ld)
 ; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(ld)
 ; LA64LARGEPIC-NEXT:    add.d $a0, $t8, $a0
-; LA64LARGEPIC-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    add.d $ra, $t8, $ra
+; LA64LARGEPIC-NEXT:    pcalau12i $ra, %got_pc_hi20(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    lu32i.d $t8, %got64_pc_lo20(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    ldx.d $ra, $t8, $ra
 ; LA64LARGEPIC-NEXT:    jirl $ra, $ra, 0
 ; LA64LARGEPIC-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
 ; LA64LARGEPIC-NEXT:    addi.d $sp, $sp, 16

From 11be11b8773bf63abe2c8da72db3ee7c25af524b Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333@gmail.com>
Date: Thu, 21 Nov 2024 17:23:04 +0800
Subject: [PATCH 412/427] [SCEV] Fix sext handling for `getConstantMultiple`
 (#117093)

Counterexample: 219 is a multiple of 73. But `sext i8 219 to i16 =
65499` is not.
Fixes https://github.com/llvm/llvm-project/issues/116483.

(cherry picked from commit 458dfbd855806461b4508bf8845cafe0411dbfd4)
---
 llvm/lib/Analysis/ScalarEvolution.cpp         |  4 ++-
 .../test/Analysis/ScalarEvolution/pr116483.ll | 26 ++++++++++++++
 .../Transforms/IndVarSimplify/pr116483.ll     | 36 +++++++++++++++++++
 3 files changed, 65 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Analysis/ScalarEvolution/pr116483.ll
 create mode 100644 llvm/test/Transforms/IndVarSimplify/pr116483.ll

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 51cffac808768..412cfe73d3e55 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6313,8 +6313,10 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) {
     return getConstantMultiple(Z->getOperand()).zext(BitWidth);
   }
   case scSignExtend: {
+    // Only multiples that are a power of 2 will hold after sext.
     const SCEVSignExtendExpr *E = cast<SCEVSignExtendExpr>(S);
-    return getConstantMultiple(E->getOperand()).sext(BitWidth);
+    uint32_t TZ = getMinTrailingZeros(E->getOperand());
+    return GetShiftedByZeros(TZ);
   }
   case scMulExpr: {
     const SCEVMulExpr *M = cast<SCEVMulExpr>(S);
diff --git a/llvm/test/Analysis/ScalarEvolution/pr116483.ll b/llvm/test/Analysis/ScalarEvolution/pr116483.ll
new file mode 100644
index 0000000000000..cc2334e9c64f9
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/pr116483.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -disable-output "-passes=print<scalar-evolution>" < %s 2>&1 | FileCheck %s
+
+define i16 @test() {
+; CHECK-LABEL: 'test'
+; CHECK-NEXT:  Classifying expressions for: @test
+; CHECK-NEXT:    %xor = xor i32 0, 3
+; CHECK-NEXT:    --> %xor U: [3,4) S: [3,4)
+; CHECK-NEXT:    %mul = mul i32 %xor, 329
+; CHECK-NEXT:    --> (329 * %xor)<nuw><nsw> U: [987,988) S: [987,988)
+; CHECK-NEXT:    %conv = trunc i32 %mul to i16
+; CHECK-NEXT:    --> (329 * (trunc i32 %xor to i16))<nuw><nsw> U: [987,988) S: [987,988)
+; CHECK-NEXT:    %sext = shl i16 %conv, 8
+; CHECK-NEXT:    --> (18688 * (trunc i32 %xor to i16))<nuw> U: [-9472,-9471) S: [-9472,-9471)
+; CHECK-NEXT:    %conv1 = ashr i16 %sext, 8
+; CHECK-NEXT:    --> (sext i8 (73 * (trunc i32 %xor to i8))<nuw> to i16) U: [-37,-36) S: [-37,-36)
+; CHECK-NEXT:  Determining loop execution counts for: @test
+;
+entry:
+  %xor = xor i32 0, 3
+  %mul = mul i32 %xor, 329
+  %conv = trunc i32 %mul to i16
+  %sext = shl i16 %conv, 8
+  %conv1 = ashr i16 %sext, 8
+  ret i16 %conv1
+}
diff --git a/llvm/test/Transforms/IndVarSimplify/pr116483.ll b/llvm/test/Transforms/IndVarSimplify/pr116483.ll
new file mode 100644
index 0000000000000..ae108a525223e
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/pr116483.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=indvars < %s | FileCheck %s
+
+define i32 @test() {
+; CHECK-LABEL: define i32 @test() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 0, 3
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[XOR]], 329
+; CHECK-NEXT:    [[CONV:%.*]] = trunc i32 [[MUL]] to i16
+; CHECK-NEXT:    [[SEXT:%.*]] = shl i16 [[CONV]], 8
+; CHECK-NEXT:    [[CONV1:%.*]] = ashr i16 [[SEXT]], 8
+; CHECK-NEXT:    br label %[[LOOP_BODY:.*]]
+; CHECK:       [[LOOP_BODY]]:
+; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[LOOP_BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[CONV3:%.*]] = zext i16 [[CONV1]] to i32
+; CHECK-NEXT:    ret i32 [[CONV3]]
+;
+entry:
+  %xor = xor i32 0, 3
+  %mul = mul i32 %xor, 329
+  %conv = trunc i32 %mul to i16
+  %sext = shl i16 %conv, 8
+  %conv1 = ashr i16 %sext, 8
+  %conv3 = zext i16 %conv1 to i32
+  br label %loop.body
+
+loop.body:
+  %indvar = phi i32 [ %indvar.inc, %loop.body ], [ 1, %entry ]
+  %indvar.inc = add nuw i32 %indvar, 1
+  %exitcond = icmp eq i32 %indvar, %conv3
+  br i1 %exitcond, label %exit, label %loop.body
+
+exit:
+  ret i32 %conv3
+}

From 7e2da7d262380d5ebaf25dbadd7ad2440f6ce21f Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333@gmail.com>
Date: Mon, 18 Nov 2024 23:41:04 +0800
Subject: [PATCH 413/427] [ConstraintElim] Bail out on non-dedicated exits when
 adding exiting conditions (#116627)

This patch bails out non-dedicated exits to avoid adding exiting
conditions to invalid context.
Closes https://github.com/llvm/llvm-project/issues/116553.

(cherry picked from commit 52361d0368b79841be12156bf03cf8c1851e5df7)
---
 .../Scalar/ConstraintElimination.cpp          | 13 +++---
 .../induction-condition-in-loop-exit.ll       | 44 +++++++++++++++++++
 2 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 37022104d0a9b..d1c80aa671243 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1033,9 +1033,9 @@ void State::addInfoForInductions(BasicBlock &BB) {
       DTN, CmpInst::ICMP_SLT, PN, B,
       ConditionTy(CmpInst::ICMP_SLE, StartValue, B)));
 
-  // Try to add condition from header to the exit blocks. When exiting either
-  // with EQ or NE in the header, we know that the induction value must be u<=
-  // B, as other exits may only exit earlier.
+  // Try to add condition from header to the dedicated exit blocks. When exiting
+  // either with EQ or NE in the header, we know that the induction value must
+  // be u<= B, as other exits may only exit earlier.
   assert(!StepOffset.isNegative() && "induction must be increasing");
   assert((Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
          "unsupported predicate");
@@ -1043,8 +1043,11 @@ void State::addInfoForInductions(BasicBlock &BB) {
   SmallVector<BasicBlock *> ExitBBs;
   L->getExitBlocks(ExitBBs);
   for (BasicBlock *EB : ExitBBs) {
-    WorkList.emplace_back(FactOrCheck::getConditionFact(
-        DT.getNode(EB), CmpInst::ICMP_ULE, A, B, Precond));
+    // Bail out on non-dedicated exits.
+    if (DT.dominates(&BB, EB)) {
+      WorkList.emplace_back(FactOrCheck::getConditionFact(
+          DT.getNode(EB), CmpInst::ICMP_ULE, A, B, Precond));
+    }
   }
 }
 
diff --git a/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll b/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll
index 15e1d84372627..a04b06e1bf0a5 100644
--- a/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll
+++ b/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll
@@ -763,3 +763,47 @@ exit.2:
   %t.2 = icmp ult i32 %iv, %N
   ret i1 %t.2
 }
+
+define i1 @test_non_dedicated_exit(i16 %n) {
+; CHECK-LABEL: define i1 @test_non_dedicated_exit(
+; CHECK-SAME: i16 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[COND:%.*]] = icmp slt i16 [[N]], 1
+; CHECK-NEXT:    br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]]
+; CHECK:       [[LOOP_PREHEADER]]:
+; CHECK-NEXT:    [[SUB:%.*]] = add nsw i16 [[N]], -1
+; CHECK-NEXT:    [[EXT:%.*]] = zext nneg i16 [[SUB]] to i32
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_INC:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], [[EXT]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[INDVAR_INC]] = add nuw nsw i32 [[INDVAR]], 1
+; CHECK-NEXT:    br label %[[LOOP]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i16 [[N]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %cond = icmp slt i16 %n, 1
+  br i1 %cond, label %exit, label %loop.preheader
+
+loop.preheader:
+  %sub = add nsw i16 %n, -1
+  %ext = zext nneg i16 %sub to i32
+  br label %loop
+
+loop:
+  %indvar = phi i32 [ %indvar.inc, %loop.latch ], [ 0, %loop.preheader ]
+  %exitcond = icmp eq i32 %indvar, %ext
+  br i1 %exitcond, label %exit, label %loop.latch
+
+loop.latch:
+  %indvar.inc = add nuw nsw i32 %indvar, 1
+  br label %loop
+
+exit:
+  %cmp = icmp sgt i16 %n, 0
+  ret i1 %cmp
+}

From 32cbe24de3f2ecb1c77899ff27bfe70bb033ecde Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Wed, 20 Nov 2024 15:10:19 +0000
Subject: [PATCH 414/427] [MachineLICM] Add test case showing load hoisted
 across memory barrier.

(cherry picked from commit a9b3ec154d7ab2d0896ac5c9f1e9a1266a37be80)
---
 .../AArch64/machine-licm-hoist-load.ll        | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
index e8dafd5e8fbab..932a5af264a00 100644
--- a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
+++ b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
@@ -497,6 +497,35 @@ for.exit:                                 ; preds = %for.body
   ret i64 %spec.select
 }
 
+@a = external local_unnamed_addr global i32, align 4
+
+; FIXME: Load hoisted out of the loop across memory barriers.
+define i32 @load_between_memory_barriers() {
+; CHECK-LABEL: load_between_memory_barriers:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, :got:a
+; CHECK-NEXT:    ldr x8, [x8, :got_lo12:a]
+; CHECK-NEXT:    ldr w0, [x8]
+; CHECK-NEXT:  .LBB8_1: // %loop
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    //MEMBARRIER
+; CHECK-NEXT:    //MEMBARRIER
+; CHECK-NEXT:    cbz w0, .LBB8_1
+; CHECK-NEXT:  // %bb.2: // %exit
+; CHECK-NEXT:    ret
+  br label %loop
+
+loop:
+  fence syncscope("singlethread") acq_rel
+  %l = load i32, ptr @a, align 4
+  fence syncscope("singlethread") acq_rel
+  %c = icmp eq i32 %l, 0
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret i32 %l
+}
+
 declare i32 @bcmp(ptr, ptr, i64)
 declare i32 @memcmp(ptr, ptr, i64)
 declare void @func()

From 086d8e6bb5daf8de43880ba90258c49e0fabf2c9 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Thu, 21 Nov 2024 10:25:04 +0000
Subject: [PATCH 415/427] [MachineLICM] Don't allow hoisting invariant loads
 across mem barrier. (#116987)

The improvements in 63917e1 / #70796 do not check for memory
barriers/unmodelled sideeffects, which means we may incorrectly hoist
loads across memory barriers.

Fix this by checking any machine instruction in the loop is a load-fold
barrier.

PR: https://github.com/llvm/llvm-project/pull/116987
(cherry picked from commit ef102b4a6333a304e36dc623d5381257a7ef1ed6)
---
 llvm/lib/CodeGen/MachineLICM.cpp                     | 2 +-
 llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll | 4 ++--
 llvm/test/CodeGen/Mips/lcb5.ll                       | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index f24ab187ef400..21a02a6f09478 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -1474,7 +1474,7 @@ void MachineLICMBase::InitializeLoadsHoistableLoops() {
       if (!AllowedToHoistLoads[Loop])
         continue;
       for (auto &MI : *MBB) {
-        if (!MI.mayStore() && !MI.isCall() &&
+        if (!MI.isLoadFoldBarrier() && !MI.mayStore() && !MI.isCall() &&
             !(MI.mayLoad() && MI.hasOrderedMemoryRef()))
           continue;
         for (MachineLoop *L = Loop; L != nullptr; L = L->getParentLoop())
diff --git a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
index 932a5af264a00..17f8263560430 100644
--- a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
+++ b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
@@ -499,16 +499,16 @@ for.exit:                                 ; preds = %for.body
 
 @a = external local_unnamed_addr global i32, align 4
 
-; FIXME: Load hoisted out of the loop across memory barriers.
+; Make sure the load is not hoisted out of the loop across memory barriers.
 define i32 @load_between_memory_barriers() {
 ; CHECK-LABEL: load_between_memory_barriers:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, :got:a
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:a]
-; CHECK-NEXT:    ldr w0, [x8]
 ; CHECK-NEXT:  .LBB8_1: // %loop
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    //MEMBARRIER
+; CHECK-NEXT:    ldr w0, [x8]
 ; CHECK-NEXT:    //MEMBARRIER
 ; CHECK-NEXT:    cbz w0, .LBB8_1
 ; CHECK-NEXT:  // %bb.2: // %exit
diff --git a/llvm/test/CodeGen/Mips/lcb5.ll b/llvm/test/CodeGen/Mips/lcb5.ll
index f320f6fc5660c..bb059f1ee8453 100644
--- a/llvm/test/CodeGen/Mips/lcb5.ll
+++ b/llvm/test/CodeGen/Mips/lcb5.ll
@@ -186,7 +186,7 @@ if.end:                                           ; preds = %if.then, %entry
 }
 
 ; ci:	.ent	z3
-; ci:	bteqz	$BB6_3
+; ci:	bteqz	$BB6_2
 ; ci:	.end	z3
 
 ; Function Attrs: nounwind optsize
@@ -210,7 +210,7 @@ if.end:                                           ; preds = %if.then, %entry
 
 ; ci:	.ent	z4
 ; ci:	btnez	$BB7_1  # 16 bit inst
-; ci:	jal	$BB7_3	# branch
+; ci:	jal	$BB7_2	# branch
 ; ci:	nop
 ; ci: $BB7_1:
 ; ci:	.p2align	2

From 0e7e5d9bdf3c130069a1d622dc9a2b71357fc1ee Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333@gmail.com>
Date: Tue, 19 Nov 2024 20:06:34 +0800
Subject: [PATCH 416/427] [InstCombine] Drop noundef attributes in
 `foldCttzCtlz` (#116718)

Closes https://github.com/llvm/llvm-project/issues/112068.

(cherry picked from commit a59976bea8ad76f18119a11391dc8ba3e6ba07d5)
---
 .../Transforms/InstCombine/InstCombineCalls.cpp  |  4 +++-
 .../Transforms/InstCombine/shift-cttz-ctlz.ll    | 16 ++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 9d2990c98ce27..3223fccbcf49a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -506,8 +506,10 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
 
   // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
   if (II.hasOneUse() && match(Op1, m_Zero()) &&
-      match(II.user_back(), m_Shift(m_Value(), m_Specific(&II))))
+      match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) {
+    II.dropUBImplyingAttrsAndMetadata();
     return IC.replaceOperand(II, 1, IC.Builder.getTrue());
+  }
 
   Constant *C;
 
diff --git a/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll b/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll
index 1c381d0839071..63caec9501325 100644
--- a/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll
+++ b/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll
@@ -15,6 +15,22 @@ entry:
   ret i32 %res
 }
 
+; Make sure that noundef is dropped.
+
+define i32 @shl_cttz_false_noundef(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @shl_cttz_false_noundef(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[Y]], i1 true)
+; CHECK-NEXT:    [[RES:%.*]] = shl i32 [[X]], [[CTTZ]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %cttz = call noundef i32 @llvm.cttz.i32(i32 %y, i1 false)
+  %res = shl i32 %x, %cttz
+  ret i32 %res
+}
+
 define i32 @shl_ctlz_false(i32 %x, i32 %y) {
 ; CHECK-LABEL: define i32 @shl_ctlz_false(
 ; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {

From edded5af5494adfc53187719fa3f3b0be7a4a20e Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@outlook.com>
Date: Fri, 18 Oct 2024 13:44:57 -0700
Subject: [PATCH 417/427] [SLP][NFC]Add a test with the incorrect casting of
 the abs argument, NFC

(cherry picked from commit 825f9cb1b31aa91d23eba803003897490de74a20)
---
 .../abs-overflow-incorrect-minbws.ll          | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll
new file mode 100644
index 0000000000000..a936b076138d0
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s
+
+define i32 @test(i32 %n) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[N]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 1, i32 2>
+; CHECK-NEXT:    [[TMP3:%.*]] = mul <2 x i32> [[TMP2]], <i32 273837369, i32 273837369>
+; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP3]], i1 false)
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1
+; CHECK-NEXT:    [[RES1:%.*]] = add i32 [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    ret i32 [[RES1]]
+;
+entry:
+  %n1 = add i32 %n, 1
+  %zn1 = zext nneg i32 %n1 to i64
+  %m1 = mul nuw nsw i64 %zn1, 273837369
+  %a1 = call i64 @llvm.abs.i64(i64 %m1, i1 true)
+  %t1 = trunc i64 %a1 to i32
+  %n2 = add i32 %n, 2
+  %zn2 = zext nneg i32 %n2 to i64
+  %m2 = mul nuw nsw i64 %zn2, 273837369
+  %a2 = call i64 @llvm.abs.i64(i64 %m2, i1 true)
+  %t2 = trunc i64 %a2 to i32
+  %res1 = add i32 %t1, %t2
+  ret i32 %res1
+}

From 9f72c9837c553063ab0cbacc1a472a73c0ec2a4b Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@outlook.com>
Date: Fri, 18 Oct 2024 13:54:30 -0700
Subject: [PATCH 418/427] [SLP]Check that operand of abs does not overflow
 before making it part of minbitwidth transformation

Need to check that the operand of the abs intrinsic can be safely
truncated before making it part of the minbitwidth transformation.

Fixes #112577

(cherry picked from commit 709abacdc350d63c61888607edb28ce272daa0a0)
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp  | 16 ++++++++++++++++
 .../abs-overflow-incorrect-minbws.ll             |  6 ++++--
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ab2b96cdc42db..746ba51a981fe 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15440,9 +15440,25 @@ bool BoUpSLP::collectValuesToDemote(
                 MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)));
       });
     };
+    auto AbsChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) {
+      assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!");
+      return all_of(E.Scalars, [&](Value *V) {
+        auto *I = cast<Instruction>(V);
+        unsigned SignBits = OrigBitWidth - BitWidth;
+        APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth - 1);
+        unsigned Op0SignBits =
+            ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, nullptr, DT);
+        return SignBits <= Op0SignBits &&
+               ((SignBits != Op0SignBits &&
+                 !isKnownNonNegative(I->getOperand(0), SimplifyQuery(*DL))) ||
+                MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)));
+      });
+    };
     if (ID != Intrinsic::abs) {
       Operands.push_back(getOperandEntry(&E, 1));
       CallChecker = CompChecker;
+    } else {
+      CallChecker = AbsChecker;
     }
     InstructionCost BestCost =
         std::numeric_limits<InstructionCost::CostType>::max();
diff --git a/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll
index a936b076138d0..51b635837d3b5 100644
--- a/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll
+++ b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll
@@ -8,8 +8,10 @@ define i32 @test(i32 %n) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[N]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 1, i32 2>
-; CHECK-NEXT:    [[TMP3:%.*]] = mul <2 x i32> [[TMP2]], <i32 273837369, i32 273837369>
-; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP3]], i1 false)
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
+; CHECK-NEXT:    [[TMP7:%.*]] = mul nuw nsw <2 x i64> [[TMP3]], <i64 273837369, i64 273837369>
+; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP7]], i1 true)
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc <2 x i64> [[TMP8]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1
 ; CHECK-NEXT:    [[RES1:%.*]] = add i32 [[TMP5]], [[TMP6]]

From dc665fa5f5b8b572479ceac6bf32e0174de65f1e Mon Sep 17 00:00:00 2001
From: Zhaoxin Yang <yangzhaoxin@loongson.cn>
Date: Mon, 11 Nov 2024 16:46:22 +0800
Subject: [PATCH 419/427] [MC][LoongArch] Change default cpu in
 `MCSubtargetInfo`. (#114922)

The default value of this CPU affects the `FeatureBits` obtained by
`LoongArchTargetELFStreamer` when creating an ELF file, and it will
further affect the `Flags` field in the generated file.

So, the default CPU value should be consistent with the
`initializeSubtargetDependencies` in `LoongArchSubtarget.cpp`.
Otherwise, the `Flags` field may be unexpected.

(cherry picked from commit 75c2888209473884cb3fa5720899d8199dafb8cb)
---
 lld/test/ELF/emulation-loongarch.s                          | 2 +-
 lld/test/ELF/loongarch-interlink.test                       | 4 ++--
 .../Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp | 2 +-
 llvm/test/CodeGen/LoongArch/e_flags.ll                      | 6 ++++++
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/lld/test/ELF/emulation-loongarch.s b/lld/test/ELF/emulation-loongarch.s
index 28b879f758468..cfa8df4d8e2fe 100644
--- a/lld/test/ELF/emulation-loongarch.s
+++ b/lld/test/ELF/emulation-loongarch.s
@@ -37,7 +37,7 @@
 # LA32-NEXT:   StringTableSectionIndex:
 # LA32-NEXT: }
 
-# RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+d %s -o %t.o
 # RUN: ld.lld %t.o -o %t
 # RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA64 %s
 # RUN: ld.lld -m elf64loongarch %t.o -o %t
diff --git a/lld/test/ELF/loongarch-interlink.test b/lld/test/ELF/loongarch-interlink.test
index 44e5d03409a47..15c8318512660 100644
--- a/lld/test/ELF/loongarch-interlink.test
+++ b/lld/test/ELF/loongarch-interlink.test
@@ -3,9 +3,9 @@
 
 # RUN: yaml2obj %t/blob.yaml -o %t/blob.o
 # RUN: yaml2obj %t/v0-lp64d.yaml -o %t/v0-lp64d.o
-# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu %t/start.s -o %t/v1-lp64d.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu --mattr=+d %t/start.s -o %t/v1-lp64d.o
 # RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnusf %t/start.s -o %t/v1-lp64s.o
-# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu %t/bar.s -o %t/v1-b-lp64d.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu --mattr=+d %t/bar.s -o %t/v1-b-lp64d.o
 
 ## Check that binary input results in e_flags=0 output.
 # RUN: ld.lld -m elf64loongarch -b binary %t/blob.bin -o %t/blob.out
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
index e40981f5b5cd5..0712cc01ea038 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
@@ -55,7 +55,7 @@ static MCInstrInfo *createLoongArchMCInstrInfo() {
 static MCSubtargetInfo *
 createLoongArchMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
   if (CPU.empty() || CPU == "generic")
-    CPU = TT.isArch64Bit() ? "la464" : "generic-la32";
+    CPU = TT.isArch64Bit() ? "generic-la64" : "generic-la32";
   return createLoongArchMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
 }
 
diff --git a/llvm/test/CodeGen/LoongArch/e_flags.ll b/llvm/test/CodeGen/LoongArch/e_flags.ll
index 2feb9d832bca9..9b2dc87eb353d 100644
--- a/llvm/test/CodeGen/LoongArch/e_flags.ll
+++ b/llvm/test/CodeGen/LoongArch/e_flags.ll
@@ -1,3 +1,6 @@
+; RUN: llc --mtriple=loongarch32 --filetype=obj %s -o %t-la32s
+; RUN: llvm-readelf -h %t-la32s | FileCheck %s --check-prefixes=ILP32,ABI-S --match-full-lines
+
 ; RUN: llc --mtriple=loongarch32 -mattr=+d --filetype=obj %s -o %t-la32
 ; RUN: llvm-readelf -h %t-la32 | FileCheck %s --check-prefixes=ILP32,ABI-D --match-full-lines
 
@@ -10,6 +13,9 @@
 ; RUN: llc --mtriple=loongarch32 -mattr=+d --filetype=obj %s --target-abi=ilp32d -o %t-ilp32d
 ; RUN: llvm-readelf -h %t-ilp32d | FileCheck %s --check-prefixes=ILP32,ABI-D --match-full-lines
 
+; RUN: llc --mtriple=loongarch64 -mattr=+d --filetype=obj %s -o %t-la64d
+; RUN: llvm-readelf -h %t-la64d | FileCheck %s --check-prefixes=LP64,ABI-D --match-full-lines
+
 ; RUN: llc --mtriple=loongarch64 -mattr=+d --filetype=obj %s -o %t-la64
 ; RUN: llvm-readelf -h %t-la64 | FileCheck %s --check-prefixes=LP64,ABI-D --match-full-lines
 

From f64f76feab83859b37b7fa5de3d4bba9a446e72b Mon Sep 17 00:00:00 2001
From: AdityaK <hiraditya@msn.com>
Date: Tue, 10 Sep 2024 22:39:02 -0700
Subject: [PATCH 420/427] Bail out jump threading on indirect branches
 (#103688)

The bug was introduced by
https://github.com/llvm/llvm-project/pull/68473

Fixes: #102351
(cherry picked from commit 3c9022c965b85951f30af140da591f819acef8a0)
---
 llvm/lib/Transforms/Utils/Local.cpp           |  11 +-
 .../switch-branch-fold-indirectbr-102351.ll   | 104 ++++++++++++++++++
 2 files changed, 113 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll

diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 7192efe3f16b9..4eb8dc1d2d615 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -1028,7 +1028,14 @@ CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ,
   if (!BB->hasNPredecessorsOrMore(2))
     return false;
 
-  // Get single common predecessors of both BB and Succ
+  if (any_of(BBPreds, [](const BasicBlock *Pred) {
+        return isa<PHINode>(Pred->begin()) &&
+               isa<IndirectBrInst>(Pred->getTerminator());
+      }))
+    return false;
+
+  // Get the single common predecessor of both BB and Succ. Return false
+  // when there are more than one common predecessors.
   for (BasicBlock *SuccPred : SuccPreds) {
     if (BBPreds.count(SuccPred)) {
       if (CommonPred)
@@ -1133,7 +1140,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
 
   bool BBKillable = CanPropagatePredecessorsForPHIs(BB, Succ, BBPreds);
 
-  // Even if we can not fold bB into Succ, we may be able to redirect the
+  // Even if we can not fold BB into Succ, we may be able to redirect the
   // predecessors of BB to Succ.
   bool BBPhisMergeable =
       BBKillable ||
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll b/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll
new file mode 100644
index 0000000000000..03aee68fa4248
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local noundef i32 @main() {
+; CHECK-LABEL: define dso_local noundef i32 @main() {
+; CHECK-NEXT:  [[BB:.*]]:
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [2 x ptr], align 16
+; CHECK-NEXT:    store ptr blockaddress(@main, %[[BB4:.*]]), ptr [[ALLOCA]], align 16, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr inbounds [2 x ptr], ptr [[ALLOCA]], i64 0, i64 1
+; CHECK-NEXT:    store ptr blockaddress(@main, %[[BB10:.*]]), ptr [[GETELEMENTPTR]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT:    br label %[[BB1:.*]]
+; CHECK:       [[BB1]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI8:%.*]], %[[BB7:.*]] ]
+; CHECK-NEXT:    [[PHI2:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI9:%.*]], %[[BB7]] ]
+; CHECK-NEXT:    switch i32 [[PHI]], label %[[BB7]] [
+; CHECK-NEXT:      i32 0, label %[[BB12:.*]]
+; CHECK-NEXT:      i32 1, label %[[BB4]]
+; CHECK-NEXT:      i32 2, label %[[BB6:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    [[PHI5:%.*]] = phi i32 [ [[PHI13:%.*]], %[[BB12]] ], [ [[PHI2]], %[[BB1]] ]
+; CHECK-NEXT:    br label %[[BB7]]
+; CHECK:       [[BB6]]:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @foo(i32 noundef [[PHI2]])
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[PHI2]], 1
+; CHECK-NEXT:    br label %[[BB12]]
+; CHECK:       [[BB7]]:
+; CHECK-NEXT:    [[PHI8]] = phi i32 [ [[PHI]], %[[BB1]] ], [ 2, %[[BB4]] ]
+; CHECK-NEXT:    [[PHI9]] = phi i32 [ [[PHI2]], %[[BB1]] ], [ [[PHI5]], %[[BB4]] ]
+; CHECK-NEXT:    br label %[[BB1]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       [[BB10]]:
+; CHECK-NEXT:    [[CALL11:%.*]] = call i32 @foo(i32 noundef [[PHI13]])
+; CHECK-NEXT:    ret i32 0
+; CHECK:       [[BB12]]:
+; CHECK-NEXT:    [[PHI13]] = phi i32 [ [[ADD]], %[[BB6]] ], [ [[PHI2]], %[[BB1]] ]
+; CHECK-NEXT:    [[SEXT:%.*]] = sext i32 [[PHI13]] to i64
+; CHECK-NEXT:    [[GETELEMENTPTR14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[ALLOCA]], i64 0, i64 [[SEXT]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr [[GETELEMENTPTR14]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT:    indirectbr ptr [[LOAD]], [label %[[BB4]], label %bb10]
+;
+bb:
+  %alloca = alloca [2 x ptr], align 16
+  store ptr blockaddress(@main, %bb4), ptr %alloca, align 16, !tbaa !0
+  %getelementptr = getelementptr inbounds [2 x ptr], ptr %alloca, i64 0, i64 1
+  store ptr blockaddress(@main, %bb10), ptr %getelementptr, align 8, !tbaa !0
+  br label %bb1
+
+bb1:                                              ; preds = %bb7, %bb
+  %phi = phi i32 [ 0, %bb ], [ %phi8, %bb7 ]
+  %phi2 = phi i32 [ 0, %bb ], [ %phi9, %bb7 ]
+  switch i32 %phi, label %bb7 [
+  i32 0, label %bb3
+  i32 1, label %bb4
+  i32 2, label %bb6
+  ]
+
+bb3:                                              ; preds = %bb1
+  br label %bb12
+
+bb4:                                              ; preds = %bb12, %bb1
+  %phi5 = phi i32 [ %phi13, %bb12 ], [ %phi2, %bb1 ]
+  br label %bb7
+
+bb6:                                              ; preds = %bb1
+  %call = call i32 @foo(i32 noundef %phi2)
+  %add = add nsw i32 %phi2, 1
+  br label %bb12
+
+bb7:                                              ; preds = %bb4, %bb1
+  %phi8 = phi i32 [ %phi, %bb1 ], [ 2, %bb4 ]
+  %phi9 = phi i32 [ %phi2, %bb1 ], [ %phi5, %bb4 ]
+  br label %bb1, !llvm.loop !4
+
+bb10:                                             ; preds = %bb12
+  %call11 = call i32 @foo(i32 noundef %phi13)
+  ret i32 0
+
+bb12:                                             ; preds = %bb6, %bb3
+  %phi13 = phi i32 [ %add, %bb6 ], [ %phi2, %bb3 ]
+  %sext = sext i32 %phi13 to i64
+  %getelementptr14 = getelementptr inbounds [2 x ptr], ptr %alloca, i64 0, i64 %sext
+  %load = load ptr, ptr %getelementptr14, align 8, !tbaa !0
+  indirectbr ptr %load, [label %bb4, label %bb10]
+}
+
+declare i32 @foo(i32)
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"any pointer", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"int", !2, i64 0}
+;.
+; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"any pointer", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
+; CHECK: [[LOOP4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+; CHECK: [[META5]] = !{!"int", [[META2]], i64 0}
+;.

From 321f0dd2008160b674c010425133bebc586392e7 Mon Sep 17 00:00:00 2001
From: AdityaK <hiraditya@msn.com>
Date: Tue, 26 Nov 2024 14:57:28 -0800
Subject: [PATCH 421/427] Bail out jump threading on indirect branches only
 (#117778)

Remove check for PHI in pred as pointed out in #103688
Reduced the testcase to remove redundant phi in pred

Fixes: #102351
(cherry picked from commit 39601a6e5484de183bf525b7d0624e7890ccd8ab)
---
 llvm/lib/Transforms/Utils/Local.cpp           |   3 +-
 .../switch-branch-fold-indirectbr-102351.ll   | 141 ++++++++----------
 2 files changed, 60 insertions(+), 84 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 4eb8dc1d2d615..f68cbf62b9825 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -1029,8 +1029,7 @@ CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ,
     return false;
 
   if (any_of(BBPreds, [](const BasicBlock *Pred) {
-        return isa<PHINode>(Pred->begin()) &&
-               isa<IndirectBrInst>(Pred->getTerminator());
+        return isa<IndirectBrInst>(Pred->getTerminator());
       }))
     return false;
 
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll b/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll
index 03aee68fa4248..d3713be8358db 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll
@@ -1,104 +1,81 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --prefix-filecheck-ir-name pref --version 5
 ; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s
 
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define dso_local noundef i32 @main() {
-; CHECK-LABEL: define dso_local noundef i32 @main() {
+define i32 @foo.1(i32 %arg, ptr %arg1) {
+; CHECK-LABEL: define i32 @foo.1(
+; CHECK-SAME: i32 [[ARG:%.*]], ptr [[ARG1:%.*]]) {
 ; CHECK-NEXT:  [[BB:.*]]:
 ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [2 x ptr], align 16
-; CHECK-NEXT:    store ptr blockaddress(@main, %[[BB4:.*]]), ptr [[ALLOCA]], align 16, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    store ptr blockaddress(@foo.1, %[[BB8:.*]]), ptr [[ALLOCA]], align 16
 ; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr inbounds [2 x ptr], ptr [[ALLOCA]], i64 0, i64 1
-; CHECK-NEXT:    store ptr blockaddress(@main, %[[BB10:.*]]), ptr [[GETELEMENTPTR]], align 8, !tbaa [[TBAA0]]
-; CHECK-NEXT:    br label %[[BB1:.*]]
-; CHECK:       [[BB1]]:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI8:%.*]], %[[BB7:.*]] ]
-; CHECK-NEXT:    [[PHI2:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI9:%.*]], %[[BB7]] ]
-; CHECK-NEXT:    switch i32 [[PHI]], label %[[BB7]] [
-; CHECK-NEXT:      i32 0, label %[[BB12:.*]]
-; CHECK-NEXT:      i32 1, label %[[BB4]]
-; CHECK-NEXT:      i32 2, label %[[BB6:.*]]
+; CHECK-NEXT:    store ptr blockaddress(@foo.1, %[[BB16:.*]]), ptr [[GETELEMENTPTR]], align 8
+; CHECK-NEXT:    br label %[[PREFBB2:.*]]
+; CHECK:       [[PREFBB2]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI14:%.*]], %[[BB13:.*]] ]
+; CHECK-NEXT:    [[PHI3:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI15:%.*]], %[[BB13]] ]
+; CHECK-NEXT:    switch i32 [[PHI]], label %[[BB13]] [
+; CHECK-NEXT:      i32 0, label %[[PREFBB18:.*]]
+; CHECK-NEXT:      i32 1, label %[[BB8]]
+; CHECK-NEXT:      i32 2, label %[[PREFBB11:.*]]
 ; CHECK-NEXT:    ]
-; CHECK:       [[BB4]]:
-; CHECK-NEXT:    [[PHI5:%.*]] = phi i32 [ [[PHI13:%.*]], %[[BB12]] ], [ [[PHI2]], %[[BB1]] ]
-; CHECK-NEXT:    br label %[[BB7]]
-; CHECK:       [[BB6]]:
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @foo(i32 noundef [[PHI2]])
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[PHI2]], 1
-; CHECK-NEXT:    br label %[[BB12]]
-; CHECK:       [[BB7]]:
-; CHECK-NEXT:    [[PHI8]] = phi i32 [ [[PHI]], %[[BB1]] ], [ 2, %[[BB4]] ]
-; CHECK-NEXT:    [[PHI9]] = phi i32 [ [[PHI2]], %[[BB1]] ], [ [[PHI5]], %[[BB4]] ]
-; CHECK-NEXT:    br label %[[BB1]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK:       [[BB10]]:
-; CHECK-NEXT:    [[CALL11:%.*]] = call i32 @foo(i32 noundef [[PHI13]])
+; CHECK:       [[BB8]]:
+; CHECK-NEXT:    [[PHI10:%.*]] = phi i32 [ [[ARG]], %[[PREFBB18]] ], [ [[PHI3]], %[[PREFBB2]] ]
+; CHECK-NEXT:    br label %[[BB13]]
+; CHECK:       [[PREFBB11]]:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @wombat(i32 noundef [[PHI3]])
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[PHI3]], 1
+; CHECK-NEXT:    br label %[[PREFBB18]]
+; CHECK:       [[BB13]]:
+; CHECK-NEXT:    [[PHI14]] = phi i32 [ [[PHI]], %[[PREFBB2]] ], [ 2, %[[BB8]] ]
+; CHECK-NEXT:    [[PHI15]] = phi i32 [ [[PHI3]], %[[PREFBB2]] ], [ [[PHI10]], %[[BB8]] ]
+; CHECK-NEXT:    br label %[[PREFBB2]]
+; CHECK:       [[BB16]]:
+; CHECK-NEXT:    [[CALL17:%.*]] = call i32 @wombat(i32 noundef [[ARG]])
 ; CHECK-NEXT:    ret i32 0
-; CHECK:       [[BB12]]:
-; CHECK-NEXT:    [[PHI13]] = phi i32 [ [[ADD]], %[[BB6]] ], [ [[PHI2]], %[[BB1]] ]
-; CHECK-NEXT:    [[SEXT:%.*]] = sext i32 [[PHI13]] to i64
-; CHECK-NEXT:    [[GETELEMENTPTR14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[ALLOCA]], i64 0, i64 [[SEXT]]
-; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr [[GETELEMENTPTR14]], align 8, !tbaa [[TBAA0]]
-; CHECK-NEXT:    indirectbr ptr [[LOAD]], [label %[[BB4]], label %bb10]
+; CHECK:       [[PREFBB18]]:
+; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr [[ARG1]], align 8
+; CHECK-NEXT:    indirectbr ptr [[LOAD]], [label %[[BB8]], label %bb16]
 ;
 bb:
   %alloca = alloca [2 x ptr], align 16
-  store ptr blockaddress(@main, %bb4), ptr %alloca, align 16, !tbaa !0
+  store ptr blockaddress(@foo.1, %bb8), ptr %alloca, align 16
   %getelementptr = getelementptr inbounds [2 x ptr], ptr %alloca, i64 0, i64 1
-  store ptr blockaddress(@main, %bb10), ptr %getelementptr, align 8, !tbaa !0
-  br label %bb1
+  store ptr blockaddress(@foo.1, %bb16), ptr %getelementptr, align 8
+  br label %bb2
 
-bb1:                                              ; preds = %bb7, %bb
-  %phi = phi i32 [ 0, %bb ], [ %phi8, %bb7 ]
-  %phi2 = phi i32 [ 0, %bb ], [ %phi9, %bb7 ]
-  switch i32 %phi, label %bb7 [
-  i32 0, label %bb3
-  i32 1, label %bb4
-  i32 2, label %bb6
+bb2:                                              ; preds = %bb13, %bb
+  %phi = phi i32 [ 0, %bb ], [ %phi14, %bb13 ]
+  %phi3 = phi i32 [ 0, %bb ], [ %phi15, %bb13 ]
+  switch i32 %phi, label %bb13 [
+  i32 0, label %bb5
+  i32 1, label %bb8
+  i32 2, label %bb11
   ]
 
-bb3:                                              ; preds = %bb1
-  br label %bb12
+bb5:                                              ; preds = %bb2
+  br label %bb18
 
-bb4:                                              ; preds = %bb12, %bb1
-  %phi5 = phi i32 [ %phi13, %bb12 ], [ %phi2, %bb1 ]
-  br label %bb7
+bb8:                                              ; preds = %bb18, %bb2
+  %phi10 = phi i32 [ %arg, %bb18 ], [ %phi3, %bb2 ]
+  br label %bb13
 
-bb6:                                              ; preds = %bb1
-  %call = call i32 @foo(i32 noundef %phi2)
-  %add = add nsw i32 %phi2, 1
-  br label %bb12
+bb11:                                             ; preds = %bb2
+  %call = call i32 @wombat(i32 noundef %phi3)
+  %add = add nsw i32 %phi3, 1
+  br label %bb18
 
-bb7:                                              ; preds = %bb4, %bb1
-  %phi8 = phi i32 [ %phi, %bb1 ], [ 2, %bb4 ]
-  %phi9 = phi i32 [ %phi2, %bb1 ], [ %phi5, %bb4 ]
-  br label %bb1, !llvm.loop !4
+bb13:                                             ; preds = %bb8, %bb2
+  %phi14 = phi i32 [ %phi, %bb2 ], [ 2, %bb8 ]
+  %phi15 = phi i32 [ %phi3, %bb2 ], [ %phi10, %bb8 ]
+  br label %bb2
 
-bb10:                                             ; preds = %bb12
-  %call11 = call i32 @foo(i32 noundef %phi13)
+bb16:                                             ; preds = %bb18
+  %call17 = call i32 @wombat(i32 noundef %arg)
   ret i32 0
 
-bb12:                                             ; preds = %bb6, %bb3
-  %phi13 = phi i32 [ %add, %bb6 ], [ %phi2, %bb3 ]
-  %sext = sext i32 %phi13 to i64
-  %getelementptr14 = getelementptr inbounds [2 x ptr], ptr %alloca, i64 0, i64 %sext
-  %load = load ptr, ptr %getelementptr14, align 8, !tbaa !0
-  indirectbr ptr %load, [label %bb4, label %bb10]
+bb18:                                             ; preds = %bb11, %bb5
+  %load = load ptr, ptr %arg1, align 8
+  indirectbr ptr %load, [label %bb8, label %bb16]
 }
 
-declare i32 @foo(i32)
-
-!0 = !{!1, !1, i64 0}
-!1 = !{!"any pointer", !2, i64 0}
-!2 = !{!"omnipotent char", !3, i64 0}
-!3 = !{!"Simple C++ TBAA"}
-!4 = !{!5, !5, i64 0}
-!5 = !{!"int", !2, i64 0}
-;.
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
-; CHECK: [[META1]] = !{!"any pointer", [[META2:![0-9]+]], i64 0}
-; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
-; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
-; CHECK: [[LOOP4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
-; CHECK: [[META5]] = !{!"int", [[META2]], i64 0}
-;.
+declare i32 @wombat(i32)

From e6bcdea700dc7a5b1cf3a3f5a4eaa40d1234e220 Mon Sep 17 00:00:00 2001
From: Anutosh Bhat <andersonbhat491@gmail.com>
Date: Fri, 29 Nov 2024 15:31:02 +0530
Subject: [PATCH 422/427] [clang-repl] Fix generation of wasm binaries while
 running clang-repl in browser (#117978)

Co-authored-by: Vassil Vassilev <v.g.vassilev@gmail.com>
(cherry picked from commit a174aa1e416c4e27945f5a8c646b119126dc8441)
---
 clang/lib/Interpreter/CMakeLists.txt  |  2 +
 clang/lib/Interpreter/Interpreter.cpp |  1 +
 clang/lib/Interpreter/Wasm.cpp        | 57 ++++++++++++++++++++-------
 3 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/clang/lib/Interpreter/CMakeLists.txt b/clang/lib/Interpreter/CMakeLists.txt
index 0a2d60757c216..85efa4b0f984f 100644
--- a/clang/lib/Interpreter/CMakeLists.txt
+++ b/clang/lib/Interpreter/CMakeLists.txt
@@ -15,6 +15,7 @@ set(LLVM_LINK_COMPONENTS
 if (EMSCRIPTEN AND "lld" IN_LIST LLVM_ENABLE_PROJECTS)
   set(WASM_SRC Wasm.cpp)
   set(WASM_LINK lldWasm)
+  set(COMMON_LINK lldCommon)
 endif()
 
 add_clang_library(clangInterpreter
@@ -45,6 +46,7 @@ add_clang_library(clangInterpreter
   clangSema
   clangSerialization
   ${WASM_LINK}
+  ${COMMON_LINK}
   )
 
 if ((MINGW OR CYGWIN) AND BUILD_SHARED_LIBS)
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index c0b8bfebb4343..985d0b7c0ef31 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -193,6 +193,7 @@ IncrementalCompilerBuilder::CreateCpp() {
   Argv.push_back("-target");
   Argv.push_back("wasm32-unknown-emscripten");
   Argv.push_back("-shared");
+  Argv.push_back("-fvisibility=default");
 #endif
   Argv.insert(Argv.end(), UserArgs.begin(), UserArgs.end());
 
diff --git a/clang/lib/Interpreter/Wasm.cpp b/clang/lib/Interpreter/Wasm.cpp
index 79efbaa03982d..aa10b160ccf84 100644
--- a/clang/lib/Interpreter/Wasm.cpp
+++ b/clang/lib/Interpreter/Wasm.cpp
@@ -23,6 +23,31 @@
 #include <string>
 
 namespace lld {
+enum Flavor {
+  Invalid,
+  Gnu,     // -flavor gnu
+  MinGW,   // -flavor gnu MinGW
+  WinLink, // -flavor link
+  Darwin,  // -flavor darwin
+  Wasm,    // -flavor wasm
+};
+
+using Driver = bool (*)(llvm::ArrayRef<const char *>, llvm::raw_ostream &,
+                        llvm::raw_ostream &, bool, bool);
+
+struct DriverDef {
+  Flavor f;
+  Driver d;
+};
+
+struct Result {
+  int retCode;
+  bool canRunAgain;
+};
+
+Result lldMain(llvm::ArrayRef<const char *> args, llvm::raw_ostream &stdoutOS,
+               llvm::raw_ostream &stderrOS, llvm::ArrayRef<DriverDef> drivers);
+
 namespace wasm {
 bool link(llvm::ArrayRef<const char *> args, llvm::raw_ostream &stdoutOS,
           llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput);
@@ -51,13 +76,14 @@ llvm::Error WasmIncrementalExecutor::addModule(PartialTranslationUnit &PTU) {
   llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
       PTU.TheModule->getTargetTriple(), "", "", TO, llvm::Reloc::Model::PIC_);
   PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
-  std::string OutputFileName = PTU.TheModule->getName().str() + ".wasm";
+  std::string ObjectFileName = PTU.TheModule->getName().str() + ".o";
+  std::string BinaryFileName = PTU.TheModule->getName().str() + ".wasm";
 
   std::error_code Error;
-  llvm::raw_fd_ostream OutputFile(llvm::StringRef(OutputFileName), Error);
+  llvm::raw_fd_ostream ObjectFileOutput(llvm::StringRef(ObjectFileName), Error);
 
   llvm::legacy::PassManager PM;
-  if (TargetMachine->addPassesToEmitFile(PM, OutputFile, nullptr,
+  if (TargetMachine->addPassesToEmitFile(PM, ObjectFileOutput, nullptr,
                                          llvm::CodeGenFileType::ObjectFile)) {
     return llvm::make_error<llvm::StringError>(
         "Wasm backend cannot produce object.", llvm::inconvertibleErrorCode());
@@ -69,27 +95,30 @@ llvm::Error WasmIncrementalExecutor::addModule(PartialTranslationUnit &PTU) {
                                                llvm::inconvertibleErrorCode());
   }
 
-  OutputFile.close();
+  ObjectFileOutput.close();
 
   std::vector<const char *> LinkerArgs = {"wasm-ld",
                                           "-shared",
                                           "--import-memory",
-                                          "--no-entry",
-                                          "--export-all",
                                           "--experimental-pic",
                                           "--stack-first",
                                           "--allow-undefined",
-                                          OutputFileName.c_str(),
+                                          ObjectFileName.c_str(),
                                           "-o",
-                                          OutputFileName.c_str()};
-  int Result =
-      lld::wasm::link(LinkerArgs, llvm::outs(), llvm::errs(), false, false);
-  if (!Result)
+                                          BinaryFileName.c_str()};
+
+  const lld::DriverDef WasmDriver = {lld::Flavor::Wasm, &lld::wasm::link};
+  std::vector<lld::DriverDef> WasmDriverArgs;
+  WasmDriverArgs.push_back(WasmDriver);
+  lld::Result Result =
+      lld::lldMain(LinkerArgs, llvm::outs(), llvm::errs(), WasmDriverArgs);
+
+  if (Result.retCode)
     return llvm::make_error<llvm::StringError>(
         "Failed to link incremental module", llvm::inconvertibleErrorCode());
 
   void *LoadedLibModule =
-      dlopen(OutputFileName.c_str(), RTLD_NOW | RTLD_GLOBAL);
+      dlopen(BinaryFileName.c_str(), RTLD_NOW | RTLD_GLOBAL);
   if (LoadedLibModule == nullptr) {
     llvm::errs() << dlerror() << '\n';
     return llvm::make_error<llvm::StringError>(
@@ -109,7 +138,7 @@ llvm::Error WasmIncrementalExecutor::runCtors() const {
   return llvm::Error::success();
 }
 
-llvm::Error WasmIncrementalExecutor::cleanUp() const {
+llvm::Error WasmIncrementalExecutor::cleanUp() {
   // Can't call cleanUp through IncrementalExecutor as it
   // tries to deinitialize JIT which hasn't been initialized
   return llvm::Error::success();
@@ -117,4 +146,4 @@ llvm::Error WasmIncrementalExecutor::cleanUp() const {
 
 WasmIncrementalExecutor::~WasmIncrementalExecutor() = default;
 
-} // namespace clang
+} // namespace clang
\ No newline at end of file

From cf55b9cb8af3ce3fa776e46e6ad71155a7d38c75 Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara@apple.com>
Date: Tue, 13 Aug 2024 00:39:14 -0700
Subject: [PATCH 423/427] [AArch64][Darwin][SME] Don't try to save VG to the
 stack for unwinding.

On Darwin we don't have any hardware that has SVE support, only SME.
Therefore we don't need to save VG for unwinders and can safely omit it.

This also fixes crashes introduced since this feature landed since Darwin's
compact unwind code can't handle the presence of VG anyway.

rdar://131072344
---
 .../Target/AArch64/AArch64FrameLowering.cpp   |  24 ++-
 .../Target/AArch64/AArch64ISelLowering.cpp    |  19 ++-
 .../CodeGen/AArch64/sme-darwin-no-sve-vg.ll   | 161 ++++++++++++++++++
 3 files changed, 189 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 87e057a468afd..83d9dd1725973 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1394,6 +1394,18 @@ bool requiresGetVGCall(MachineFunction &MF) {
          !MF.getSubtarget<AArch64Subtarget>().hasSVE();
 }
 
+static bool requiresSaveVG(MachineFunction &MF) {
+  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  // For Darwin platforms we don't save VG for non-SVE functions, even if SME
+  // is enabled with streaming mode changes.
+  if (!AFI->hasStreamingModeChanges())
+    return false;
+  auto &ST = MF.getSubtarget<AArch64Subtarget>();
+  if (ST.isTargetDarwin())
+    return ST.hasSVE();
+  return true;
+}
+
 bool isVGInstruction(MachineBasicBlock::iterator MBBI) {
   unsigned Opc = MBBI->getOpcode();
   if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
@@ -1430,8 +1442,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
   // functions, we need to do this for both the streaming and non-streaming
   // vector length. Move past these instructions if necessary.
   MachineFunction &MF = *MBB.getParent();
-  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  if (AFI->hasStreamingModeChanges())
+  if (requiresSaveVG(MF))
     while (isVGInstruction(MBBI))
       ++MBBI;
 
@@ -1937,7 +1948,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
          !IsSVECalleeSave(MBBI)) {
     // Move past instructions generated to calculate VG
-    if (AFI->hasStreamingModeChanges())
+    if (requiresSaveVG(MF))
       while (isVGInstruction(MBBI))
         ++MBBI;
 
@@ -3720,7 +3731,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   // non-streaming VG value.
   const Function &F = MF.getFunction();
   SMEAttrs Attrs(F);
-  if (AFI->hasStreamingModeChanges()) {
+  if (requiresSaveVG(MF)) {
     if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
       CSStackSize += 16;
     else
@@ -3873,7 +3884,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
   }
 
   // Insert VG into the list of CSRs, immediately before LR if saved.
-  if (AFI->hasStreamingModeChanges()) {
+  if (requiresSaveVG(MF)) {
     std::vector<CalleeSavedInfo> VGSaves;
     SMEAttrs Attrs(MF.getFunction());
 
@@ -4602,10 +4613,9 @@ MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II,
 
 void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
     MachineFunction &MF, RegScavenger *RS = nullptr) const {
-  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   for (auto &BB : MF)
     for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
-      if (AFI->hasStreamingModeChanges())
+      if (requiresSaveVG(MF))
         II = emitVGSaveRestore(II, this);
       if (StackTaggingMergeSetTag)
         II = tryMergeAdjacentSTG(II, this, RS);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 62078822c89b1..ef2789e96213b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8732,10 +8732,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   SDValue InGlue;
   if (RequiresSMChange) {
-
-    Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
-                        DAG.getVTList(MVT::Other, MVT::Glue), Chain);
-    InGlue = Chain.getValue(1);
+    if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
+      Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
+                          DAG.getVTList(MVT::Other, MVT::Glue), Chain);
+      InGlue = Chain.getValue(1);
+    }
 
     SDValue NewChain = changeStreamingMode(
         DAG, DL, CalleeAttrs.hasStreamingInterface(), Chain, InGlue,
@@ -8914,11 +8915,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     Result = changeStreamingMode(
         DAG, DL, !CalleeAttrs.hasStreamingInterface(), Result, InGlue,
         getSMCondition(CallerAttrs, CalleeAttrs), PStateSM);
-    InGlue = Result.getValue(1);
 
-    Result =
-        DAG.getNode(AArch64ISD::VG_RESTORE, DL,
-                    DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
+    if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
+      InGlue = Result.getValue(1);
+      Result =
+          DAG.getNode(AArch64ISD::VG_RESTORE, DL,
+                      DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
+    }
   }
 
   if (CallerAttrs.requiresEnablingZAAfterCall(CalleeAttrs))
diff --git a/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll
new file mode 100644
index 0000000000000..36a300fea25e5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -o - %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-apple-macosx14.0.0"
+
+; Check we don't crash on Darwin and that we don't try to save VG
+; when only SME (and not SVE) is enabled.
+
+; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync)
+define noundef i32 @main() local_unnamed_addr #0 {
+; CHECK-LABEL: main:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp d15, d14, [sp, #-80]! ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset b8, -24
+; CHECK-NEXT:    .cfi_offset b9, -32
+; CHECK-NEXT:    .cfi_offset b10, -40
+; CHECK-NEXT:    .cfi_offset b11, -48
+; CHECK-NEXT:    .cfi_offset b12, -56
+; CHECK-NEXT:    .cfi_offset b13, -64
+; CHECK-NEXT:    .cfi_offset b14, -72
+; CHECK-NEXT:    .cfi_offset b15, -80
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:    bl __ZL9sme_crashv
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:    mov w0, #0 ; =0x0
+; CHECK-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp], #80 ; 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    .cfi_restore b8
+; CHECK-NEXT:    .cfi_restore b9
+; CHECK-NEXT:    .cfi_restore b10
+; CHECK-NEXT:    .cfi_restore b11
+; CHECK-NEXT:    .cfi_restore b12
+; CHECK-NEXT:    .cfi_restore b13
+; CHECK-NEXT:    .cfi_restore b14
+; CHECK-NEXT:    .cfi_restore b15
+; CHECK-NEXT:    ret
+entry:
+  tail call fastcc void @_ZL9sme_crashv() #4
+  ret i32 0
+}
+
+; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync)
+define internal fastcc void @_ZL9sme_crashv() unnamed_addr #1 {
+; CHECK-LABEL: _ZL9sme_crashv:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp d15, d14, [sp, #-96]! ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-NEXT:    stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x28, x27, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #80
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w27, -24
+; CHECK-NEXT:    .cfi_offset w28, -32
+; CHECK-NEXT:    .cfi_offset b8, -40
+; CHECK-NEXT:    .cfi_offset b9, -48
+; CHECK-NEXT:    .cfi_offset b10, -56
+; CHECK-NEXT:    .cfi_offset b11, -64
+; CHECK-NEXT:    .cfi_offset b12, -72
+; CHECK-NEXT:    .cfi_offset b13, -80
+; CHECK-NEXT:    .cfi_offset b14, -88
+; CHECK-NEXT:    .cfi_offset b15, -96
+; CHECK-NEXT:    .cfi_remember_state
+; CHECK-NEXT:    sub x9, sp, #160
+; CHECK-NEXT:    and sp, x9, #0xffffffffffffff00
+; CHECK-NEXT:  Lloh0:
+; CHECK-NEXT:    adrp x8, ___stack_chk_guard@GOTPAGE
+; CHECK-NEXT:  Lloh1:
+; CHECK-NEXT:    ldr x8, [x8, ___stack_chk_guard@GOTPAGEOFF]
+; CHECK-NEXT:  Lloh2:
+; CHECK-NEXT:    ldr x8, [x8]
+; CHECK-NEXT:    str x8, [sp, #152]
+; CHECK-NEXT:    mov z0.b, #0 ; =0x0
+; CHECK-NEXT:    stp q0, q0, [sp, #32]
+; CHECK-NEXT:    stp q0, q0, [sp]
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    st1w { z0.s }, p0, [x8]
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ldr x8, [sp, #152]
+; CHECK-NEXT:  Lloh3:
+; CHECK-NEXT:    adrp x9, ___stack_chk_guard@GOTPAGE
+; CHECK-NEXT:  Lloh4:
+; CHECK-NEXT:    ldr x9, [x9, ___stack_chk_guard@GOTPAGEOFF]
+; CHECK-NEXT:  Lloh5:
+; CHECK-NEXT:    ldr x9, [x9]
+; CHECK-NEXT:    cmp x9, x8
+; CHECK-NEXT:    b.ne LBB1_2
+; CHECK-NEXT:  ; %bb.1: ; %entry
+; CHECK-NEXT:    sub sp, x29, #80
+; CHECK-NEXT:    .cfi_def_cfa wsp, 96
+; CHECK-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp], #96 ; 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    .cfi_restore w27
+; CHECK-NEXT:    .cfi_restore w28
+; CHECK-NEXT:    .cfi_restore b8
+; CHECK-NEXT:    .cfi_restore b9
+; CHECK-NEXT:    .cfi_restore b10
+; CHECK-NEXT:    .cfi_restore b11
+; CHECK-NEXT:    .cfi_restore b12
+; CHECK-NEXT:    .cfi_restore b13
+; CHECK-NEXT:    .cfi_restore b14
+; CHECK-NEXT:    .cfi_restore b15
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  LBB1_2: ; %entry
+; CHECK-NEXT:    .cfi_restore_state
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:    bl ___stack_chk_fail
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:    .loh AdrpLdrGotLdr Lloh3, Lloh4, Lloh5
+; CHECK-NEXT:    .loh AdrpLdrGotLdr Lloh0, Lloh1, Lloh2
+entry:
+  %uu = alloca [16 x float], align 256
+  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %uu) #5
+  call void @llvm.memset.p0.i64(ptr noundef nonnull align 256 dereferenceable(64) %uu, i8 0, i64 64, i1 false)
+  call void asm sideeffect "ptrue p0.s\0Ast1w { z0.s }, p0, [$0]\0A", "r"(ptr nonnull %uu) #5
+  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %uu) #5
+  ret void
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
+
+; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
+
+attributes #0 = { mustprogress norecurse nounwind ssp uwtable(sync) "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" }
+attributes #1 = { mustprogress norecurse nounwind ssp uwtable(sync) "aarch64_pstate_sm_enabled" "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #3 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) }
+attributes #4 = { "aarch64_pstate_sm_enabled" "no-builtin-calloc" "no-builtin-stpcpy" }
+attributes #5 = { nounwind }

From 4716c4752fd56692bcca775606a2650418eb3be8 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin@arm.com>
Date: Mon, 19 Aug 2024 10:17:10 +0100
Subject: [PATCH 424/427] [AArch64][SME] Return false from
 produceCompactUnwindFrame if VG save required. (#104588)

The compact unwind format requires all registers are stored in pairs, so
return false from produceCompactUnwindFrame if we require saving VG.
---
 .../Target/AArch64/AArch64FrameLowering.cpp   |  3 +-
 .../test/CodeGen/AArch64/sme-darwin-sve-vg.ll | 55 +++++++++++++++++++
 2 files changed, 57 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 83d9dd1725973..c09c1792e9898 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2859,7 +2859,8 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
   return Subtarget.isTargetMachO() &&
          !(Subtarget.getTargetLowering()->supportSwiftError() &&
            Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
-         MF.getFunction().getCallingConv() != CallingConv::SwiftTail;
+         MF.getFunction().getCallingConv() != CallingConv::SwiftTail &&
+         !requiresSaveVG(MF);
 }
 
 static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
diff --git a/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll b/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll
new file mode 100644
index 0000000000000..c32e9cbc05393
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll
@@ -0,0 +1,55 @@
+; RUN: llc -mtriple=aarch64-darwin -mattr=+sve -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
+
+declare void @normal_callee();
+
+define void @locally_streaming_fn() #0 {
+; CHECK-LABEL: locally_streaming_fn:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    stp d15, d14, [sp, #-96]! ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-NEXT:    rdsvl x9, #1
+; CHECK-NEXT:    stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    lsr x9, x9, #3
+; CHECK-NEXT:    stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x30, x9, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    cntd x9
+; CHECK-NEXT:    str x9, [sp, #80] ; 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset vg, -16
+; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    .cfi_offset b8, -40
+; CHECK-NEXT:    .cfi_offset b9, -48
+; CHECK-NEXT:    .cfi_offset b10, -56
+; CHECK-NEXT:    .cfi_offset b11, -64
+; CHECK-NEXT:    .cfi_offset b12, -72
+; CHECK-NEXT:    .cfi_offset b13, -80
+; CHECK-NEXT:    .cfi_offset b14, -88
+; CHECK-NEXT:    .cfi_offset b15, -96
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:    .cfi_offset vg, -24
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:    bl _normal_callee
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:    .cfi_restore vg
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:    ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #64] ; 8-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp], #96 ; 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore b8
+; CHECK-NEXT:    .cfi_restore b9
+; CHECK-NEXT:    .cfi_restore b10
+; CHECK-NEXT:    .cfi_restore b11
+; CHECK-NEXT:    .cfi_restore b12
+; CHECK-NEXT:    .cfi_restore b13
+; CHECK-NEXT:    .cfi_restore b14
+; CHECK-NEXT:    .cfi_restore b15
+; CHECK-NEXT:    ret
+  call void @normal_callee()
+  ret void
+}
+
+attributes #0 = { "aarch64_pstate_sm_body" uwtable(async) }

From 876d0501d312b7303423fc4d3c388174a1465ca5 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Tue, 15 Oct 2024 11:56:40 +0100
Subject: [PATCH 425/427] [AArch64][SME] Fix iterator to
 fixupCalleeSaveRestoreStackOffset (#110855)

The iterator passed to `fixupCalleeSaveRestoreStackOffset` may be
incorrect when it tries to skip over the instructions that get the
current value of 'vg', when there is a 'rdsvl' instruction straight
after the prologue. That's because it doesn't check that the instruction
is still a 'frame-setup' instruction.
---
 .../Target/AArch64/AArch64FrameLowering.cpp   |  9 ++---
 llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll  | 38 +++++++++++++++++++
 2 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index c09c1792e9898..c183ffd384c22 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1947,12 +1947,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   // pointer bump above.
   while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
          !IsSVECalleeSave(MBBI)) {
-    // Move past instructions generated to calculate VG
-    if (requiresSaveVG(MF))
-      while (isVGInstruction(MBBI))
-        ++MBBI;
-
-    if (CombineSPBump)
+    if (CombineSPBump &&
+        // Only fix-up frame-setup load/store instructions.
+        (!requiresSaveVG(MF) || !isVGInstruction(MBBI)))
       fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
                                         NeedsWinCFI, &HasWinCFI);
     ++MBBI;
diff --git a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
index fa8f92cb0a2c9..38666a05c20f8 100644
--- a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
+++ b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
@@ -1102,6 +1102,44 @@ define void @streaming_compatible_no_sve(i32 noundef %x) #4 {
   ret void
 }
 
+; The algorithm that fixes up the offsets of the callee-save/restore
+; instructions must jump over the instructions that instantiate the current
+; 'VG' value. We must make sure that it doesn't consider any RDSVL in
+; user-code as if it is part of the frame-setup when doing so.
+define void @test_rdsvl_right_after_prologue(i64 %x0) nounwind {
+; NO-SVE-CHECK-LABEL: test_rdsvl_right_after_prologue:
+; NO-SVE-CHECK:     // %bb.0:
+; NO-SVE-CHECK-NEXT: stp     d15, d14, [sp, #-96]!           // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: stp     d13, d12, [sp, #16]             // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: mov     x9, x0
+; NO-SVE-CHECK-NEXT: stp     d11, d10, [sp, #32]             // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: stp     d9, d8, [sp, #48]               // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: stp     x29, x30, [sp, #64]             // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: bl      __arm_get_current_vg
+; NO-SVE-CHECK-NEXT: str     x0, [sp, #80]                   // 8-byte Folded Spill
+; NO-SVE-CHECK-NEXT: mov     x0, x9
+; NO-SVE-CHECK-NEXT: rdsvl   x8, #1
+; NO-SVE-CHECK-NEXT: add     x29, sp, #64
+; NO-SVE-CHECK-NEXT: lsr     x8, x8, #3
+; NO-SVE-CHECK-NEXT: mov     x1, x0
+; NO-SVE-CHECK-NEXT: smstart sm
+; NO-SVE-CHECK-NEXT: mov     x0, x8
+; NO-SVE-CHECK-NEXT: bl      bar
+; NO-SVE-CHECK-NEXT: smstop  sm
+; NO-SVE-CHECK-NEXT: ldp     x29, x30, [sp, #64]             // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp     d9, d8, [sp, #48]               // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp     d11, d10, [sp, #32]             // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp     d13, d12, [sp, #16]             // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp     d15, d14, [sp], #96             // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ret
+  %some_alloc = alloca i64, align 8
+  %rdsvl = tail call i64 @llvm.aarch64.sme.cntsd()
+  call void @bar(i64 %rdsvl, i64 %x0) "aarch64_pstate_sm_enabled"
+  ret void
+}
+
+declare void @bar(i64, i64)
+
 ; Ensure we still emit async unwind information with -fno-asynchronous-unwind-tables
 ; if the function contains a streaming-mode change.
 

From ab4b5a2db582958af1ee308a790cfdb42bd24720 Mon Sep 17 00:00:00 2001
From: Brian Cain <bcain@quicinc.com>
Date: Fri, 29 Nov 2024 16:42:44 -0600
Subject: [PATCH 426/427] [clang] recognize hexagon-*-ld.lld variants (#117338)

If we create a cross toolchain with a ${triple}-ld.lld symlink, clang
finds that symlink and when it uses it, it's not recognized as "lld".
Let's resolve that symlink and consider it when determining lld-ness.

For example, clang provides hexagon-link specific link arguments such as
`-mcpu=hexagonv65` and `-march=hexagon` when
hexagon-unknown-linux-musl-ld.lld is found. lld rejects this with the
following error:

hexagon-unknown-linux-musl-ld.lld: error: unknown emulation:
cpu=hexagonv65

(cherry picked from commit 2dc0de753b6df83e35f3d98e0e6a26c95e3399c0)
---
 clang/lib/Driver/ToolChains/Hexagon.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp
index 29781399cbab4..be7851adecea6 100644
--- a/clang/lib/Driver/ToolChains/Hexagon.cpp
+++ b/clang/lib/Driver/ToolChains/Hexagon.cpp
@@ -294,9 +294,10 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA,
   bool IncStartFiles = !Args.hasArg(options::OPT_nostartfiles);
   bool IncDefLibs = !Args.hasArg(options::OPT_nodefaultlibs);
   bool UseG0 = false;
-  const char *Exec = Args.MakeArgString(HTC.GetLinkerPath());
-  bool UseLLD = (llvm::sys::path::filename(Exec).equals_insensitive("ld.lld") ||
-                 llvm::sys::path::stem(Exec).equals_insensitive("ld.lld"));
+  bool UseLLD = false;
+  const char *Exec = Args.MakeArgString(HTC.GetLinkerPath(&UseLLD));
+  UseLLD = UseLLD || llvm::sys::path::filename(Exec).ends_with("ld.lld") ||
+           llvm::sys::path::stem(Exec).ends_with("ld.lld");
   bool UseShared = IsShared && !IsStatic;
   StringRef CpuVer = toolchains::HexagonToolChain::GetTargetCPUVersion(Args);
 

From 422938476c0beb9f327175e077a0c6b6f71b3069 Mon Sep 17 00:00:00 2001
From: Matt McCormick <matt@mmmccormick.com>
Date: Wed, 22 Jan 2025 14:50:48 -0500
Subject: [PATCH 427/427] [libcxx] make LIBCXXABI_PTHREAD_LIB_NAME configurable

Alternative is "wasi-emulated-pthread".
---
 libcxxabi/src/CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt
index 1e06b8cbf084d..b245da0d0a83f 100644
--- a/libcxxabi/src/CMakeLists.txt
+++ b/libcxxabi/src/CMakeLists.txt
@@ -69,7 +69,8 @@ endif()
 
 if (NOT APPLE) # On Apple platforms, we always use -nostdlib++ so we don't need to re-add other libraries
   if (LIBCXXABI_ENABLE_THREADS)
-    add_library_flags_if(LIBCXXABI_HAS_PTHREAD_LIB pthread)
+    set(LIBCXXABI_PTHREAD_LIB_NAME "pthread" CACHE STRING "")
+    add_library_flags_if(LIBCXXABI_HAS_PTHREAD_LIB ${LIBCXXABI_PTHREAD_LIB_NAME})
   endif()
 
   add_library_flags_if(LIBCXXABI_HAS_C_LIB c)