[clang][SPARC] Pass 16-aligned structs with the correct alignment in CC (llvm#155829)

koachan · brad0 · commit 9ee4ac8a8359 · 2025-10-02T22:46:34.000-04:00
Pad argument registers to preserve overaligned structs in LLVM IR. Additionally, since i128 values will be lowered as split i64 pairs in the backend, correctly set the alignment of such arguments as 16 bytes. This should make clang compliant with the ABI specification and fix llvm#144709. (cherry picked from commit 6679e43)
diff --git a/clang/lib/CodeGen/Targets/Sparc.cpp b/clang/lib/CodeGen/Targets/Sparc.cpp
@@ -8,6 +8,7 @@
 
 #include "ABIInfoImpl.h"
 #include "TargetInfo.h"
+#include <algorithm>
 
 using namespace clang;
 using namespace clang::CodeGen;
@@ -109,7 +110,8 @@ class SparcV9ABIInfo : public ABIInfo {
   SparcV9ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {}
 
 private:
-  ABIArgInfo classifyType(QualType RetTy, unsigned SizeLimit) const;
+  ABIArgInfo classifyType(QualType RetTy, unsigned SizeLimit,
+                          unsigned &RegOffset) const;
   void computeInfo(CGFunctionInfo &FI) const override;
   RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                    AggValueSlot Slot) const override;
@@ -222,127 +224,114 @@ class SparcV9ABIInfo : public ABIInfo {
 };
 } // end anonymous namespace
 
-ABIArgInfo
-SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const {
+ABIArgInfo SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit,
+                                        unsigned &RegOffset) const {
   if (Ty->isVoidType())
     return ABIArgInfo::getIgnore();
 
-  uint64_t Size = getContext().getTypeSize(Ty);
+  auto &Context = getContext();
+  auto &VMContext = getVMContext();
+
+  uint64_t Size = Context.getTypeSize(Ty);
+  unsigned Alignment = Context.getTypeAlign(Ty);
+  bool NeedPadding = (Alignment > 64) && (RegOffset % 2 != 0);
 
   // Anything too big to fit in registers is passed with an explicit indirect
   // pointer / sret pointer.
-  if (Size > SizeLimit)
+  if (Size > SizeLimit) {
+    RegOffset += 1;
     return getNaturalAlignIndirect(
         Ty, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
         /*ByVal=*/false);
+  }
 
   // Treat an enum type as its underlying type.
   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
     Ty = EnumTy->getDecl()->getIntegerType();
 
   // Integer types smaller than a register are extended.
-  if (Size < 64 && Ty->isIntegerType())
+  if (Size < 64 && Ty->isIntegerType()) {
+    RegOffset += 1;
     return ABIArgInfo::getExtend(Ty);
+  }
 
   if (const auto *EIT = Ty->getAs<BitIntType>())
-    if (EIT->getNumBits() < 64)
+    if (EIT->getNumBits() < 64) {
+      RegOffset += 1;
       return ABIArgInfo::getExtend(Ty);
+    }
 
   // Other non-aggregates go in registers.
-  if (!isAggregateTypeForABI(Ty))
+  if (!isAggregateTypeForABI(Ty)) {
+    RegOffset += Size / 64;
     return ABIArgInfo::getDirect();
+  }
 
   // If a C++ object has either a non-trivial copy constructor or a non-trivial
   // destructor, it is passed with an explicit indirect pointer / sret pointer.
-  if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
+  if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
+    RegOffset += 1;
     return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
                                    RAA == CGCXXABI::RAA_DirectInMemory);
+  }
 
   // This is a small aggregate type that should be passed in registers.
   // Build a coercion type from the LLVM struct type.
   llvm::StructType *StrTy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty));
-  if (!StrTy)
+  if (!StrTy) {
+    RegOffset += Size / 64;
     return ABIArgInfo::getDirect();
+  }
 
-  CoerceBuilder CB(getVMContext(), getDataLayout());
+  CoerceBuilder CB(VMContext, getDataLayout());
   CB.addStruct(0, StrTy);
   // All structs, even empty ones, should take up a register argument slot,
   // so pin the minimum struct size to one bit.
   CB.pad(llvm::alignTo(
       std::max(CB.DL.getTypeSizeInBits(StrTy).getKnownMinValue(), uint64_t(1)),
       64));
+  RegOffset += CB.Size / 64;
+
+  // If we're dealing with overaligned structs we may need to add a padding in
+  // the front, to preserve the correct register-memory mapping.
+  //
+  // See SCD 2.4.1, pages 3P-11 and 3P-12.
+  llvm::Type *Padding =
+      NeedPadding ? llvm::Type::getInt64Ty(VMContext) : nullptr;
+  RegOffset += NeedPadding ? 1 : 0;
 
   // Try to use the original type for coercion.
   llvm::Type *CoerceTy = CB.isUsableType(StrTy) ? StrTy : CB.getType();
 
-  if (CB.InReg)
-    return ABIArgInfo::getDirectInReg(CoerceTy);
-  else
-    return ABIArgInfo::getDirect(CoerceTy);
+  ABIArgInfo AAI = ABIArgInfo::getDirect(CoerceTy, 0, Padding);
+  AAI.setInReg(CB.InReg);
+  return AAI;
 }
 
 RValue SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                  QualType Ty, AggValueSlot Slot) const {
-  ABIArgInfo AI = classifyType(Ty, 16 * 8);
-  llvm::Type *ArgTy = CGT.ConvertType(Ty);
-  if (AI.canHaveCoerceToType() && !AI.getCoerceToType())
-    AI.setCoerceToType(ArgTy);
-
   CharUnits SlotSize = CharUnits::fromQuantity(8);
+  auto TInfo = getContext().getTypeInfoInChars(Ty);
 
-  CGBuilderTy &Builder = CGF.Builder;
-  Address Addr = Address(Builder.CreateLoad(VAListAddr, "ap.cur"),
-                         getVAListElementType(CGF), SlotSize);
-  llvm::Type *ArgPtrTy = CGF.UnqualPtrTy;
-
-  auto TypeInfo = getContext().getTypeInfoInChars(Ty);
-
-  Address ArgAddr = Address::invalid();
-  CharUnits Stride;
-  switch (AI.getKind()) {
-  case ABIArgInfo::Expand:
-  case ABIArgInfo::CoerceAndExpand:
-  case ABIArgInfo::InAlloca:
-    llvm_unreachable("Unsupported ABI kind for va_arg");
-
-  case ABIArgInfo::Extend: {
-    Stride = SlotSize;
-    CharUnits Offset = SlotSize - TypeInfo.Width;
-    ArgAddr = Builder.CreateConstInBoundsByteGEP(Addr, Offset, "extend");
-    break;
-  }
-
-  case ABIArgInfo::Direct: {
-    auto AllocSize = getDataLayout().getTypeAllocSize(AI.getCoerceToType());
-    Stride = CharUnits::fromQuantity(AllocSize).alignTo(SlotSize);
-    ArgAddr = Addr;
-    break;
-  }
-
-  case ABIArgInfo::Indirect:
-  case ABIArgInfo::IndirectAliased:
-    Stride = SlotSize;
-    ArgAddr = Addr.withElementType(ArgPtrTy);
-    ArgAddr = Address(Builder.CreateLoad(ArgAddr, "indirect.arg"), ArgTy,
-                      TypeInfo.Align);
-    break;
+  // Zero-sized types have a width of one byte for parameter passing purposes.
+  TInfo.Width = std::max(TInfo.Width, CharUnits::fromQuantity(1));
 
-  case ABIArgInfo::Ignore:
-    return Slot.asRValue();
-  }
-
-  // Update VAList.
-  Address NextPtr = Builder.CreateConstInBoundsByteGEP(Addr, Stride, "ap.next");
-  Builder.CreateStore(NextPtr.emitRawPointer(CGF), VAListAddr);
-
-  return CGF.EmitLoadOfAnyValue(
-      CGF.MakeAddrLValue(ArgAddr.withElementType(ArgTy), Ty), Slot);
+  // Arguments bigger than 2*SlotSize bytes are passed indirectly.
+  return emitVoidPtrVAArg(CGF, VAListAddr, Ty,
+                          /*IsIndirect=*/TInfo.Width > 2 * SlotSize, TInfo,
+                          SlotSize,
+                          /*AllowHigherAlign=*/true, Slot);
 }
 
 void SparcV9ABIInfo::computeInfo(CGFunctionInfo &FI) const {
-  FI.getReturnInfo() = classifyType(FI.getReturnType(), 32 * 8);
+  unsigned RetOffset = 0;
+  ABIArgInfo RetType = classifyType(FI.getReturnType(), 32 * 8, RetOffset);
+  FI.getReturnInfo() = RetType;
+
+  // Indirect returns will have its pointer passed as an argument.
+  unsigned ArgOffset = RetType.isIndirect() ? RetOffset : 0;
   for (auto &I : FI.arguments())
-    I.info = classifyType(I.type, 16 * 8);
+    I.info = classifyType(I.type, 16 * 8, ArgOffset);
 }
 
 namespace {
diff --git a/clang/test/CodeGen/sparcv9-abi.c b/clang/test/CodeGen/sparcv9-abi.c
@@ -25,12 +25,35 @@ long double f_ld(long double x) { return x; }
 struct empty {};
 struct emptyarr { struct empty a[10]; };
 
+// In 16-byte structs, 16-byte aligned members are expanded
+// to their corresponding i128/f128 types.
+struct align16_int { _Alignas(16) int x; };
+struct align16_mixed { _Alignas(16) int x; double y; };
+struct align16_longdouble { long double x; };
+
 // CHECK-LABEL: define{{.*}} i64 @f_empty(i64 %x.coerce)
 struct empty f_empty(struct empty x) { return x; }
 
 // CHECK-LABEL: define{{.*}} i64 @f_emptyarr(i64 %x.coerce)
 struct empty f_emptyarr(struct emptyarr x) { return x.a[0]; }
 
+// CHECK-LABEL: define{{.*}} void @f_aligncaller(i64 %a.coerce0, i64 %a.coerce1)
+// CHECK-LABEL: declare{{.*}} void @f_aligncallee(i32 noundef signext, i64, i64, i64)
+void f_aligncallee(int pad, struct align16_int a);
+void f_aligncaller(struct align16_int a) {
+    f_aligncallee(0, a);
+}
+
+// CHECK-LABEL: define{{.*}} double @f_mixed_aligned(i64 noundef %a, i64 %0, i64 %b.coerce0, double %b.coerce1)
+double f_mixed_aligned(long a, struct align16_mixed b) {
+	return b.y;
+}
+
+// CHECK-LABEL: define{{.*}} fp128 @f_longdouble(i64 noundef %a, i64 %0, fp128 %b.coerce)
+long double f_longdouble(long a, struct align16_longdouble b) {
+	return b.x;
+}
+
 // CHECK-LABEL: define{{.*}} i64 @f_emptyvar(i32 noundef zeroext %count, ...)
 long f_emptyvar(unsigned count, ...) {
     long ret;
@@ -80,13 +103,25 @@ struct medium {
   int *c, *d;
 };
 
+struct medium_aligned {
+  _Alignas(16) int *a;
+  int *b, *c, *d;
+};
+
 // CHECK-LABEL: define{{.*}} %struct.medium @f_medium(ptr noundef %x)
 struct medium f_medium(struct medium x) {
   x.a += *x.b;
   x.b = 0;
   return x;
 }
 
+// CHECK-LABEL: define{{.*}} %struct.medium_aligned @f_medium_aligned(ptr noundef %x)
+struct medium_aligned f_medium_aligned(struct medium_aligned x) {
+  x.a += *x.b;
+  x.b = 0;
+  return x;
+}
+
 // Large structs are also returned indirectly.
 struct large {
   int *a, *b;
@@ -101,6 +136,15 @@ struct large f_large(struct large x) {
   return x;
 }
 
+// Large returns are converted into a pointer argument.
+// Such conversion should preserve the alignment of overaligned arguments.
+// define{{.*}} void @f_largereturn_aligned(ptr dead_on_unwind noalias writable sret(%struct.large) align 8 %agg.result, i64 %0, i64 %x.coerce0, i64 %x.coerce1)
+struct large f_largereturn_aligned(struct align16_int x) {
+  struct large ret;
+  ret.x = x.x;
+  return ret;
+}
+
 // A 64-bit struct fits in a register.
 struct reg {
   int a, b;
@@ -215,6 +259,18 @@ int f_variable(char *f, ...) {
   case 'm':
     s += *va_arg(ap, struct medium).a;
     break;
+
+// CHECK: %[[CUR:[^ ]+]] = load ptr, ptr %ap
+// CHECK-DAG: %[[TMP:[^ ]+]] = getelementptr inbounds i8, ptr %[[CUR]], i32 15
+// CHECK-DAG: %[[ALIGNED:[^ ]+]] = call ptr @llvm.ptrmask.p0.i64(ptr %[[TMP]], i64 -16)
+// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr inbounds i8, ptr %[[ALIGNED]], i64 16
+// CHECK-DAG: store ptr %[[NXT]], ptr %ap
+// CHECK-DAG: call void @llvm.memcpy.p0.p0.i64(ptr align 16 {{.*}}, ptr align 16 %[[ALIGNED]], i64 16, i1 false)
+// CHECK: br
+  case 'a':
+    s += va_arg(ap, struct align16_int).x;
+    break;
   }
+  va_end(ap);
   return s;
 }
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -115,7 +115,8 @@ static bool Analyze_CC_Sparc64_Full(bool IsReturn, unsigned &ValNo, MVT &ValVT,
 
   // Stack space is allocated for all arguments starting from [%fp+BIAS+128].
   unsigned size      = (LocVT == MVT::f128) ? 16 : 8;
-  Align alignment = (LocVT == MVT::f128) ? Align(16) : Align(8);
+  Align alignment =
+      (LocVT == MVT::f128 || ArgFlags.isSplit()) ? Align(16) : Align(8);
   unsigned Offset = State.AllocateStack(size, alignment);
   unsigned Reg = 0;
 
diff --git a/llvm/test/CodeGen/SPARC/64abi.ll b/llvm/test/CodeGen/SPARC/64abi.ll
@@ -473,15 +473,28 @@ declare i64 @receive_fp128(i64 %a, ...)
 ; HARD-DAG:   ldx [%sp+[[Offset0]]], %o2
 ; HARD-DAG:   ldx [%sp+[[Offset1]]], %o3
 ; SOFT-DAG:   mov  %i0, %o0
-; SOFT-DAG:   mov  %i1, %o1
 ; SOFT-DAG:   mov  %i2, %o2
+; SOFT-DAG:   mov  %i3, %o3
 ; CHECK:      call receive_fp128
 define i64 @test_fp128_variable_args(i64 %a, fp128 %b) {
 entry:
   %0 = call i64 (i64, ...) @receive_fp128(i64 %a, fp128 %b)
   ret i64 %0
 }
 
+declare i64 @receive_i128(i64 %a, i128 %b)
+
+; CHECK-LABEL: test_i128_args:
+; CHECK:   mov  %i3, %o3
+; CHECK:   mov  %i2, %o2
+; CHECK:   mov  %i0, %o0
+; CHECK:   call receive_i128
+define i64 @test_i128_args(i64 %a, i128 %b) {
+entry:
+  %0 = call i64 @receive_i128(i64 %a, i128 %b)
+  ret i64 %0
+}
+
 ; CHECK-LABEL: test_call_libfunc:
 ; HARD:   st %f1, [%fp+[[Offset0:[0-9]+]]]
 ; HARD:   fmovs %f3, %f1