From e6de9f21b162c57dd09cb4de3147b7ab09ef8681 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 9 Feb 2024 13:29:19 +0000 Subject: [PATCH 1/5] [TBAA] Extract logic to use TBAA tag for field of !tbaa.struct (NFC). --- llvm/include/llvm/IR/Metadata.h | 5 +++++ llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 14 ++++++++++++++ .../Transforms/InstCombine/InstCombineCalls.cpp | 15 +-------------- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index db1f44fea3b45..6f23ac44dee96 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -844,6 +844,11 @@ struct AAMDNodes { /// together. Different from `merge`, where different locations should /// overlap each other, `concat` puts non-overlapping locations together. AAMDNodes concat(const AAMDNodes &Other) const; + + /// Create a new AAMDNode for accessing \p AccessSize bytes of this AAMDNode. + /// If his AAMDNode has !tbaa.struct and \p AccessSize matches the size of the + /// field at offset 0, get the TBAA tag describing the accessed field. + AAMDNodes adjustForAccess(unsigned AccessSize); }; // Specialize DenseMapInfo for AAMDNodes. diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index e4dc1a867f6f0..edc08cde686f1 100644 --- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -817,3 +817,17 @@ MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) { ConstantAsMetadata::get(ConstantInt::get(PreviousSize->getType(), Len)); return MDNode::get(MD->getContext(), NextNodes); } + +AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) { + AAMDNodes New = *this; + MDNode *M = New.TBAAStruct; + New.TBAAStruct = nullptr; + if (M && M->getNumOperands() == 3 && M->getOperand(0) && + mdconst::hasa(M->getOperand(0)) && + mdconst::extract(M->getOperand(0))->isZero() && + M->getOperand(1) && mdconst::hasa(M->getOperand(1)) && + mdconst::extract(M->getOperand(1))->getValue() == AccessSize && + M->getOperand(2) && isa(M->getOperand(2))) + New.TBAA = cast(M->getOperand(2)); + return New; +} diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index ed5d44757fbeb..56d1259e95519 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -172,20 +172,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { // If the memcpy has metadata describing the members, see if we can get the // TBAA tag describing our copy. - AAMDNodes AACopyMD = MI->getAAMetadata(); - - if (MDNode *M = AACopyMD.TBAAStruct) { - AACopyMD.TBAAStruct = nullptr; - if (M->getNumOperands() == 3 && M->getOperand(0) && - mdconst::hasa(M->getOperand(0)) && - mdconst::extract(M->getOperand(0))->isZero() && - M->getOperand(1) && - mdconst::hasa(M->getOperand(1)) && - mdconst::extract(M->getOperand(1))->getValue() == - Size && - M->getOperand(2) && isa(M->getOperand(2))) - AACopyMD.TBAA = cast(M->getOperand(2)); - } + AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size); Value *Src = MI->getArgOperand(1); Value *Dest = MI->getArgOperand(0); From 99cf032dfabb21b820559bae61d2354e56336fdd Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 9 Feb 2024 16:25:32 +0000 Subject: [PATCH 2/5] [TBAA] Only clear TBAAStruct if field can be extracted. Retain TBAAStruct if we fail to match the access to a single field. All users at the moment use this when using the full size of the original access. SROA also retains the original TBAAStruct when accessing parts at offset 0. Motivation for this and follow-on patches is to improve codegen for libc++, where using memcpy limits optimizations, like vectorization for code iteration over std::vector>: https://godbolt.org/z/f3vqYos3c Depends on https://github.com/llvm/llvm-project/pull/81284 --- llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 8 +++++--- llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll | 5 +++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index edc08cde686f1..bfd70414c0340 100644 --- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -821,13 +821,15 @@ MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) { AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) { AAMDNodes New = *this; MDNode *M = New.TBAAStruct; - New.TBAAStruct = nullptr; if (M && M->getNumOperands() == 3 && M->getOperand(0) && mdconst::hasa(M->getOperand(0)) && mdconst::extract(M->getOperand(0))->isZero() && M->getOperand(1) && mdconst::hasa(M->getOperand(1)) && - mdconst::extract(M->getOperand(1))->getValue() == AccessSize && - M->getOperand(2) && isa(M->getOperand(2))) + mdconst::extract(M->getOperand(1))->getValue() == + AccessSize && + M->getOperand(2) && isa(M->getOperand(2))) { + New.TBAAStruct = nullptr; New.TBAA = cast(M->getOperand(2)); + } return New; } diff --git a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll index 1042c413fbb7b..996d2c0e67e16 100644 --- a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll +++ b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll @@ -38,8 +38,8 @@ define ptr @test2() { define void @test3_multiple_fields(ptr nocapture %a, ptr nocapture %b) { ; CHECK-LABEL: @test3_multiple_fields( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[B:%.*]], align 4 -; CHECK-NEXT: store i64 [[TMP0]], ptr [[A:%.*]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[B:%.*]], align 4, !tbaa.struct [[TBAA_STRUCT3:![0-9]+]] +; CHECK-NEXT: store i64 [[TMP0]], ptr [[A:%.*]], align 4, !tbaa.struct [[TBAA_STRUCT3]] ; CHECK-NEXT: ret void ; entry: @@ -86,4 +86,5 @@ entry: ; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} ; CHECK: [[META1]] = !{!"float", [[META2:![0-9]+]]} ; CHECK: [[META2]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[TBAA_STRUCT3]] = !{i64 0, i64 4, [[TBAA0]], i64 4, i64 4, [[TBAA0]]} ;. From 90639e9131670863ebb4c199a9861b2b0094d601 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 9 Feb 2024 15:17:09 +0000 Subject: [PATCH 3/5] [SROA] Use !tbaa instead of !tbaa.struct if op matches field. If a split memory access introduced by SROA accesses precisely a single field of the original operation's !tbaa.struct, use the !tbaa tag for the accessed field directly instead of the full !tbaa.struct. InstCombine already had a similar logic. Motivation for this and follow-on patches is to improve codegen for libc++, where using memcpy limits optimizations, like vectorization for code iteration over std::vector>: https://godbolt.org/z/f3vqYos3c Depends on https://github.com/llvm/llvm-project/pull/81285. --- llvm/include/llvm/IR/Metadata.h | 2 + llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 13 ++++++ llvm/lib/Transforms/Scalar/SROA.cpp | 48 ++++++++++++++------ llvm/test/Transforms/SROA/tbaa-struct2.ll | 21 ++++----- llvm/test/Transforms/SROA/tbaa-struct3.ll | 16 +++---- 5 files changed, 67 insertions(+), 33 deletions(-) diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index 6f23ac44dee96..33363a271d482 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -849,6 +849,8 @@ struct AAMDNodes { /// If his AAMDNode has !tbaa.struct and \p AccessSize matches the size of the /// field at offset 0, get the TBAA tag describing the accessed field. AAMDNodes adjustForAccess(unsigned AccessSize); + AAMDNodes adjustForAccess(size_t Offset, Type *AccessTy, + const DataLayout &DL); }; // Specialize DenseMapInfo for AAMDNodes. diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index bfd70414c0340..b2dc451d58193 100644 --- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -833,3 +833,16 @@ AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) { } return New; } + +AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, Type *AccessTy, + const DataLayout &DL) { + + AAMDNodes New = shift(Offset); + if (!DL.typeSizeEqualsStoreSize(AccessTy)) + return New; + TypeSize Size = DL.getTypeStoreSize(AccessTy); + if (Size.isScalable()) + return New; + + return New.adjustForAccess(Size.getKnownMinValue()); +} diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 138dc38b5c14c..f24cbbc1fe059 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2914,7 +2914,8 @@ class AllocaSliceRewriter : public InstVisitor { // Do this after copyMetadataForLoad() to preserve the TBAA shift. if (AATags) - NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + NewLI->setAAMetadata(AATags.adjustForAccess( + NewBeginOffset - BeginOffset, NewLI->getType(), DL)); // Try to preserve nonnull metadata V = NewLI; @@ -2936,7 +2937,9 @@ class AllocaSliceRewriter : public InstVisitor { IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy), getSliceAlign(), LI.isVolatile(), LI.getName()); if (AATags) - NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + NewLI->setAAMetadata(AATags.adjustForAccess( + NewBeginOffset - BeginOffset, NewLI->getType(), DL)); + if (LI.isVolatile()) NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access, @@ -3011,7 +3014,8 @@ class AllocaSliceRewriter : public InstVisitor { Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); if (AATags) - Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, + V->getType(), DL)); Pass.DeadInsts.push_back(&SI); // NOTE: Careful to use OrigV rather than V. @@ -3038,7 +3042,8 @@ class AllocaSliceRewriter : public InstVisitor { Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); if (AATags) - Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, + V->getType(), DL)); migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI, Store, Store->getPointerOperand(), @@ -3097,8 +3102,10 @@ class AllocaSliceRewriter : public InstVisitor { } NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); - if (AATags) - NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + if (AATags) { + NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, + V->getType(), DL)); + } if (SI.isVolatile()) NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); if (NewSI->isAtomic()) @@ -3280,8 +3287,10 @@ class AllocaSliceRewriter : public InstVisitor { IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile()); New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); - if (AATags) - New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + if (AATags) { + New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, + V->getType(), DL)); + } migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II, New, New->getPointerOperand(), V, DL); @@ -3486,7 +3495,8 @@ class AllocaSliceRewriter : public InstVisitor { Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); if (AATags) - Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, + Load->getType(), DL)); Src = Load; } @@ -3507,8 +3517,10 @@ class AllocaSliceRewriter : public InstVisitor { IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile())); Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); - if (AATags) - Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + if (AATags) { + Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, + Src->getType(), DL)); + } APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0); if (IsDest) { @@ -3836,7 +3848,8 @@ class AggLoadStoreRewriter : public InstVisitor { DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0); if (AATags && GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset)) - Load->setAAMetadata(AATags.shift(Offset.getZExtValue())); + Load->setAAMetadata( + AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL)); Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); LLVM_DEBUG(dbgs() << " to: " << *Load << "\n"); @@ -3887,8 +3900,10 @@ class AggLoadStoreRewriter : public InstVisitor { APInt Offset( DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0); GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset); - if (AATags) - Store->setAAMetadata(AATags.shift(Offset.getZExtValue())); + if (AATags) { + Store->setAAMetadata(AATags.adjustForAccess( + Offset.getZExtValue(), ExtractValue->getType(), DL)); + } // migrateDebugInfo requires the base Alloca. Walk to it from this gep. // If we cannot (because there's an intervening non-const or unbounded @@ -4542,6 +4557,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { Value *StoreBasePtr = SI->getPointerOperand(); IRB.SetInsertPoint(SI); + AAMDNodes AATags = SI->getAAMetadata(); LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n"); @@ -4561,6 +4577,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group, LLVMContext::MD_DIAssignID}); + + if (AATags) + PStore->setAAMetadata( + AATags.adjustForAccess(PartOffset, PLoad->getType(), DL)); LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n"); } diff --git a/llvm/test/Transforms/SROA/tbaa-struct2.ll b/llvm/test/Transforms/SROA/tbaa-struct2.ll index 1fd37e82d6777..02c99a2b32945 100644 --- a/llvm/test/Transforms/SROA/tbaa-struct2.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct2.ll @@ -13,9 +13,9 @@ define double @bar(ptr %wishart) { ; CHECK-NEXT: [[TMP_SROA_3:%.*]] = alloca [4 x i8], align 4 ; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load double, ptr [[WISHART:%.*]], align 8, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]] ; CHECK-NEXT: [[TMP_SROA_2_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 8 -; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa.struct [[TBAA_STRUCT7:![0-9]+]] +; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa [[TBAA5:![0-9]+]] ; CHECK-NEXT: [[TMP_SROA_3_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 12 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]] ; CHECK-NEXT: [[CALL:%.*]] = call double @subcall(double [[TMP_SROA_0_0_COPYLOAD]], i32 [[TMP_SROA_2_0_COPYLOAD]]) ; CHECK-NEXT: ret double [[CALL]] ; @@ -38,15 +38,14 @@ define double @bar(ptr %wishart) { ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ;. -; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 8, !1, i64 8, i64 4, !5} -; CHECK: [[META1:![0-9]+]] = !{!2, !2, i64 0} -; CHECK: [[META2:![0-9]+]] = !{!"double", !3, i64 0} -; CHECK: [[META3:![0-9]+]] = !{!"omnipotent char", !4, i64 0} -; CHECK: [[META4:![0-9]+]] = !{!"Simple C++ TBAA"} -; CHECK: [[META5:![0-9]+]] = !{!6, !6, i64 0} -; CHECK: [[META6:![0-9]+]] = !{!"int", !3, i64 0} -; CHECK: [[TBAA_STRUCT7]] = !{i64 0, i64 4, !5} -; CHECK: [[TBAA_STRUCT8]] = !{} +; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 8, [[META1:![0-9]+]], i64 8, i64 4, [[TBAA5]]} +; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[META2]] = !{!"double", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +; CHECK: [[META4]] = !{!"Simple C++ TBAA"} +; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; CHECK: [[META6]] = !{!"int", [[META3]], i64 0} +; CHECK: [[TBAA_STRUCT7]] = !{} ;. ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK-MODIFY-CFG: {{.*}} diff --git a/llvm/test/Transforms/SROA/tbaa-struct3.ll b/llvm/test/Transforms/SROA/tbaa-struct3.ll index 4910e0e07ae38..603e7d708647f 100644 --- a/llvm/test/Transforms/SROA/tbaa-struct3.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct3.ll @@ -7,9 +7,9 @@ define void @load_store_transfer_split_struct_tbaa_2_float(ptr dereferenceable(2 ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[B]] to i32 -; CHECK-NEXT: store i32 [[TMP0]], ptr [[RES]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[RES]], align 4, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]] ; CHECK-NEXT: [[RES_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[RES]], i64 4 -; CHECK-NEXT: store i32 [[TMP1]], ptr [[RES_SROA_IDX]], align 4 +; CHECK-NEXT: store i32 [[TMP1]], ptr [[RES_SROA_IDX]], align 4, !tbaa [[TBAA1:![0-9]+]] ; CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[RES]], align 8 ; CHECK-NEXT: ret void ; @@ -29,9 +29,9 @@ define void @memcpy_transfer(ptr dereferenceable(24) %res, float %a, float %b) { ; CHECK-SAME: ptr dereferenceable(24) [[RES:%.*]], float [[A:%.*]], float [[B:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[L_PTR:%.*]] = load ptr, ptr [[RES]], align 8 -; CHECK-NEXT: store float [[A]], ptr [[L_PTR]], align 1, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]] +; CHECK-NEXT: store float [[A]], ptr [[L_PTR]], align 1, !tbaa.struct [[TBAA_STRUCT0]] ; CHECK-NEXT: [[TMP_SROA_2_0_L_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[L_PTR]], i64 4 -; CHECK-NEXT: store float [[B]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]] +; CHECK-NEXT: store float [[B]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa [[TBAA1]] ; CHECK-NEXT: ret void ; entry: @@ -53,7 +53,7 @@ define void @memcpy_transfer_tbaa_field_and_size_do_not_align(ptr dereferenceabl ; CHECK-NEXT: [[TMP_SROA_2_0_L_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[L_PTR]], i64 4 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B]] to i32 ; CHECK-NEXT: [[TMP_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -; CHECK-NEXT: store i16 [[TMP_SROA_2_0_EXTRACT_TRUNC]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT5]] +; CHECK-NEXT: store i16 [[TMP_SROA_2_0_EXTRACT_TRUNC]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]] ; CHECK-NEXT: ret void ; entry: @@ -98,10 +98,10 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias !3 = !{!"omnipotent char", !4, i64 0} !4 = !{!"Simple C++ TBAA"} ;. -; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 4, [[META1:![0-9]+]], i64 4, i64 4, [[META1]]} -; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 4, [[TBAA1]], i64 4, i64 4, [[TBAA1]]} +; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} ; CHECK: [[META2]] = !{!"float", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} ; CHECK: [[META4]] = !{!"Simple C++ TBAA"} -; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 4, [[META1]]} +; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 4, [[TBAA1]]} ;. From 137fe547415c6196498405f23e11f2fc4831675a Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 15 Feb 2024 21:30:21 +0000 Subject: [PATCH 4/5] !fixup update new test cases, add handle missing cases. --- llvm/include/llvm/IR/Metadata.h | 1 + llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 6 ++- llvm/lib/Transforms/Scalar/SROA.cpp | 16 +++--- llvm/test/Transforms/SROA/tbaa-struct3.ll | 51 ++++++++++++-------- 4 files changed, 46 insertions(+), 28 deletions(-) diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index 94065b52740a5..da6744fdd0916 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -851,6 +851,7 @@ struct AAMDNodes { AAMDNodes adjustForAccess(unsigned AccessSize); AAMDNodes adjustForAccess(size_t Offset, Type *AccessTy, const DataLayout &DL); + AAMDNodes adjustForAccess(size_t Offset, unsigned AccessSize); }; // Specialize DenseMapInfo for AAMDNodes. diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index b2dc451d58193..a8c881fce5fd4 100644 --- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -836,7 +836,6 @@ AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) { AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, Type *AccessTy, const DataLayout &DL) { - AAMDNodes New = shift(Offset); if (!DL.typeSizeEqualsStoreSize(AccessTy)) return New; @@ -846,3 +845,8 @@ AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, Type *AccessTy, return New.adjustForAccess(Size.getKnownMinValue()); } + +AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, unsigned AccessSize) { + AAMDNodes New = shift(Offset); + return New.adjustForAccess(AccessSize); +} diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index f24cbbc1fe059..6c8785d52c4ea 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2936,6 +2936,7 @@ class AllocaSliceRewriter : public InstVisitor { LoadInst *NewLI = IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy), getSliceAlign(), LI.isVolatile(), LI.getName()); + if (AATags) NewLI->setAAMetadata(AATags.adjustForAccess( NewBeginOffset - BeginOffset, NewLI->getType(), DL)); @@ -3102,10 +3103,9 @@ class AllocaSliceRewriter : public InstVisitor { } NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); - if (AATags) { + if (AATags) NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, V->getType(), DL)); - } if (SI.isVolatile()) NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); if (NewSI->isAtomic()) @@ -3207,12 +3207,14 @@ class AllocaSliceRewriter : public InstVisitor { // a single value type, just emit a memset. if (!CanContinue) { Type *SizeTy = II.getLength()->getType(); - Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset); + unsigned Sz = NewEndOffset - NewBeginOffset; + Constant *Size = ConstantInt::get(SizeTy, Sz); MemIntrinsic *New = cast(IRB.CreateMemSet( getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size, MaybeAlign(getSliceAlign()), II.isVolatile())); if (AATags) - New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + New->setAAMetadata( + AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz)); migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II, New, New->getRawDest(), nullptr, DL); @@ -3287,10 +3289,9 @@ class AllocaSliceRewriter : public InstVisitor { IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile()); New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); - if (AATags) { + if (AATags) New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, V->getType(), DL)); - } migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II, New, New->getPointerOperand(), V, DL); @@ -3517,10 +3518,9 @@ class AllocaSliceRewriter : public InstVisitor { IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile())); Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); - if (AATags) { + if (AATags) Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, Src->getType(), DL)); - } APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0); if (IsDest) { diff --git a/llvm/test/Transforms/SROA/tbaa-struct3.ll b/llvm/test/Transforms/SROA/tbaa-struct3.ll index a3414e8ace538..2a151c01adfcc 100644 --- a/llvm/test/Transforms/SROA/tbaa-struct3.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct3.ll @@ -100,7 +100,7 @@ define void @store_vector_part_first(ptr %y2, float %f) { ; CHECK-NEXT: [[V_1:%.*]] = call <2 x float> @foo(ptr [[Y2]]) ; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[Y2]], align 8, !tbaa.struct [[TBAA_STRUCT6:![0-9]+]] ; CHECK-NEXT: [[X7_SROA_2_0_Y2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[Y2]], i64 8 -; CHECK-NEXT: store float [[F]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 8, !tbaa.struct [[TBAA_STRUCT5]] +; CHECK-NEXT: store float [[F]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 8, !tbaa [[TBAA1]] ; CHECK-NEXT: ret void ; %x7 = alloca { float, float, float, float } @@ -118,7 +118,7 @@ define void @store_vector_part_second(ptr %y2, float %f) { ; CHECK-NEXT: [[V_1:%.*]] = call <2 x float> @foo(ptr [[Y2]]) ; CHECK-NEXT: store float [[F]], ptr [[Y2]], align 8, !tbaa.struct [[TBAA_STRUCT9:![0-9]+]] ; CHECK-NEXT: [[X7_SROA_2_0_Y2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[Y2]], i64 4 -; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 4, !tbaa.struct [[TBAA_STRUCT10:![0-9]+]] +; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 4, !tbaa [[TBAA7:![0-9]+]] ; CHECK-NEXT: ret void ; %x7 = alloca { float, float, float, float } @@ -134,7 +134,7 @@ define void @store_vector_single(ptr %y2, float %f) { ; CHECK-LABEL: define void @store_vector_single( ; CHECK-SAME: ptr [[Y2:%.*]], float [[F:%.*]]) { ; CHECK-NEXT: [[V_1:%.*]] = call <2 x float> @foo(ptr [[Y2]]) -; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[Y2]], align 4, !tbaa.struct [[TBAA_STRUCT11:![0-9]+]] +; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[Y2]], align 4, !tbaa.struct [[TBAA_STRUCT10:![0-9]+]] ; CHECK-NEXT: ret void ; %x7 = alloca { float, float } @@ -161,8 +161,8 @@ define void @memset(ptr %dst, ptr align 8 %src) { ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_4]], ptr align 1 [[A_SROA_4_0_SRC_SROA_IDX]], i32 10, i1 false) ; CHECK-NEXT: store i16 1, ptr [[A_SROA_3]], align 2 ; CHECK-NEXT: [[A_SROA_0_1_A_1_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 1 -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_1_A_1_SROA_IDX2]], i8 42, i32 6, i1 false), !tbaa.struct [[TBAA_STRUCT12:![0-9]+]] -; CHECK-NEXT: store i16 10794, ptr [[A_SROA_3]], align 2, !tbaa.struct [[TBAA_STRUCT13:![0-9]+]] +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_1_A_1_SROA_IDX2]], i8 42, i32 6, i1 false), !tbaa.struct [[TBAA_STRUCT11:![0-9]+]] +; CHECK-NEXT: store i16 10794, ptr [[A_SROA_3]], align 2, !tbaa [[TBAA1]] ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 7, i1 true) ; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 7 ; CHECK-NEXT: [[A_SROA_3_0_A_SROA_3_0_COPYLOAD1:%.*]] = load volatile i16, ptr [[A_SROA_3]], align 2 @@ -199,8 +199,8 @@ define void @memset2(ptr %dst, ptr align 8 %src) { ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_4]], ptr align 2 [[A_SROA_4_0_SRC_SROA_IDX]], i32 90, i1 false) ; CHECK-NEXT: store i8 1, ptr [[A_SROA_3]], align 1 ; CHECK-NEXT: [[A_SROA_0_202_A_202_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 202 -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_202_A_202_SROA_IDX2]], i8 42, i32 7, i1 false), !tbaa.struct [[TBAA_STRUCT14:![0-9]+]] -; CHECK-NEXT: store i8 42, ptr [[A_SROA_3]], align 1, !tbaa.struct [[TBAA_STRUCT15:![0-9]+]] +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_202_A_202_SROA_IDX2]], i8 42, i32 7, i1 false), !tbaa.struct [[TBAA_STRUCT12:![0-9]+]] +; CHECK-NEXT: store i8 42, ptr [[A_SROA_3]], align 1, !tbaa [[TBAA7]] ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 209, i1 true) ; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 209 ; CHECK-NEXT: [[A_SROA_3_0_A_SROA_3_0_COPYLOAD1:%.*]] = load volatile i8, ptr [[A_SROA_3]], align 1 @@ -240,7 +240,7 @@ define void @slice_store_v2i8_1(ptr %dst, ptr %dst.2, ptr %src) { ; CHECK-NEXT: [[A_SROA_2_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 6 ; CHECK-NEXT: [[A_SROA_2_SROA_0_0_COPYLOAD:%.*]] = load <2 x i8>, ptr [[A_SROA_2_0_SRC_SROA_IDX]], align 2 ; CHECK-NEXT: store <2 x i8> [[A_SROA_2_SROA_0_0_COPYLOAD]], ptr [[A_SROA_2_SROA_0]], align 4 -; CHECK-NEXT: store <2 x i8> bitcast (<1 x i16> to <2 x i8>), ptr [[A_SROA_2_SROA_0]], align 4, !tbaa.struct [[TBAA_STRUCT16:![0-9]+]] +; CHECK-NEXT: store <2 x i8> bitcast (<1 x i16> to <2 x i8>), ptr [[A_SROA_2_SROA_0]], align 4, !tbaa.struct [[TBAA_STRUCT13:![0-9]+]] ; CHECK-NEXT: [[A_SROA_2_SROA_0_0_A_SROA_2_SROA_0_0_A_SROA_2_6_V_4:%.*]] = load <2 x i8>, ptr [[A_SROA_2_SROA_0]], align 4 ; CHECK-NEXT: store <2 x i8> [[A_SROA_2_SROA_0_0_A_SROA_2_SROA_0_0_A_SROA_2_6_V_4]], ptr [[DST_2]], align 2 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 6, i1 true) @@ -279,8 +279,8 @@ define void @slice_store_v2i8_2(ptr %dst, ptr %dst.2, ptr %src) { ; CHECK-NEXT: store i8 [[A_SROA_0_SROA_4_1_COPYLOAD]], ptr [[A_SROA_0_SROA_4]], align 1 ; CHECK-NEXT: [[A_SROA_4_1_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 3 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_4]], ptr align 1 [[A_SROA_4_1_SRC_SROA_IDX]], i32 5, i1 false) -; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[A_SROA_0_SROA_1]], align 2, !tbaa.struct [[TBAA_STRUCT17:![0-9]+]] -; CHECK-NEXT: store i8 0, ptr [[A_SROA_0_SROA_4]], align 1, !tbaa.struct [[TBAA_STRUCT18:![0-9]+]] +; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[A_SROA_0_SROA_1]], align 2, !tbaa.struct [[TBAA_STRUCT14:![0-9]+]] +; CHECK-NEXT: store i8 0, ptr [[A_SROA_0_SROA_4]], align 1, !tbaa [[TBAA1]] ; CHECK-NEXT: [[A_SROA_0_SROA_1_0_A_SROA_0_SROA_1_1_A_SROA_0_1_V_4:%.*]] = load <2 x i8>, ptr [[A_SROA_0_SROA_1]], align 2 ; CHECK-NEXT: store <2 x i8> [[A_SROA_0_SROA_1_0_A_SROA_0_SROA_1_1_A_SROA_0_1_V_4]], ptr [[DST_2]], align 2 ; CHECK-NEXT: [[A_SROA_0_SROA_1_0_A_SROA_0_SROA_1_1_COPYLOAD3:%.*]] = load volatile <2 x i8>, ptr [[A_SROA_0_SROA_1]], align 2 @@ -317,7 +317,7 @@ define double @tbaa_struct_load(ptr %src, ptr %dst) { ; CHECK-NEXT: [[TMP_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 8 ; CHECK-NEXT: [[TMP_SROA_3_0_COPYLOAD:%.*]] = load i64, ptr [[TMP_SROA_3_0_SRC_SROA_IDX]], align 8 ; CHECK-NEXT: store i64 [[TMP_SROA_3_0_COPYLOAD]], ptr [[TMP_SROA_3]], align 8 -; CHECK-NEXT: [[TMP_SROA_0_0_TMP_SROA_0_0_LG:%.*]] = load double, ptr [[TMP_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT10]] +; CHECK-NEXT: [[TMP_SROA_0_0_TMP_SROA_0_0_LG:%.*]] = load double, ptr [[TMP_SROA_0]], align 8, !tbaa [[TBAA7]] ; CHECK-NEXT: [[TMP_SROA_0_0_TMP_SROA_0_0_COPYLOAD1:%.*]] = load volatile double, ptr [[TMP_SROA_0]], align 8 ; CHECK-NEXT: store volatile double [[TMP_SROA_0_0_TMP_SROA_0_0_COPYLOAD1]], ptr [[DST]], align 8 ; CHECK-NEXT: [[TMP_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 8 @@ -356,7 +356,7 @@ define i32 @shorten_integer_store_multiple_fields(ptr %dst, ptr %dst.2, ptr %src ; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 123, ptr [[A_SROA_0]], align 4, !tbaa.struct [[TBAA_STRUCT19:![0-9]+]] +; CHECK-NEXT: store i32 123, ptr [[A_SROA_0]], align 4, !tbaa [[TBAA7]] ; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load i32, ptr [[A_SROA_0]], align 4 ; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_COPYLOAD:%.*]] = load volatile i32, ptr [[A_SROA_0]], align 4 ; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD]], ptr [[DST]], align 1 @@ -393,7 +393,7 @@ define <2 x i16> @shorten_vector_store_single_fields(ptr %dst, ptr %dst.2, ptr % ; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca <2 x i32>, align 8 -; CHECK-NEXT: store <2 x i32> , ptr [[A_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT19]] +; CHECK-NEXT: store <2 x i32> , ptr [[A_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT15:![0-9]+]] ; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load <2 x i16>, ptr [[A_SROA_0]], align 8 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 8 [[A_SROA_0]], i32 4, i1 true) ; CHECK-NEXT: ret <2 x i16> [[A_SROA_0_0_A_SROA_0_0_L]] @@ -429,11 +429,11 @@ define i32 @split_load_with_tbaa_struct(i32 %x, ptr %src, ptr %dst) { ; CHECK-NEXT: [[A3_SROA_5_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 9 ; CHECK-NEXT: [[A3_SROA_5_0_COPYLOAD:%.*]] = load i8, ptr [[A3_SROA_5_0_SRC_SROA_IDX]], align 1 ; CHECK-NEXT: store i8 [[A3_SROA_5_0_COPYLOAD]], ptr [[A3_SROA_5]], align 1 -; CHECK-NEXT: [[A3_SROA_0_0_A3_SROA_0_0_LOAD4_FCA_0_LOAD:%.*]] = load i16, ptr [[A3_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT20:![0-9]+]] +; CHECK-NEXT: [[A3_SROA_0_0_A3_SROA_0_0_LOAD4_FCA_0_LOAD:%.*]] = load i16, ptr [[A3_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT16:![0-9]+]] ; CHECK-NEXT: [[LOAD4_FCA_0_INSERT:%.*]] = insertvalue { i16, float, i8 } poison, i16 [[A3_SROA_0_0_A3_SROA_0_0_LOAD4_FCA_0_LOAD]], 0 -; CHECK-NEXT: [[A3_SROA_33_0_A3_SROA_33_4_LOAD4_FCA_1_LOAD:%.*]] = load float, ptr [[A3_SROA_33]], align 4, !tbaa.struct [[TBAA_STRUCT21:![0-9]+]] +; CHECK-NEXT: [[A3_SROA_33_0_A3_SROA_33_4_LOAD4_FCA_1_LOAD:%.*]] = load float, ptr [[A3_SROA_33]], align 4, !tbaa.struct [[TBAA_STRUCT17:![0-9]+]] ; CHECK-NEXT: [[LOAD4_FCA_1_INSERT:%.*]] = insertvalue { i16, float, i8 } [[LOAD4_FCA_0_INSERT]], float [[A3_SROA_33_0_A3_SROA_33_4_LOAD4_FCA_1_LOAD]], 1 -; CHECK-NEXT: [[A3_SROA_4_0_A3_SROA_4_8_LOAD4_FCA_2_LOAD:%.*]] = load i8, ptr [[A3_SROA_4]], align 8, !tbaa.struct [[TBAA_STRUCT15]] +; CHECK-NEXT: [[A3_SROA_4_0_A3_SROA_4_8_LOAD4_FCA_2_LOAD:%.*]] = load i8, ptr [[A3_SROA_4]], align 8, !tbaa [[TBAA7]] ; CHECK-NEXT: [[LOAD4_FCA_2_INSERT:%.*]] = insertvalue { i16, float, i8 } [[LOAD4_FCA_1_INSERT]], i8 [[A3_SROA_4_0_A3_SROA_4_8_LOAD4_FCA_2_LOAD]], 2 ; CHECK-NEXT: [[UNWRAP2:%.*]] = extractvalue { i16, float, i8 } [[LOAD4_FCA_2_INSERT]], 1 ; CHECK-NEXT: [[VALCAST2:%.*]] = bitcast float [[UNWRAP2]] to i32 @@ -492,11 +492,11 @@ define i32 @split_store_with_tbaa_struct(i32 %x, ptr %src, ptr %dst) { ; CHECK-NEXT: [[I_2:%.*]] = insertvalue { i16, float, i8 } [[I_1]], float 3.000000e+00, 1 ; CHECK-NEXT: [[I_3:%.*]] = insertvalue { i16, float, i8 } [[I_2]], i8 99, 2 ; CHECK-NEXT: [[I_3_FCA_0_EXTRACT:%.*]] = extractvalue { i16, float, i8 } [[I_3]], 0 -; CHECK-NEXT: store i16 [[I_3_FCA_0_EXTRACT]], ptr [[A3_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT20]] +; CHECK-NEXT: store i16 [[I_3_FCA_0_EXTRACT]], ptr [[A3_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT16]] ; CHECK-NEXT: [[I_3_FCA_1_EXTRACT:%.*]] = extractvalue { i16, float, i8 } [[I_3]], 1 -; CHECK-NEXT: store float [[I_3_FCA_1_EXTRACT]], ptr [[A3_SROA_33]], align 4, !tbaa.struct [[TBAA_STRUCT21]] +; CHECK-NEXT: store float [[I_3_FCA_1_EXTRACT]], ptr [[A3_SROA_33]], align 4, !tbaa.struct [[TBAA_STRUCT17]] ; CHECK-NEXT: [[I_3_FCA_2_EXTRACT:%.*]] = extractvalue { i16, float, i8 } [[I_3]], 2 -; CHECK-NEXT: store i8 [[I_3_FCA_2_EXTRACT]], ptr [[A3_SROA_4]], align 8, !tbaa.struct [[TBAA_STRUCT15]] +; CHECK-NEXT: store i8 [[I_3_FCA_2_EXTRACT]], ptr [[A3_SROA_4]], align 8, !tbaa [[TBAA7]] ; CHECK-NEXT: [[A3_SROA_0_0_A3_SROA_0_0_COPYLOAD1:%.*]] = load volatile i16, ptr [[A3_SROA_0]], align 8 ; CHECK-NEXT: store volatile i16 [[A3_SROA_0_0_A3_SROA_0_0_COPYLOAD1]], ptr [[DST]], align 1 ; CHECK-NEXT: [[A3_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 2 @@ -553,4 +553,17 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias ; CHECK: [[META2]] = !{!"float", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} ; CHECK: [[META4]] = !{!"Simple C++ TBAA"} +; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 4, [[TBAA1]]} +; CHECK: [[TBAA_STRUCT6]] = !{i64 0, i64 8, [[TBAA7]], i64 8, i64 4, [[TBAA1]]} +; CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +; CHECK: [[META8]] = !{!"v2f32", [[META3]], i64 0} +; CHECK: [[TBAA_STRUCT9]] = !{i64 0, i64 4, [[TBAA1]], i64 4, i64 8, [[TBAA7]]} +; CHECK: [[TBAA_STRUCT10]] = !{i64 0, i64 8, [[TBAA7]], i64 4, i64 8, [[TBAA1]]} +; CHECK: [[TBAA_STRUCT11]] = !{i64 0, i64 2, [[TBAA1]], i64 2, i64 6, [[TBAA1]]} +; CHECK: [[TBAA_STRUCT12]] = !{i64 0, i64 7, [[TBAA7]], i64 7, i64 1, [[TBAA7]]} +; CHECK: [[TBAA_STRUCT13]] = !{i64 0, i64 2, [[TBAA1]], i64 2, i64 2, [[TBAA1]]} +; CHECK: [[TBAA_STRUCT14]] = !{i64 0, i64 3, [[TBAA1]]} +; CHECK: [[TBAA_STRUCT15]] = !{i64 0, i64 4, [[TBAA7]]} +; CHECK: [[TBAA_STRUCT16]] = !{i64 0, i64 2, [[TBAA7]], i64 4, i64 4, [[TBAA7]], i64 8, i64 1, [[TBAA7]]} +; CHECK: [[TBAA_STRUCT17]] = !{i64 0, i64 4, [[TBAA7]], i64 4, i64 1, [[TBAA7]]} ;. From 51c915cd9da740a4a93a5f86b4b6a381206ee480 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 16 Feb 2024 13:41:13 +0000 Subject: [PATCH 5/5] !fixup update clang test. --- clang/test/CodeGen/aarch64-ABI-align-packed.c | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/clang/test/CodeGen/aarch64-ABI-align-packed.c b/clang/test/CodeGen/aarch64-ABI-align-packed.c index 93b81d50261bf..2b029f6458956 100644 --- a/clang/test/CodeGen/aarch64-ABI-align-packed.c +++ b/clang/test/CodeGen/aarch64-ABI-align-packed.c @@ -59,7 +59,7 @@ struct non_packed_struct gs_non_packed_struct; // CHECK-NEXT: entry: // CHECK-NEXT: [[S_NON_PACKED_STRUCT_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_NON_PACKED_STRUCT_COERCE]], 0 // CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2:![0-9]+]] -// CHECK-NEXT: store <8 x i16> [[S_NON_PACKED_STRUCT_COERCE_FCA_0_EXTRACT]], ptr @gs_non_packed_struct, align 16, !tbaa.struct [[TBAA_STRUCT6:![0-9]+]] +// CHECK-NEXT: store <8 x i16> [[S_NON_PACKED_STRUCT_COERCE_FCA_0_EXTRACT]], ptr @gs_non_packed_struct, align 16, !tbaa [[TBAA6:![0-9]+]] // CHECK-NEXT: ret void __attribute__((noinline)) void named_arg_non_packed_struct(double d0, double d1, double d2, double d3, double d4, double d5, double d6, double d7, @@ -114,7 +114,7 @@ struct packed_struct gs_packed_struct; // CHECK-NEXT: entry: // CHECK-NEXT: [[S_PACKED_STRUCT_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_PACKED_STRUCT_COERCE]], 0 // CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]] -// CHECK-NEXT: store <8 x i16> [[S_PACKED_STRUCT_COERCE_FCA_0_EXTRACT]], ptr @gs_packed_struct, align 1, !tbaa.struct [[TBAA_STRUCT6]] +// CHECK-NEXT: store <8 x i16> [[S_PACKED_STRUCT_COERCE_FCA_0_EXTRACT]], ptr @gs_packed_struct, align 1, !tbaa [[TBAA6]] // CHECK-NEXT: ret void __attribute__((noinline)) void named_arg_packed_struct(double d0, double d1, double d2, double d3, double d4, double d5, double d6, double d7, @@ -169,7 +169,7 @@ struct packed_member gs_packed_member; // CHECK-NEXT: entry: // CHECK-NEXT: [[S_PACKED_MEMBER_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_PACKED_MEMBER_COERCE]], 0 // CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]] -// CHECK-NEXT: store <8 x i16> [[S_PACKED_MEMBER_COERCE_FCA_0_EXTRACT]], ptr @gs_packed_member, align 1, !tbaa.struct [[TBAA_STRUCT6]] +// CHECK-NEXT: store <8 x i16> [[S_PACKED_MEMBER_COERCE_FCA_0_EXTRACT]], ptr @gs_packed_member, align 1, !tbaa [[TBAA6]] // CHECK-NEXT: ret void __attribute__((noinline)) void named_arg_packed_member(double d0, double d1, double d2, double d3, double d4, double d5, double d6, double d7, @@ -224,7 +224,7 @@ struct aligned_struct_8 gs_aligned_struct_8; // CHECK-NEXT: entry: // CHECK-NEXT: [[S_ALIGNED_STRUCT_8_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_ALIGNED_STRUCT_8_COERCE]], 0 // CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]] -// CHECK-NEXT: store <8 x i16> [[S_ALIGNED_STRUCT_8_COERCE_FCA_0_EXTRACT]], ptr @gs_aligned_struct_8, align 16, !tbaa.struct [[TBAA_STRUCT6]] +// CHECK-NEXT: store <8 x i16> [[S_ALIGNED_STRUCT_8_COERCE_FCA_0_EXTRACT]], ptr @gs_aligned_struct_8, align 16, !tbaa [[TBAA6]] // CHECK-NEXT: ret void __attribute__((noinline)) void named_arg_aligned_struct_8(double d0, double d1, double d2, double d3, double d4, double d5, double d6, double d7, @@ -279,7 +279,7 @@ struct aligned_member_8 gs_aligned_member_8; // CHECK-NEXT: entry: // CHECK-NEXT: [[S_ALIGNED_MEMBER_8_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_ALIGNED_MEMBER_8_COERCE]], 0 // CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]] -// CHECK-NEXT: store <8 x i16> [[S_ALIGNED_MEMBER_8_COERCE_FCA_0_EXTRACT]], ptr @gs_aligned_member_8, align 16, !tbaa.struct [[TBAA_STRUCT6]] +// CHECK-NEXT: store <8 x i16> [[S_ALIGNED_MEMBER_8_COERCE_FCA_0_EXTRACT]], ptr @gs_aligned_member_8, align 16, !tbaa [[TBAA6]] // CHECK-NEXT: ret void __attribute__((noinline)) void named_arg_aligned_member_8(double d0, double d1, double d2, double d3, double d4, double d5, double d6, double d7, @@ -334,7 +334,7 @@ struct pragma_packed_struct_8 gs_pragma_packed_struct_8; // CHECK-NEXT: entry: // CHECK-NEXT: [[S_PRAGMA_PACKED_STRUCT_8_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_PRAGMA_PACKED_STRUCT_8_COERCE]], 0 // CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]] -// CHECK-NEXT: store <8 x i16> [[S_PRAGMA_PACKED_STRUCT_8_COERCE_FCA_0_EXTRACT]], ptr @gs_pragma_packed_struct_8, align 8, !tbaa.struct [[TBAA_STRUCT6]] +// CHECK-NEXT: store <8 x i16> [[S_PRAGMA_PACKED_STRUCT_8_COERCE_FCA_0_EXTRACT]], ptr @gs_pragma_packed_struct_8, align 8, !tbaa [[TBAA6]] // CHECK-NEXT: ret void __attribute__((noinline)) void named_arg_pragma_packed_struct_8(double d0, double d1, double d2, double d3, double d4, double d5, double d6, double d7, @@ -389,7 +389,7 @@ struct pragma_packed_struct_4 gs_pragma_packed_struct_4; // CHECK-NEXT: entry: // CHECK-NEXT: [[S_PRAGMA_PACKED_STRUCT_4_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_PRAGMA_PACKED_STRUCT_4_COERCE]], 0 // CHECK-NEXT: store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]] -// CHECK-NEXT: store <8 x i16> [[S_PRAGMA_PACKED_STRUCT_4_COERCE_FCA_0_EXTRACT]], ptr @gs_pragma_packed_struct_4, align 4, !tbaa.struct [[TBAA_STRUCT6]] +// CHECK-NEXT: store <8 x i16> [[S_PRAGMA_PACKED_STRUCT_4_COERCE_FCA_0_EXTRACT]], ptr @gs_pragma_packed_struct_4, align 4, !tbaa [[TBAA6]] // CHECK-NEXT: ret void __attribute__((noinline)) void named_arg_pragma_packed_struct_4(double d0, double d1, double d2, double d3, double d4, double d5, double d6, double d7, @@ -436,3 +436,10 @@ void test_pragma_packed_struct_4() { named_arg_pragma_packed_struct_4(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, s_pragma_packed_struct_4); variadic_pragma_packed_struct_4(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, s_pragma_packed_struct_4); } +//. +// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"double", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[TBAA6]] = !{[[META4]], [[META4]], i64 0} +//.