-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[LoopDist] Consider reads and writes together for runtime checks #145623
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: Michael Berg (mcberg2021). Changes: Emit safety guards for ptr accesses when cross-partition loads exist which have a corresponding store to the same address in a different partition. This will emit the necessary ptr checks for these accesses. Full diff: https://github.com/llvm/llvm-project/pull/145623.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 0ac1a15981770..5041a71fcea4a 100644
--- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -521,6 +521,33 @@ class InstPartitionContainer {
Partition = -1;
}
assert(Partition != -2 && "Pointer not belonging to any partition");
+ // All the store context uses of our address were processed,
+ // Now make sure we don't have cross partition loads.
+ if (RtPtrCheck->Pointers[I].IsWritePtr) {
+ if (Ptr->hasOneUse() || Partition == -1)
+ continue;
+
+ bool ProcessLoads = false;
+ for (User *U : Ptr->users())
+ if (auto *CurLoad = dyn_cast<LoadInst>(U))
+ if (L->contains(CurLoad->getParent())) {
+ ProcessLoads = true;
+ break;
+ }
+
+ if (!ProcessLoads)
+ continue;
+
+ const bool IsWritePtr = false;
+ auto Instructions = LAI.getInstructionsForAccess(Ptr, IsWritePtr);
+ for (Instruction *Inst : Instructions)
+ if (Partition != (int)this->InstToPartitionId[Inst]) {
+ // -1 means belonging to multiple partitions.
+ Partition = -1;
+ break;
+ }
+
+ }
}
return PtrToPartitions;
diff --git a/llvm/test/Transforms/LoopDistribute/cross-partition-access.ll b/llvm/test/Transforms/LoopDistribute/cross-partition-access.ll
new file mode 100644
index 0000000000000..db88943d6a722
--- /dev/null
+++ b/llvm/test/Transforms/LoopDistribute/cross-partition-access.ll
@@ -0,0 +1,207 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -aa-pipeline=basic-aa -passes=loop-simplify,loop-distribute -enable-loop-distribute -verify-loop-info -verify-dom-info -S %s | FileCheck %s
+
+; Test emit safety guards for ptr access of %a and %c on a cross partition load which
+; has a corresponding store to the same address. This ensures that if %a and %c
+; overlap in some scenarios, we execute the original loop for safety reasons.
+
+define dso_local void @_Z13distribution3PiS_S_S_i(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, ptr nocapture noundef %c, ptr nocapture noundef writeonly %d, i32 noundef signext %len) {
+; CHECK-LABEL: define dso_local void @_Z13distribution3PiS_S_S_i(
+; CHECK-SAME: ptr noundef captures(none) [[A:%.*]], ptr noundef readonly captures(none) [[B:%.*]], ptr noundef captures(none) [[C:%.*]], ptr noundef writeonly captures(none) [[D:%.*]], i32 noundef signext [[LEN:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[LEN]], 0
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[SUB_NEG:%.*]] = add nsw i32 [[LEN]], -2147483647
+; CHECK-NEXT: [[CMP1_NOT51:%.*]] = icmp eq i32 [[LEN]], 1
+; CHECK-NEXT: br i1 [[CMP1_NOT51]], label %[[IF_END]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: [[ADD:%.*]] = sub nuw i32 -2147483648, [[LEN]]
+; CHECK-NEXT: [[I:%.*]] = zext nneg i32 [[ADD]] to i64
+; CHECK-NEXT: br label %[[FOR_BODY_LVER_CHECK:.*]]
+; CHECK: [[FOR_BODY_LVER_CHECK]]:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 4
+; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -2147483647, [[LEN]]
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = sub i64 8589934596, [[TMP2]]
+; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[C]], i64 [[TMP3]]
+; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[D]], i64 4
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP3]]
+; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[B]], i64 4
+; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP2]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[C]], [[SCEVGEP1]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: [[BOUND07:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP4]]
+; CHECK-NEXT: [[BOUND18:%.*]] = icmp ult ptr [[SCEVGEP3]], [[SCEVGEP1]]
+; CHECK-NEXT: [[FOUND_CONFLICT9:%.*]] = and i1 [[BOUND07]], [[BOUND18]]
+; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT9]]
+; CHECK-NEXT: [[BOUND010:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP6]]
+; CHECK-NEXT: [[BOUND111:%.*]] = icmp ult ptr [[SCEVGEP5]], [[SCEVGEP1]]
+; CHECK-NEXT: [[FOUND_CONFLICT12:%.*]] = and i1 [[BOUND010]], [[BOUND111]]
+; CHECK-NEXT: [[CONFLICT_RDX13:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT12]]
+; CHECK-NEXT: [[BOUND014:%.*]] = icmp ult ptr [[C]], [[SCEVGEP4]]
+; CHECK-NEXT: [[BOUND115:%.*]] = icmp ult ptr [[SCEVGEP3]], [[SCEVGEP2]]
+; CHECK-NEXT: [[FOUND_CONFLICT16:%.*]] = and i1 [[BOUND014]], [[BOUND115]]
+; CHECK-NEXT: [[CONFLICT_RDX17:%.*]] = or i1 [[CONFLICT_RDX13]], [[FOUND_CONFLICT16]]
+; CHECK-NEXT: [[BOUND018:%.*]] = icmp ult ptr [[SCEVGEP3]], [[SCEVGEP6]]
+; CHECK-NEXT: [[BOUND119:%.*]] = icmp ult ptr [[SCEVGEP5]], [[SCEVGEP4]]
+; CHECK-NEXT: [[FOUND_CONFLICT20:%.*]] = and i1 [[BOUND018]], [[BOUND119]]
+; CHECK-NEXT: [[CONFLICT_RDX21:%.*]] = or i1 [[CONFLICT_RDX17]], [[FOUND_CONFLICT20]]
+; CHECK-NEXT: [[TMP4:%.*]] = sub i32 -2147483647, [[LEN]]
+; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = sub i64 2147483647, [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP4]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], [[TMP4]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[TMP6]], 4294967295
+; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = add i32 1, [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp slt i32 [[TMP13]], 1
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ugt i64 [[TMP6]], 4294967295
+; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP11]], [[TMP16]]
+; CHECK-NEXT: [[LVER_SAFE:%.*]] = or i1 [[CONFLICT_RDX21]], [[TMP17]]
+; CHECK-NEXT: br i1 [[LVER_SAFE]], label %[[FOR_BODY_PH_LVER_ORIG:.*]], label %[[FOR_BODY_PH_LDIST1:.*]]
+; CHECK: [[FOR_BODY_PH_LVER_ORIG]]:
+; CHECK-NEXT: br label %[[FOR_BODY_LVER_ORIG:.*]]
+; CHECK: [[FOR_BODY_LVER_ORIG]]:
+; CHECK-NEXT: [[INDVARS_IV_LVER_ORIG:%.*]] = phi i64 [ [[I]], %[[FOR_BODY_PH_LVER_ORIG]] ], [ [[INDVARS_IV_NEXT_LVER_ORIG:%.*]], %[[FOR_BODY_LVER_ORIG]] ]
+; CHECK-NEXT: [[I1_LVER_ORIG:%.*]] = trunc i64 [[INDVARS_IV_LVER_ORIG]] to i32
+; CHECK-NEXT: [[SUB3_LVER_ORIG:%.*]] = add i32 [[SUB_NEG]], [[I1_LVER_ORIG]]
+; CHECK-NEXT: [[IDXPROM_LVER_ORIG:%.*]] = sext i32 [[SUB3_LVER_ORIG]] to i64
+; CHECK-NEXT: [[ARRAYIDX_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IDXPROM_LVER_ORIG]]
+; CHECK-NEXT: [[I2_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDX_LVER_ORIG]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: [[ADD4_LVER_ORIG:%.*]] = add nsw i32 [[I2_LVER_ORIG]], 1
+; CHECK-NEXT: [[ARRAYIDX8_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM_LVER_ORIG]]
+; CHECK-NEXT: store i32 [[ADD4_LVER_ORIG]], ptr [[ARRAYIDX8_LVER_ORIG]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[I3_LVER_ORIG:%.*]] = getelementptr i32, ptr [[C]], i64 [[IDXPROM_LVER_ORIG]]
+; CHECK-NEXT: [[ARRAYIDX17_LVER_ORIG:%.*]] = getelementptr i8, ptr [[I3_LVER_ORIG]], i64 -4
+; CHECK-NEXT: [[I4_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDX17_LVER_ORIG]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[SUB18_LVER_ORIG:%.*]] = sub nsw i32 [[ADD4_LVER_ORIG]], [[I4_LVER_ORIG]]
+; CHECK-NEXT: store i32 [[SUB18_LVER_ORIG]], ptr [[I3_LVER_ORIG]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[I5_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDX8_LVER_ORIG]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[ADD27_LVER_ORIG:%.*]] = add nsw i32 [[I5_LVER_ORIG]], 2
+; CHECK-NEXT: [[ARRAYIDX31_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[IDXPROM_LVER_ORIG]]
+; CHECK-NEXT: store i32 [[ADD27_LVER_ORIG]], ptr [[ARRAYIDX31_LVER_ORIG]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_LVER_ORIG]] = add i64 [[INDVARS_IV_LVER_ORIG]], 1
+; CHECK-NEXT: [[I6_LVER_ORIG:%.*]] = and i64 [[INDVARS_IV_NEXT_LVER_ORIG]], 4294967295
+; CHECK-NEXT: [[CMP1_NOT_LVER_ORIG:%.*]] = icmp eq i64 [[I6_LVER_ORIG]], 2147483647
+; CHECK-NEXT: br i1 [[CMP1_NOT_LVER_ORIG]], label %[[IF_END_LOOPEXIT_LOOPEXIT:.*]], label %[[FOR_BODY_LVER_ORIG]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[FOR_BODY_PH_LDIST1]]:
+; CHECK-NEXT: br label %[[FOR_BODY_LDIST1:.*]]
+; CHECK: [[FOR_BODY_LDIST1]]:
+; CHECK-NEXT: [[INDVARS_IV_LDIST1:%.*]] = phi i64 [ [[I]], %[[FOR_BODY_PH_LDIST1]] ], [ [[INDVARS_IV_NEXT_LDIST1:%.*]], %[[FOR_BODY_LDIST1]] ]
+; CHECK-NEXT: [[I1_LDIST1:%.*]] = trunc i64 [[INDVARS_IV_LDIST1]] to i32
+; CHECK-NEXT: [[SUB3_LDIST1:%.*]] = add i32 [[SUB_NEG]], [[I1_LDIST1]]
+; CHECK-NEXT: [[IDXPROM_LDIST1:%.*]] = sext i32 [[SUB3_LDIST1]] to i64
+; CHECK-NEXT: [[ARRAYIDX_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IDXPROM_LDIST1]]
+; CHECK-NEXT: [[I2_LDIST1:%.*]] = load i32, ptr [[ARRAYIDX_LDIST1]], align 4, !tbaa [[TBAA0]], !alias.scope [[META6:![0-9]+]]
+; CHECK-NEXT: [[ADD4_LDIST1:%.*]] = add nsw i32 [[I2_LDIST1]], 1
+; CHECK-NEXT: [[ARRAYIDX8_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM_LDIST1]]
+; CHECK-NEXT: store i32 [[ADD4_LDIST1]], ptr [[ARRAYIDX8_LDIST1]], align 4, !tbaa [[TBAA0]], !alias.scope [[META9:![0-9]+]], !noalias [[META11:![0-9]+]]
+; CHECK-NEXT: [[I3_LDIST1:%.*]] = getelementptr i32, ptr [[C]], i64 [[IDXPROM_LDIST1]]
+; CHECK-NEXT: [[ARRAYIDX17_LDIST1:%.*]] = getelementptr i8, ptr [[I3_LDIST1]], i64 -4
+; CHECK-NEXT: [[I4_LDIST1:%.*]] = load i32, ptr [[ARRAYIDX17_LDIST1]], align 4, !tbaa [[TBAA0]], !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]]
+; CHECK-NEXT: [[SUB18_LDIST1:%.*]] = sub nsw i32 [[ADD4_LDIST1]], [[I4_LDIST1]]
+; CHECK-NEXT: store i32 [[SUB18_LDIST1]], ptr [[I3_LDIST1]], align 4, !tbaa [[TBAA0]], !alias.scope [[META14]], !noalias [[META15]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_LDIST1]] = add i64 [[INDVARS_IV_LDIST1]], 1
+; CHECK-NEXT: [[I6_LDIST1:%.*]] = and i64 [[INDVARS_IV_NEXT_LDIST1]], 4294967295
+; CHECK-NEXT: [[CMP1_NOT_LDIST1:%.*]] = icmp eq i64 [[I6_LDIST1]], 2147483647
+; CHECK-NEXT: br i1 [[CMP1_NOT_LDIST1]], label %[[FOR_BODY_PH:.*]], label %[[FOR_BODY_LDIST1]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK: [[FOR_BODY_PH]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[I]], %[[FOR_BODY_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[I1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT: [[SUB3:%.*]] = add i32 [[SUB_NEG]], [[I1]]
+; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB3]] to i64
+; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[I5:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !tbaa [[TBAA0]], !alias.scope [[META9]], !noalias [[META11]]
+; CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 [[I5]], 2
+; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[IDXPROM]]
+; CHECK-NEXT: store i32 [[ADD27]], ptr [[ARRAYIDX31]], align 4, !tbaa [[TBAA0]], !alias.scope [[META15]], !noalias [[META6]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[I6:%.*]] = and i64 [[INDVARS_IV_NEXT]], 4294967295
+; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[I6]], 2147483647
+; CHECK-NEXT: br i1 [[CMP1_NOT]], label %[[IF_END_LOOPEXIT_LOOPEXIT22:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP16]]
+; CHECK: [[IF_END_LOOPEXIT_LOOPEXIT]]:
+; CHECK-NEXT: br label %[[IF_END_LOOPEXIT:.*]]
+; CHECK: [[IF_END_LOOPEXIT_LOOPEXIT22]]:
+; CHECK-NEXT: br label %[[IF_END_LOOPEXIT]]
+; CHECK: [[IF_END_LOOPEXIT]]:
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp sgt i32 %len, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %sub.neg = add nsw i32 %len, -2147483647
+ %cmp1.not51 = icmp eq i32 %len, 1
+ br i1 %cmp1.not51, label %if.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %if.then
+ %add = sub nuw i32 -2147483648, %len
+ %i = zext nneg i32 %add to i64
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %indvars.iv = phi i64 [ %i, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %i1 = trunc i64 %indvars.iv to i32
+ %sub3 = add i32 %sub.neg, %i1
+ %idxprom = sext i32 %sub3 to i64
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %idxprom
+ %i2 = load i32, ptr %arrayidx, align 4, !tbaa !9
+ %add4 = add nsw i32 %i2, 1
+ %arrayidx8 = getelementptr inbounds i32, ptr %a, i64 %idxprom
+ store i32 %add4, ptr %arrayidx8, align 4, !tbaa !9
+ %i3 = getelementptr i32, ptr %c, i64 %idxprom
+ %arrayidx17 = getelementptr i8, ptr %i3, i64 -4
+ %i4 = load i32, ptr %arrayidx17, align 4, !tbaa !9
+ %sub18 = sub nsw i32 %add4, %i4
+ store i32 %sub18, ptr %i3, align 4, !tbaa !9
+ %i5 = load i32, ptr %arrayidx8, align 4, !tbaa !9
+ %add27 = add nsw i32 %i5, 2
+ %arrayidx31 = getelementptr inbounds i32, ptr %d, i64 %idxprom
+ store i32 %add27, ptr %arrayidx31, align 4, !tbaa !9
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %i6 = and i64 %indvars.iv.next, 4294967295
+ %cmp1.not = icmp eq i64 %i6, 2147483647
+ br i1 %cmp1.not, label %if.end, label %for.body, !llvm.loop !13
+
+if.end: ; preds = %for.body, %if.then, %entry
+ ret void
+}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!9 = !{!10, !10, i64 0}
+!10 = !{!"int", !11, i64 0}
+!11 = !{!"omnipotent char", !12, i64 0}
+!12 = !{!"Simple C++ TBAA"}
+!13 = distinct !{!13, !14}
+!14 = !{!"llvm.loop.mustprogress"}
+;.
+; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]]}
+; CHECK: [[META5]] = !{!"llvm.loop.mustprogress"}
+; CHECK: [[META6]] = !{[[META7:![0-9]+]]}
+; CHECK: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
+; CHECK: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
+; CHECK: [[META9]] = !{[[META10:![0-9]+]]}
+; CHECK: [[META10]] = distinct !{[[META10]], [[META8]]}
+; CHECK: [[META11]] = !{[[META12:![0-9]+]], [[META13:![0-9]+]], [[META7]]}
+; CHECK: [[META12]] = distinct !{[[META12]], [[META8]]}
+; CHECK: [[META13]] = distinct !{[[META13]], [[META8]]}
+; CHECK: [[META14]] = !{[[META12]]}
+; CHECK: [[META15]] = !{[[META13]]}
+; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META5]]}
+;.
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
1bd46a9
to
7cb671a
Compare
Ping |
Ping, please review and comment. |
957e1bc
to
4aee60d
Compare
if (Ptr->hasOneUse() || Partition == -1) | ||
continue; | ||
|
||
for (User *U : Ptr->users()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, do we ensure we also handle cases where Ptr
is used by loads with another pointer-op in between?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Once we have Ptr belonging to multiple partitions, any other pointer-op should be detected by the check generation code, which emits all the checks for overlapping pointers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are other instructions other than LoadInst that only read from memory, such as calls marked with ModRefInfo::Ref
. I think this should use LAI.getInstructionsForAccess(Ptr, false)
as the code above, best even integrated with the code above: If RtPtrCheck->Pointers[I].IsWritePtr
is true, compare LAI.getInstructionsForAccess(Ptr, false)
and LAI.getInstructionsForAccess(Ptr, true)
Emit safety guards for ptr accesses when cross partition loads exist which have a corresponding store to the same address in a different partition. This will emit the necessary ptr checks for these accesses.
4aee60d
to
af3207b
Compare
if (Ptr->hasOneUse() || Partition == -1) | ||
continue; | ||
|
||
for (User *U : Ptr->users()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are other instructions other than LoadInst that only read from memory, such as calls marked with ModRefInfo::Ref
. I think this should use LAI.getInstructionsForAccess(Ptr, false)
as the code above, best even integrated with the code above: If RtPtrCheck->Pointers[I].IsWritePtr
is true, compare LAI.getInstructionsForAccess(Ptr, false)
and LAI.getInstructionsForAccess(Ptr, true)
|
||
bool ProcessLoads = false; | ||
for (auto *U : Ptr->users()) { | ||
auto *CurLoad = dyn_cast<LoadInst>(U); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LAI.getInstructionsForAccess(Ptr, false)
should already contain all the LoadInsts. No need to iterate over all instructions.
if (Partition == (int)this->InstToPartitionId[ReadInst]) | ||
continue; | ||
|
||
// -1 means belonging to multiple partitions. | ||
Partition = -1; | ||
break; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be the same code as for the write case. Consider adding LAI.getInstructionsForAccess(Ptr, false)
an d LAI.getInstructionsForAccess(Ptr, true)
into a common list and iterator over it, or wrap this code into a lambda and calling it for each.
The former would be nicer if you changed the signature of SmallVector<Instruction *, 4>MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool IsWrite)
to void getInstructionsForAccess(Value *Ptr, bool IsWrite, SmallVectorImpl<Instruction*> &Result)
so one would reuse the same list. SmallVector's should not be returned anyway.
63d5c7b
to
4740e98
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why make it more complicated than necessary?
auto Instructions = | ||
LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr); | ||
const MemoryDepChecker &DC = LAI.getDepChecker(); | ||
bool IsWritePtr = !RtPtrCheck->Pointers[I].IsWritePtr; | ||
if (!DC.getOrderForAccess(Ptr, IsWritePtr).empty()) { | ||
auto AltInstructions = LAI.getInstructionsForAccess(Ptr, IsWritePtr); | ||
Instructions.append(AltInstructions.begin(), AltInstructions.end()); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto Instructions = | |
LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr); | |
const MemoryDepChecker &DC = LAI.getDepChecker(); | |
bool IsWritePtr = !RtPtrCheck->Pointers[I].IsWritePtr; | |
if (!DC.getOrderForAccess(Ptr, IsWritePtr).empty()) { | |
auto AltInstructions = LAI.getInstructionsForAccess(Ptr, IsWritePtr); | |
Instructions.append(AltInstructions.begin(), AltInstructions.end()); | |
} | |
auto Instructions = LAI.getInstructionsForAccess(Ptr, true); | |
auto ReadInstructions = LAI.getInstructionsForAccess(Ptr, false); | |
Instructions.append(ReadInstructions.begin(), ReadInstructions.end()); |
getOrderForAccess().empty()
just returns whether getInstructionsForAccess
will be empty. If that is the case, there isn't any additional overhead of calling getInstructionsForAccess
directly.
4740e98
to
d516377
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thank you
Could you make the title more concrete before landing? Such as "Also consider reads during partitioning" |
Add back in getInstructionsForAccess as query for Instructions modifying memory to mark cross partition accesses.
d516377
to
0dadf65
Compare
…m#145623) Emit safety guards for ptr accesses when cross partition loads exist which have a corresponding store to the same address in a different partition. This will emit the necessary ptr checks for these accesses. The test case was obtained from SuperTest, which SiFive runs regularly. We enabled LoopDistribution by default in our downstream compiler, this change was part of that enablement.
Emit safety guards for ptr accesses when cross partition loads exist which have a corresponding store to the same address in a different partition. This will emit the necessary ptr checks for these accesses.
The test case was obtained from SuperTest, which SiFive runs regularly. We enabled LoopDistribution by default in our downstream compiler, this change was part of that enablement.