-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[LoopDist] Add some runtime checks for cross partition loads #145623
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Michael Berg (mcberg2021) ChangesEmit safety guards for ptr accesses when cross partition loads exist which have a corresponding store to the same address in a different partition. This will emit the necessary ptr checks for these accesses. Full diff: https://github.com/llvm/llvm-project/pull/145623.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 0ac1a15981770..5041a71fcea4a 100644
--- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -521,6 +521,33 @@ class InstPartitionContainer {
Partition = -1;
}
assert(Partition != -2 && "Pointer not belonging to any partition");
+ // All the store context uses of our address were processed,
+ // Now make sure we don't have cross partition loads.
+ if (RtPtrCheck->Pointers[I].IsWritePtr) {
+ if (Ptr->hasOneUse() || Partition == -1)
+ continue;
+
+ bool ProcessLoads = false;
+ for (User *U : Ptr->users())
+ if (auto *CurLoad = dyn_cast<LoadInst>(U))
+ if (L->contains(CurLoad->getParent())) {
+ ProcessLoads = true;
+ break;
+ }
+
+ if (!ProcessLoads)
+ continue;
+
+ const bool IsWritePtr = false;
+ auto Instructions = LAI.getInstructionsForAccess(Ptr, IsWritePtr);
+ for (Instruction *Inst : Instructions)
+ if (Partition != (int)this->InstToPartitionId[Inst]) {
+ // -1 means belonging to multiple partitions.
+ Partition = -1;
+ break;
+ }
+
+ }
}
return PtrToPartitions;
diff --git a/llvm/test/Transforms/LoopDistribute/cross-partition-access.ll b/llvm/test/Transforms/LoopDistribute/cross-partition-access.ll
new file mode 100644
index 0000000000000..db88943d6a722
--- /dev/null
+++ b/llvm/test/Transforms/LoopDistribute/cross-partition-access.ll
@@ -0,0 +1,207 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -aa-pipeline=basic-aa -passes=loop-simplify,loop-distribute -enable-loop-distribute -verify-loop-info -verify-dom-info -S %s | FileCheck %s
+
+; Test emit safety guards for ptr access of %a and %c on a cross parition load which
+; has a corresponding store to the same address. This ensures that if %a and %c
+; overlap in some scenarios, that we execute the original loop for safety reasons.
+
+define dso_local void @_Z13distribution3PiS_S_S_i(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, ptr nocapture noundef %c, ptr nocapture noundef writeonly %d, i32 noundef signext %len) {
+; CHECK-LABEL: define dso_local void @_Z13distribution3PiS_S_S_i(
+; CHECK-SAME: ptr noundef captures(none) [[A:%.*]], ptr noundef readonly captures(none) [[B:%.*]], ptr noundef captures(none) [[C:%.*]], ptr noundef writeonly captures(none) [[D:%.*]], i32 noundef signext [[LEN:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[LEN]], 0
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[SUB_NEG:%.*]] = add nsw i32 [[LEN]], -2147483647
+; CHECK-NEXT: [[CMP1_NOT51:%.*]] = icmp eq i32 [[LEN]], 1
+; CHECK-NEXT: br i1 [[CMP1_NOT51]], label %[[IF_END]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: [[ADD:%.*]] = sub nuw i32 -2147483648, [[LEN]]
+; CHECK-NEXT: [[I:%.*]] = zext nneg i32 [[ADD]] to i64
+; CHECK-NEXT: br label %[[FOR_BODY_LVER_CHECK:.*]]
+; CHECK: [[FOR_BODY_LVER_CHECK]]:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 4
+; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -2147483647, [[LEN]]
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = sub i64 8589934596, [[TMP2]]
+; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[C]], i64 [[TMP3]]
+; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[D]], i64 4
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[D]], i64 [[TMP3]]
+; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[B]], i64 4
+; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP2]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[C]], [[SCEVGEP1]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: [[BOUND07:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP4]]
+; CHECK-NEXT: [[BOUND18:%.*]] = icmp ult ptr [[SCEVGEP3]], [[SCEVGEP1]]
+; CHECK-NEXT: [[FOUND_CONFLICT9:%.*]] = and i1 [[BOUND07]], [[BOUND18]]
+; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT9]]
+; CHECK-NEXT: [[BOUND010:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP6]]
+; CHECK-NEXT: [[BOUND111:%.*]] = icmp ult ptr [[SCEVGEP5]], [[SCEVGEP1]]
+; CHECK-NEXT: [[FOUND_CONFLICT12:%.*]] = and i1 [[BOUND010]], [[BOUND111]]
+; CHECK-NEXT: [[CONFLICT_RDX13:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT12]]
+; CHECK-NEXT: [[BOUND014:%.*]] = icmp ult ptr [[C]], [[SCEVGEP4]]
+; CHECK-NEXT: [[BOUND115:%.*]] = icmp ult ptr [[SCEVGEP3]], [[SCEVGEP2]]
+; CHECK-NEXT: [[FOUND_CONFLICT16:%.*]] = and i1 [[BOUND014]], [[BOUND115]]
+; CHECK-NEXT: [[CONFLICT_RDX17:%.*]] = or i1 [[CONFLICT_RDX13]], [[FOUND_CONFLICT16]]
+; CHECK-NEXT: [[BOUND018:%.*]] = icmp ult ptr [[SCEVGEP3]], [[SCEVGEP6]]
+; CHECK-NEXT: [[BOUND119:%.*]] = icmp ult ptr [[SCEVGEP5]], [[SCEVGEP4]]
+; CHECK-NEXT: [[FOUND_CONFLICT20:%.*]] = and i1 [[BOUND018]], [[BOUND119]]
+; CHECK-NEXT: [[CONFLICT_RDX21:%.*]] = or i1 [[CONFLICT_RDX17]], [[FOUND_CONFLICT20]]
+; CHECK-NEXT: [[TMP4:%.*]] = sub i32 -2147483647, [[LEN]]
+; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = sub i64 2147483647, [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP4]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], [[TMP4]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[TMP6]], 4294967295
+; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = add i32 1, [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp slt i32 [[TMP13]], 1
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ugt i64 [[TMP6]], 4294967295
+; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP11]], [[TMP16]]
+; CHECK-NEXT: [[LVER_SAFE:%.*]] = or i1 [[CONFLICT_RDX21]], [[TMP17]]
+; CHECK-NEXT: br i1 [[LVER_SAFE]], label %[[FOR_BODY_PH_LVER_ORIG:.*]], label %[[FOR_BODY_PH_LDIST1:.*]]
+; CHECK: [[FOR_BODY_PH_LVER_ORIG]]:
+; CHECK-NEXT: br label %[[FOR_BODY_LVER_ORIG:.*]]
+; CHECK: [[FOR_BODY_LVER_ORIG]]:
+; CHECK-NEXT: [[INDVARS_IV_LVER_ORIG:%.*]] = phi i64 [ [[I]], %[[FOR_BODY_PH_LVER_ORIG]] ], [ [[INDVARS_IV_NEXT_LVER_ORIG:%.*]], %[[FOR_BODY_LVER_ORIG]] ]
+; CHECK-NEXT: [[I1_LVER_ORIG:%.*]] = trunc i64 [[INDVARS_IV_LVER_ORIG]] to i32
+; CHECK-NEXT: [[SUB3_LVER_ORIG:%.*]] = add i32 [[SUB_NEG]], [[I1_LVER_ORIG]]
+; CHECK-NEXT: [[IDXPROM_LVER_ORIG:%.*]] = sext i32 [[SUB3_LVER_ORIG]] to i64
+; CHECK-NEXT: [[ARRAYIDX_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IDXPROM_LVER_ORIG]]
+; CHECK-NEXT: [[I2_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDX_LVER_ORIG]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: [[ADD4_LVER_ORIG:%.*]] = add nsw i32 [[I2_LVER_ORIG]], 1
+; CHECK-NEXT: [[ARRAYIDX8_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM_LVER_ORIG]]
+; CHECK-NEXT: store i32 [[ADD4_LVER_ORIG]], ptr [[ARRAYIDX8_LVER_ORIG]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[I3_LVER_ORIG:%.*]] = getelementptr i32, ptr [[C]], i64 [[IDXPROM_LVER_ORIG]]
+; CHECK-NEXT: [[ARRAYIDX17_LVER_ORIG:%.*]] = getelementptr i8, ptr [[I3_LVER_ORIG]], i64 -4
+; CHECK-NEXT: [[I4_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDX17_LVER_ORIG]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[SUB18_LVER_ORIG:%.*]] = sub nsw i32 [[ADD4_LVER_ORIG]], [[I4_LVER_ORIG]]
+; CHECK-NEXT: store i32 [[SUB18_LVER_ORIG]], ptr [[I3_LVER_ORIG]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[I5_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDX8_LVER_ORIG]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[ADD27_LVER_ORIG:%.*]] = add nsw i32 [[I5_LVER_ORIG]], 2
+; CHECK-NEXT: [[ARRAYIDX31_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[IDXPROM_LVER_ORIG]]
+; CHECK-NEXT: store i32 [[ADD27_LVER_ORIG]], ptr [[ARRAYIDX31_LVER_ORIG]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_LVER_ORIG]] = add i64 [[INDVARS_IV_LVER_ORIG]], 1
+; CHECK-NEXT: [[I6_LVER_ORIG:%.*]] = and i64 [[INDVARS_IV_NEXT_LVER_ORIG]], 4294967295
+; CHECK-NEXT: [[CMP1_NOT_LVER_ORIG:%.*]] = icmp eq i64 [[I6_LVER_ORIG]], 2147483647
+; CHECK-NEXT: br i1 [[CMP1_NOT_LVER_ORIG]], label %[[IF_END_LOOPEXIT_LOOPEXIT:.*]], label %[[FOR_BODY_LVER_ORIG]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[FOR_BODY_PH_LDIST1]]:
+; CHECK-NEXT: br label %[[FOR_BODY_LDIST1:.*]]
+; CHECK: [[FOR_BODY_LDIST1]]:
+; CHECK-NEXT: [[INDVARS_IV_LDIST1:%.*]] = phi i64 [ [[I]], %[[FOR_BODY_PH_LDIST1]] ], [ [[INDVARS_IV_NEXT_LDIST1:%.*]], %[[FOR_BODY_LDIST1]] ]
+; CHECK-NEXT: [[I1_LDIST1:%.*]] = trunc i64 [[INDVARS_IV_LDIST1]] to i32
+; CHECK-NEXT: [[SUB3_LDIST1:%.*]] = add i32 [[SUB_NEG]], [[I1_LDIST1]]
+; CHECK-NEXT: [[IDXPROM_LDIST1:%.*]] = sext i32 [[SUB3_LDIST1]] to i64
+; CHECK-NEXT: [[ARRAYIDX_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IDXPROM_LDIST1]]
+; CHECK-NEXT: [[I2_LDIST1:%.*]] = load i32, ptr [[ARRAYIDX_LDIST1]], align 4, !tbaa [[TBAA0]], !alias.scope [[META6:![0-9]+]]
+; CHECK-NEXT: [[ADD4_LDIST1:%.*]] = add nsw i32 [[I2_LDIST1]], 1
+; CHECK-NEXT: [[ARRAYIDX8_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM_LDIST1]]
+; CHECK-NEXT: store i32 [[ADD4_LDIST1]], ptr [[ARRAYIDX8_LDIST1]], align 4, !tbaa [[TBAA0]], !alias.scope [[META9:![0-9]+]], !noalias [[META11:![0-9]+]]
+; CHECK-NEXT: [[I3_LDIST1:%.*]] = getelementptr i32, ptr [[C]], i64 [[IDXPROM_LDIST1]]
+; CHECK-NEXT: [[ARRAYIDX17_LDIST1:%.*]] = getelementptr i8, ptr [[I3_LDIST1]], i64 -4
+; CHECK-NEXT: [[I4_LDIST1:%.*]] = load i32, ptr [[ARRAYIDX17_LDIST1]], align 4, !tbaa [[TBAA0]], !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]]
+; CHECK-NEXT: [[SUB18_LDIST1:%.*]] = sub nsw i32 [[ADD4_LDIST1]], [[I4_LDIST1]]
+; CHECK-NEXT: store i32 [[SUB18_LDIST1]], ptr [[I3_LDIST1]], align 4, !tbaa [[TBAA0]], !alias.scope [[META14]], !noalias [[META15]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT_LDIST1]] = add i64 [[INDVARS_IV_LDIST1]], 1
+; CHECK-NEXT: [[I6_LDIST1:%.*]] = and i64 [[INDVARS_IV_NEXT_LDIST1]], 4294967295
+; CHECK-NEXT: [[CMP1_NOT_LDIST1:%.*]] = icmp eq i64 [[I6_LDIST1]], 2147483647
+; CHECK-NEXT: br i1 [[CMP1_NOT_LDIST1]], label %[[FOR_BODY_PH:.*]], label %[[FOR_BODY_LDIST1]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK: [[FOR_BODY_PH]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[I]], %[[FOR_BODY_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[I1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT: [[SUB3:%.*]] = add i32 [[SUB_NEG]], [[I1]]
+; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB3]] to i64
+; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[I5:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !tbaa [[TBAA0]], !alias.scope [[META9]], !noalias [[META11]]
+; CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 [[I5]], 2
+; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[IDXPROM]]
+; CHECK-NEXT: store i32 [[ADD27]], ptr [[ARRAYIDX31]], align 4, !tbaa [[TBAA0]], !alias.scope [[META15]], !noalias [[META6]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[I6:%.*]] = and i64 [[INDVARS_IV_NEXT]], 4294967295
+; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[I6]], 2147483647
+; CHECK-NEXT: br i1 [[CMP1_NOT]], label %[[IF_END_LOOPEXIT_LOOPEXIT22:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP16]]
+; CHECK: [[IF_END_LOOPEXIT_LOOPEXIT]]:
+; CHECK-NEXT: br label %[[IF_END_LOOPEXIT:.*]]
+; CHECK: [[IF_END_LOOPEXIT_LOOPEXIT22]]:
+; CHECK-NEXT: br label %[[IF_END_LOOPEXIT]]
+; CHECK: [[IF_END_LOOPEXIT]]:
+; CHECK-NEXT: br label %[[IF_END]]
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp sgt i32 %len, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %sub.neg = add nsw i32 %len, -2147483647
+ %cmp1.not51 = icmp eq i32 %len, 1
+ br i1 %cmp1.not51, label %if.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %if.then
+ %add = sub nuw i32 -2147483648, %len
+ %i = zext nneg i32 %add to i64
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %indvars.iv = phi i64 [ %i, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %i1 = trunc i64 %indvars.iv to i32
+ %sub3 = add i32 %sub.neg, %i1
+ %idxprom = sext i32 %sub3 to i64
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %idxprom
+ %i2 = load i32, ptr %arrayidx, align 4, !tbaa !9
+ %add4 = add nsw i32 %i2, 1
+ %arrayidx8 = getelementptr inbounds i32, ptr %a, i64 %idxprom
+ store i32 %add4, ptr %arrayidx8, align 4, !tbaa !9
+ %i3 = getelementptr i32, ptr %c, i64 %idxprom
+ %arrayidx17 = getelementptr i8, ptr %i3, i64 -4
+ %i4 = load i32, ptr %arrayidx17, align 4, !tbaa !9
+ %sub18 = sub nsw i32 %add4, %i4
+ store i32 %sub18, ptr %i3, align 4, !tbaa !9
+ %i5 = load i32, ptr %arrayidx8, align 4, !tbaa !9
+ %add27 = add nsw i32 %i5, 2
+ %arrayidx31 = getelementptr inbounds i32, ptr %d, i64 %idxprom
+ store i32 %add27, ptr %arrayidx31, align 4, !tbaa !9
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %i6 = and i64 %indvars.iv.next, 4294967295
+ %cmp1.not = icmp eq i64 %i6, 2147483647
+ br i1 %cmp1.not, label %if.end, label %for.body, !llvm.loop !13
+
+if.end: ; preds = %for.body, %if.then, %entry
+ ret void
+}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!9 = !{!10, !10, i64 0}
+!10 = !{!"int", !11, i64 0}
+!11 = !{!"omnipotent char", !12, i64 0}
+!12 = !{!"Simple C++ TBAA"}
+!13 = distinct !{!13, !14}
+!14 = !{!"llvm.loop.mustprogress"}
+;.
+; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]]}
+; CHECK: [[META5]] = !{!"llvm.loop.mustprogress"}
+; CHECK: [[META6]] = !{[[META7:![0-9]+]]}
+; CHECK: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
+; CHECK: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
+; CHECK: [[META9]] = !{[[META10:![0-9]+]]}
+; CHECK: [[META10]] = distinct !{[[META10]], [[META8]]}
+; CHECK: [[META11]] = !{[[META12:![0-9]+]], [[META13:![0-9]+]], [[META7]]}
+; CHECK: [[META12]] = distinct !{[[META12]], [[META8]]}
+; CHECK: [[META13]] = distinct !{[[META13]], [[META8]]}
+; CHECK: [[META14]] = !{[[META12]]}
+; CHECK: [[META15]] = !{[[META13]]}
+; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META5]]}
+;.
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
|
||
const bool IsWritePtr = false; | ||
auto Instructions = LAI.getInstructionsForAccess(Ptr, IsWritePtr); | ||
for (Instruction *Inst : Instructions) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We actually have to know that we have loads before calling LAI.getInstructionsForAccess(Ptr, IsWritePtr)
as it will assert when we do not.
Emit safety guards for ptr accesses when cross partition loads exist which have a corresponding store to the same address in a different partition. This will emit the necessary ptr checks for these accesses.
1bd46a9
to
7cb671a
Compare
Emit safety guards for ptr accesses when cross partition loads exist which have a corresponding store to the same address in a different partition. This will emit the necessary ptr checks for these accesses.
The test case was obtained from SuperTest, which SiFive runs regularly. We enabled LoopDistribution by default in our downstream compiler, this change was part of that enablement.