[EarlyCSE] Add support for writeonly call CSE #145474
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-llvm-transforms

Author: Nikita Popov (nikic)

Changes: Add support for CSE of writeonly calls, similar to the existing support for readonly calls.

Full diff: https://github.com/llvm/llvm-project/pull/145474.diff

2 Files Affected:
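For illustration, a minimal before/after sketch of the new behavior (hand-written; @write_log is a hypothetical write-only function, not taken from the patch's tests):

declare void @write_log(i32) memory(write)

define void @example(i32 %x) {
  ; The two calls are identical and nothing between them may read the memory
  ; they write, so with this patch EarlyCSE can remove the second call, just
  ; as it already does for read-only calls.
  call void @write_log(i32 %x)
  call void @write_log(i32 %x)
  ret void
}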
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 381de60fcface..caa9c4cd0401f 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -493,7 +493,7 @@ struct CallValue {
static bool canHandle(Instruction *Inst) {
CallInst *CI = dyn_cast<CallInst>(Inst);
- if (!CI || !CI->onlyReadsMemory() ||
+ if (!CI || (!CI->onlyReadsMemory() && !CI->onlyWritesMemory()) ||
// FIXME: Currently the calls which may access the thread id may
// be considered as not accessing the memory. But this is
// problematic for coroutines, since coroutines may resume in a
@@ -1626,14 +1626,17 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
!(MemInst.isValid() && !MemInst.mayReadFromMemory()))
LastStore = nullptr;
- // If this is a read-only call, process it.
- if (CallValue::canHandle(&Inst)) {
+ // If this is a read-only or write-only call, process it. Skip store
+ // MemInsts, as they will be more precisely handled lateron.
+ if (CallValue::canHandle(&Inst) &&
+ (!MemInst.isValid() || !MemInst.isStore())) {
// If we have an available version of this call, and if it is the right
// generation, replace this instruction.
std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
if (InVal.first != nullptr &&
isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
- &Inst)) {
+ &Inst) &&
+ InVal.first->mayReadFromMemory() == Inst.mayReadFromMemory()) {
LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
<< " to: " << *InVal.first << '\n');
if (!DebugCounter::shouldExecute(CSECounter)) {
@@ -1651,6 +1654,11 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
+ // Increase memory generation for writes. Do this before inserting
+ // the call, so it has the generation after the write occurred.
+ if (Inst.mayWriteToMemory())
+ ++CurrentGeneration;
+
// Otherwise, remember that we have this instruction.
AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
continue;
diff --git a/llvm/test/Transforms/EarlyCSE/writeonly.ll b/llvm/test/Transforms/EarlyCSE/writeonly.ll
index 0bfffa3c825a3..c09b913f9ff2b 100644
--- a/llvm/test/Transforms/EarlyCSE/writeonly.ll
+++ b/llvm/test/Transforms/EarlyCSE/writeonly.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=early-cse -earlycse-debug-hash < %s | FileCheck %s
+; RUN: opt -S -passes=early-cse -earlycse-debug-hash < %s | FileCheck %s --check-prefixes=CHECK,NO-MSSA
+; RUN: opt -S -passes='early-cse<memssa>' < %s | FileCheck %s --check-prefixes=CHECK,MSSA
@var = global i32 undef
declare void @foo() nounwind
@@ -15,3 +16,148 @@ define void @test() {
store i32 2, ptr @var
ret void
}
+
+declare void @writeonly_void() memory(write)
+
+; Can CSE writeonly calls, including non-nounwind/willreturn.
+define void @writeonly_cse() {
+; CHECK-LABEL: @writeonly_cse(
+; CHECK-NEXT: call void @writeonly_void()
+; CHECK-NEXT: ret void
+;
+ call void @writeonly_void()
+ call void @writeonly_void()
+ ret void
+}
+
+; Can CSE, loads do not matter.
+define i32 @writeonly_cse_intervening_load(ptr %p) {
+; CHECK-LABEL: @writeonly_cse_intervening_load(
+; CHECK-NEXT: call void @writeonly_void()
+; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT: ret i32 [[V]]
+;
+ call void @writeonly_void()
+ %v = load i32, ptr %p
+ call void @writeonly_void()
+ ret i32 %v
+}
+
+; Cannot CSE, the store may be to the same memory.
+define void @writeonly_cse_intervening_store(ptr %p) {
+; CHECK-LABEL: @writeonly_cse_intervening_store(
+; CHECK-NEXT: call void @writeonly_void()
+; CHECK-NEXT: store i32 0, ptr [[P:%.*]], align 4
+; CHECK-NEXT: call void @writeonly_void()
+; CHECK-NEXT: ret void
+;
+ call void @writeonly_void()
+ store i32 0, ptr %p
+ call void @writeonly_void()
+ ret void
+}
+
+; Can CSE, the store does not alias the writeonly call.
+define void @writeonly_cse_intervening_noalias_store(ptr noalias %p) {
+; NO-MSSA-LABEL: @writeonly_cse_intervening_noalias_store(
+; NO-MSSA-NEXT: call void @writeonly_void()
+; NO-MSSA-NEXT: store i32 0, ptr [[P:%.*]], align 4
+; NO-MSSA-NEXT: call void @writeonly_void()
+; NO-MSSA-NEXT: ret void
+;
+; MSSA-LABEL: @writeonly_cse_intervening_noalias_store(
+; MSSA-NEXT: call void @writeonly_void()
+; MSSA-NEXT: store i32 0, ptr [[P:%.*]], align 4
+; MSSA-NEXT: ret void
+;
+ call void @writeonly_void()
+ store i32 0, ptr %p
+ call void @writeonly_void()
+ ret void
+}
+
+; Cannot CSE loads across writeonly call.
+define i32 @load_cse_across_writeonly(ptr %p) {
+; CHECK-LABEL: @load_cse_across_writeonly(
+; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT: call void @writeonly_void()
+; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: [[RES:%.*]] = sub i32 [[V1]], [[V2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %v1 = load i32, ptr %p
+ call void @writeonly_void()
+ %v2 = load i32, ptr %p
+ %res = sub i32 %v1, %v2
+ ret i32 %res
+}
+
+; Can CSE loads across eliminated writeonly call.
+define i32 @load_cse_across_csed_writeonly(ptr %p) {
+; CHECK-LABEL: @load_cse_across_csed_writeonly(
+; CHECK-NEXT: call void @writeonly_void()
+; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT: ret i32 0
+;
+ call void @writeonly_void()
+ %v1 = load i32, ptr %p
+ call void @writeonly_void()
+ %v2 = load i32, ptr %p
+ %res = sub i32 %v1, %v2
+ ret i32 %res
+}
+
+declare i32 @writeonly(ptr %p) memory(write)
+
+; Can CSE writeonly calls with arg and return.
+define i32 @writeonly_ret_cse(ptr %p) {
+; CHECK-LABEL: @writeonly_ret_cse(
+; CHECK-NEXT: [[V2:%.*]] = call i32 @writeonly(ptr [[P:%.*]])
+; CHECK-NEXT: ret i32 0
+;
+ %v1 = call i32 @writeonly(ptr %p)
+ %v2 = call i32 @writeonly(ptr %p)
+ %res = sub i32 %v1, %v2
+ ret i32 %res
+}
+
+; Cannot CSE writeonly calls with different arguments.
+define i32 @writeonly_different_args(ptr %p1, ptr %p2) {
+; CHECK-LABEL: @writeonly_different_args(
+; CHECK-NEXT: [[V1:%.*]] = call i32 @writeonly(ptr [[P1:%.*]])
+; CHECK-NEXT: [[V2:%.*]] = call i32 @writeonly(ptr [[P2:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = sub i32 [[V1]], [[V2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %v1 = call i32 @writeonly(ptr %p1)
+ %v2 = call i32 @writeonly(ptr %p2)
+ %res = sub i32 %v1, %v2
+ ret i32 %res
+}
+
+declare void @callee()
+
+; These are weird cases where the same call is both readonly and writeonly
+; based on call-site attributes. I believe this implies that both calls are
+; actually readnone and safe to CSE, but leave them alone to be conservative.
+define void @readonly_and_writeonly() {
+; CHECK-LABEL: @readonly_and_writeonly(
+; CHECK-NEXT: call void @callee() #[[ATTR2:[0-9]+]]
+; CHECK-NEXT: call void @callee() #[[ATTR1]]
+; CHECK-NEXT: ret void
+;
+ call void @callee() memory(read)
+ call void @callee() memory(write)
+ ret void
+}
+
+define void @writeonly_and_readonly() {
+; CHECK-LABEL: @writeonly_and_readonly(
+; CHECK-NEXT: call void @callee() #[[ATTR1]]
+; CHECK-NEXT: call void @callee() #[[ATTR2]]
+; CHECK-NEXT: ret void
+;
+ call void @callee() memory(write)
+ call void @callee() memory(read)
+ ret void
+}
      // If we have an available version of this call, and if it is the right
      // generation, replace this instruction.
      std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
      if (InVal.first != nullptr &&
          isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
-                             &Inst)) {
+                             &Inst) &&
+         InVal.first->mayReadFromMemory() == Inst.mayReadFromMemory()) {
I've added this to handle weird cases like the readonly_and_writeonly test, though I don't think it's strictly necessary...
This is a revival of https://reviews.llvm.org/D116609 by @d0k. This patch differs in a few ways:
These intrinsics introduced in llvm#84850 are currently marked as `memory(inaccessiblemem: write)`. This is not correct for the intended purpose of allowing per-block decisions, as such calls may get DCEd across control-flow boundaries (which will start actually happening with llvm#145474). Use `memory(inaccessiblemem: readwrite)` instead, just like all the other control-flow sensitive intrinsics.
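To make the control-flow concern concrete, a hand-written sketch (the @decide function is a hypothetical stand-in for those intrinsics):

declare void @decide() memory(inaccessiblemem: write)

define void @guarded(i1 %cond) {
entry:
  call void @decide()
  br i1 %cond, label %then, label %exit
then:
  ; With write-only call CSE, this call is identical to and dominated by the
  ; one in %entry, so it could be removed even though it only executes when
  ; %cond is true, defeating a per-block decision.
  call void @decide()
  br label %exit
exit:
  ret void
}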
Blocked on #145492.
Volatile memset is treated as write-only: https://godbolt.org/z/e3b44Ee1G
Yes, see #122926.
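For reference, a minimal hand-written version of the scenario the question is about (not taken from the godbolt link): the memset calls below are volatile, yet the intrinsic's declared memory effects only say write-only.

declare void @llvm.memset.p0.i64(ptr writeonly, i8, i64, i1 immarg)

define void @volatile_memsets(ptr %p) {
  ; Both memsets are volatile (last argument is true), but attribute-based
  ; reasoning alone would treat them as plain write-only calls; see #122926
  ; for the underlying issue.
  call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 16, i1 true)
  call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 16, i1 true)
  ret void
}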
This patch eliminates many math libcalls that set errno. However, it causes some regressions when processing llvm.memset.

See the following pattern:

  memset(p);
  if (Cond) {
    memset(p);
    // memory access through p
  }

After this patch, the second memset is removed. However, MemCpyOptPass does better, since it only removes the first one. As MemCpyOpt handles memory intrinsics more precisely, can we bail out on memset here?
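A hand-written IR version of the pattern above, to make the concern concrete (whether memset is actually affected depends on the patch revision; the later revision skips store MemInsts):

declare void @llvm.memset.p0.i64(ptr writeonly, i8, i64, i1 immarg)

define i8 @memset_pattern(i1 %cond) {
entry:
  %p = alloca [64 x i8]
  call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 64, i1 false)
  br i1 %cond, label %then, label %exit
then:
  ; Treating memset as a generic write-only call would let EarlyCSE remove this
  ; second memset (identical to and dominated by the first one), whereas
  ; MemCpyOpt/DSE would rather remove the first memset, keeping this store
  ; right next to the load that uses it.
  call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 64, i1 false)
  %v = load i8, ptr %p
  ret i8 %v
exit:
  ret i8 0
}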
-    // If this is a read-only call, process it.
-    if (CallValue::canHandle(&Inst)) {
+    // If this is a read-only or write-only call, process it. Skip store
+    // MemInsts, as they will be more precisely handled lateron.
Suggested change:
-    // MemInsts, as they will be more precisely handled lateron.
+    // MemInsts, as they will be more precisely handled later on.
This checks for the interaction with #145474.
Skipping memsets for now. I added a PhaseOrdering test in ec150a9. I think what we're mainly missing is store sinking support in DSE.
LGTM. Thank you!