-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[EarlyCSE] Remove void return restriction for call CSE #145320
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
For readonly/readnone calls returns void we can't CSE the return value. However, making these participate in CSE is still useful, because we *can* CSE calls that are not willreturn/nounwind (which otherwise cannot be DCEd). The more interesting use-case is CSE for writeonly calls (not yet supported), but I figured this change makes sense independently. There is no impact on compile-time.
@llvm/pr-subscribers-llvm-transforms Author: Nikita Popov (nikic) ChangesFor readonly/readnone calls returning void we can't CSE the return The more interesting use-case is CSE for writeonly calls (not There is no impact on compile-time. Full diff: https://github.com/llvm/llvm-project/pull/145320.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index e1e283f171d38..381de60fcface 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -133,7 +133,7 @@ struct SimpleValue {
}
}
}
- return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy() &&
+ return CI->doesNotAccessMemory() &&
// FIXME: Currently the calls which may access the thread id may
// be considered as not accessing the memory. But this is
// problematic for coroutines, since coroutines may resume in a
@@ -492,10 +492,6 @@ struct CallValue {
}
static bool canHandle(Instruction *Inst) {
- // Don't value number anything that returns void.
- if (Inst->getType()->isVoidTy())
- return false;
-
CallInst *CI = dyn_cast<CallInst>(Inst);
if (!CI || !CI->onlyReadsMemory() ||
// FIXME: Currently the calls which may access the thread id may
diff --git a/llvm/test/Transforms/EarlyCSE/basic.ll b/llvm/test/Transforms/EarlyCSE/basic.ll
index 2c6b2a9613924..f877235ed9787 100644
--- a/llvm/test/Transforms/EarlyCSE/basic.ll
+++ b/llvm/test/Transforms/EarlyCSE/basic.ll
@@ -146,6 +146,28 @@ define i32 @test5(ptr%P) {
ret i32 %Diff
}
+declare void @void_func()
+
+define void @void_func_cse_readonly(ptr %P) {
+; CHECK-LABEL: @void_func_cse_readonly(
+; CHECK-NEXT: call void @void_func(ptr [[P:%.*]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT: ret void
+;
+ call void @void_func(ptr %P) memory(read)
+ call void @void_func(ptr %P) memory(read)
+ ret void
+}
+
+define void @void_func_cse_readnone(ptr %P) {
+; CHECK-LABEL: @void_func_cse_readnone(
+; CHECK-NEXT: call void @void_func(ptr [[P:%.*]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT: ret void
+;
+ call void @void_func(ptr %P) memory(none)
+ call void @void_func(ptr %P) memory(none)
+ ret void
+}
+
!0 = !{!"branch_weights", i32 95}
!1 = !{!"branch_weights", i32 95}
@@ -186,7 +208,7 @@ define void @test7(ptr%P) {
;; Readnone functions aren't invalidated by stores.
define i32 @test8(ptr%P) {
; CHECK-LABEL: @test8(
-; CHECK-NEXT: [[V1:%.*]] = call i32 @func(ptr [[P:%.*]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT: [[V1:%.*]] = call i32 @func(ptr [[P:%.*]]) #[[ATTR2]]
; CHECK-NEXT: store i32 4, ptr [[P]], align 4
; CHECK-NEXT: ret i32 0
;
@@ -202,7 +224,7 @@ define i32 @test8(ptr%P) {
define i32 @test9(ptr%P) {
; CHECK-LABEL: @test9(
; CHECK-NEXT: store i32 4, ptr [[P:%.*]], align 4
-; CHECK-NEXT: [[V1:%.*]] = call i32 @func(ptr [[P]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT: [[V1:%.*]] = call i32 @func(ptr [[P]]) #[[ATTR1]]
; CHECK-NEXT: store i32 5, ptr [[P]], align 4
; CHECK-NEXT: ret i32 [[V1]]
;
diff --git a/llvm/test/Transforms/EarlyCSE/flags.ll b/llvm/test/Transforms/EarlyCSE/flags.ll
index 78b3818b211da..dcaaacbac639f 100644
--- a/llvm/test/Transforms/EarlyCSE/flags.ll
+++ b/llvm/test/Transforms/EarlyCSE/flags.ll
@@ -3,7 +3,7 @@
; RUN: opt -passes='early-cse<memssa>' -S < %s | FileCheck %s
declare void @use(i1)
-declare void @use.ptr(ptr) memory(read)
+declare void @use.ptr(i32, ptr) memory(read)
define void @test1(float %x, float %y) {
; CHECK-LABEL: @test1(
@@ -52,42 +52,42 @@ define void @test_inbounds_program_not_ub_if_first_gep_poison(ptr %ptr, i64 %n)
define void @load_both_nonnull(ptr %p) {
; CHECK-LABEL: @load_both_nonnull(
; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[P:%.*]], align 8, !nonnull [[META0:![0-9]+]]
-; CHECK-NEXT: call void @use.ptr(ptr [[V1]])
-; CHECK-NEXT: call void @use.ptr(ptr [[V1]])
+; CHECK-NEXT: call void @use.ptr(i32 0, ptr [[V1]])
+; CHECK-NEXT: call void @use.ptr(i32 1, ptr [[V1]])
; CHECK-NEXT: ret void
;
%v1 = load ptr, ptr %p, !nonnull !{}
- call void @use.ptr(ptr %v1)
+ call void @use.ptr(i32 0, ptr %v1)
%v2 = load ptr, ptr %p, !nonnull !{}
- call void @use.ptr(ptr %v2)
+ call void @use.ptr(i32 1, ptr %v2)
ret void
}
define void @load_first_nonnull(ptr %p) {
; CHECK-LABEL: @load_first_nonnull(
; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[P:%.*]], align 8
-; CHECK-NEXT: call void @use.ptr(ptr [[V1]])
-; CHECK-NEXT: call void @use.ptr(ptr [[V1]])
+; CHECK-NEXT: call void @use.ptr(i32 0, ptr [[V1]])
+; CHECK-NEXT: call void @use.ptr(i32 1, ptr [[V1]])
; CHECK-NEXT: ret void
;
%v1 = load ptr, ptr %p, !nonnull !{}
- call void @use.ptr(ptr %v1)
+ call void @use.ptr(i32 0, ptr %v1)
%v2 = load ptr, ptr %p
- call void @use.ptr(ptr %v2)
+ call void @use.ptr(i32 1, ptr %v2)
ret void
}
define void @load_first_nonnull_noundef(ptr %p) {
; CHECK-LABEL: @load_first_nonnull_noundef(
; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[P:%.*]], align 8, !nonnull [[META0]], !noundef [[META0]]
-; CHECK-NEXT: call void @use.ptr(ptr [[V1]])
-; CHECK-NEXT: call void @use.ptr(ptr [[V1]])
+; CHECK-NEXT: call void @use.ptr(i32 0, ptr [[V1]])
+; CHECK-NEXT: call void @use.ptr(i32 1, ptr [[V1]])
; CHECK-NEXT: ret void
;
%v1 = load ptr, ptr %p, !nonnull !{}, !noundef !{}
- call void @use.ptr(ptr %v1)
+ call void @use.ptr(i32 0, ptr %v1)
%v2 = load ptr, ptr %p
- call void @use.ptr(ptr %v2)
+ call void @use.ptr(i32 1, ptr %v2)
ret void
}
diff --git a/llvm/test/Transforms/EarlyCSE/noalias-addrspace.ll b/llvm/test/Transforms/EarlyCSE/noalias-addrspace.ll
index 0a001b55f684c..2d67186531481 100644
--- a/llvm/test/Transforms/EarlyCSE/noalias-addrspace.ll
+++ b/llvm/test/Transforms/EarlyCSE/noalias-addrspace.ll
@@ -2,20 +2,20 @@
; RUN: opt -passes='early-cse<memssa>' -S < %s | FileCheck %s
declare void @use(i1)
-declare void @use.ptr(ptr) memory(read)
+declare void @use.ptr(i32, ptr) memory(read)
define void @load_first_noalias_addrspace(ptr %p) {
; CHECK-LABEL: define void @load_first_noalias_addrspace(
; CHECK-SAME: ptr [[P:%.*]]) {
; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[P]], align 8, !nonnull [[META0:![0-9]+]], !noundef [[META0]], !noalias.addrspace [[META1:![0-9]+]]
-; CHECK-NEXT: call void @use.ptr(ptr [[V1]])
-; CHECK-NEXT: call void @use.ptr(ptr [[V1]])
+; CHECK-NEXT: call void @use.ptr(i32 0, ptr [[V1]])
+; CHECK-NEXT: call void @use.ptr(i32 1, ptr [[V1]])
; CHECK-NEXT: ret void
;
%v1 = load ptr, ptr %p, !nonnull !{}, !noundef !{}, !noalias.addrspace !0
- call void @use.ptr(ptr %v1)
+ call void @use.ptr(i32 0, ptr %v1)
%v2 = load ptr, ptr %p
- call void @use.ptr(ptr %v2)
+ call void @use.ptr(i32 1, ptr %v2)
ret void
}
@@ -23,14 +23,14 @@ define void @load_both_same_noalias_addrspace(ptr %p) {
; CHECK-LABEL: define void @load_both_same_noalias_addrspace(
; CHECK-SAME: ptr [[P:%.*]]) {
; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[P]], align 8, !nonnull [[META0]], !noundef [[META0]], !noalias.addrspace [[META1]]
-; CHECK-NEXT: call void @use.ptr(ptr [[V1]])
-; CHECK-NEXT: call void @use.ptr(ptr [[V1]])
+; CHECK-NEXT: call void @use.ptr(i32 0, ptr [[V1]])
+; CHECK-NEXT: call void @use.ptr(i32 1, ptr [[V1]])
; CHECK-NEXT: ret void
;
%v1 = load ptr, ptr %p, !nonnull !{}, !noundef !{}, !noalias.addrspace !0
- call void @use.ptr(ptr %v1)
+ call void @use.ptr(i32 0, ptr %v1)
%v2 = load ptr, ptr %p, !noalias.addrspace !0
- call void @use.ptr(ptr %v2)
+ call void @use.ptr(i32 1, ptr %v2)
ret void
}
@@ -38,14 +38,14 @@ define void @load_both_disjoint_noalias_addrspace(ptr %p) {
; CHECK-LABEL: define void @load_both_disjoint_noalias_addrspace(
; CHECK-SAME: ptr [[P:%.*]]) {
; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[P]], align 8, !nonnull [[META0]], !noundef [[META0]], !noalias.addrspace [[META1]]
-; CHECK-NEXT: call void @use.ptr(ptr [[V1]])
-; CHECK-NEXT: call void @use.ptr(ptr [[V1]])
+; CHECK-NEXT: call void @use.ptr(i32 0, ptr [[V1]])
+; CHECK-NEXT: call void @use.ptr(i32 1, ptr [[V1]])
; CHECK-NEXT: ret void
;
%v1 = load ptr, ptr %p, !nonnull !{}, !noundef !{}, !noalias.addrspace !0
- call void @use.ptr(ptr %v1)
+ call void @use.ptr(i32 0, ptr %v1)
%v2 = load ptr, ptr %p, !noalias.addrspace !1
- call void @use.ptr(ptr %v2)
+ call void @use.ptr(i32 1, ptr %v2)
ret void
}
@@ -53,14 +53,14 @@ define void @load_both_overlap_noalias_addrspace(ptr %p) {
; CHECK-LABEL: define void @load_both_overlap_noalias_addrspace(
; CHECK-SAME: ptr [[P:%.*]]) {
; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[P]], align 8, !nonnull [[META0]], !noundef [[META0]], !noalias.addrspace [[META1]]
-; CHECK-NEXT: call void @use.ptr(ptr [[V1]])
-; CHECK-NEXT: call void @use.ptr(ptr [[V1]])
+; CHECK-NEXT: call void @use.ptr(i32 0, ptr [[V1]])
+; CHECK-NEXT: call void @use.ptr(i32 1, ptr [[V1]])
; CHECK-NEXT: ret void
;
%v1 = load ptr, ptr %p, !nonnull !{}, !noundef !{}, !noalias.addrspace !0
- call void @use.ptr(ptr %v1)
+ call void @use.ptr(i32 0, ptr %v1)
%v2 = load ptr, ptr %p, !noalias.addrspace !2
- call void @use.ptr(ptr %v2)
+ call void @use.ptr(i32 1, ptr %v2)
ret void
}
|
@@ -146,6 +146,28 @@ define i32 @test5(ptr%P) { | |||
ret i32 %Diff | |||
} | |||
|
|||
declare void @void_func() | |||
|
|||
define void @void_func_cse_readonly(ptr %P) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Once upon a time, we had a baked in assumption that in order to throw, a routine must write memory. Have we changed that at some point? I'm asking because otherwise, a void readonly call would seem to have no observable effect and thus be trivially dead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://godbolt.org/z/GMeqhaMTj It needs nounwind/willreturn
to eliminate the call.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Once upon a time, we had a baked in assumption that in order to throw, a routine must write memory. Have we changed that at some point?
I think this holds as a practical matter, because the implementation of the unwinding mechanism is probably going to involved writes in some form. But I don't think we ever make IR level assumptions that readonly calls cannot unwind.
I'm asking because otherwise, a void readonly call would seem to have no observable effect and thus be trivially dead?
Even if we ignore the unwind case, you can have a non-willreturn readnone function, e.g. a function that only contains an infinite loop.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, the divergence (infinite loop) case is a good example. We originally assumed that infinite loops are fully undefined (unless we used volatile/atomic - which imply writes), but we've added mustprogress/willreturn, so that's now definitely well defined in at least some cases.
Thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG
For readonly/readnone calls returning void we can't CSE the return value. However, making these participate in CSE is still useful, because it allows DCE of calls that are not willreturn/nounwind (something no other part of LLVM is capable of removing). The more interesting use-case is CSE for writeonly calls (not yet supported), but I figured this change makes sense independently. There is no impact on compile-time.
For readonly/readnone calls returning void we can't CSE the return value. However, making these participate in CSE is still useful, because it allows DCE of calls that are not willreturn/nounwind (something no other part of LLVM is capable of removing). The more interesting use-case is CSE for writeonly calls (not yet supported), but I figured this change makes sense independently. There is no impact on compile-time.
For readonly/readnone calls returning void we can't CSE the return
value. However, making these participate in CSE is still useful,
because it allows DCE of calls that are not willreturn/nounwind
(something no other part of LLVM is capable of removing).
The more interesting use-case is CSE for writeonly calls (not
yet supported), but I figured this change makes sense independently.
There is no impact on compile-time.