-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[InstCombine] Fold `ceil(X / (2 ^ C)) == 0 -> X == 0`
#143683
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: Iris Shi (el-ev) Changes
https://alive2.llvm.org/ce/z/q5-XqY Full diff: https://github.com/llvm/llvm-project/pull/143683.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c112fae351817..4569a2cd82d2a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Utils/Local.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/CmpPredicate.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
@@ -1298,6 +1299,23 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
// eq/ne (mul X, Y)) with (icmp eq/ne X/Y) and if X/Y is known non-zero that
// will fold to a constant elsewhere.
}
+
+ // (X >> C) + ((X & ((1 << C) - 1)) != 0) == 0 -> X == 0
+ if (Pred == ICmpInst::ICMP_EQ) {
+ Value *X;
+ const APInt *C1, *C2;
+ CmpPredicate PredNE;
+ if (match(Cmp.getOperand(0),
+ m_OneUse(
+ m_Add(m_LShr(m_Value(X), m_APInt(C1)),
+ m_ZExt(m_ICmp(PredNE, m_And(m_Deferred(X), m_APInt(C2)),
+ m_Zero()))))) &&
+ PredNE == CmpInst::ICMP_NE &&
+ *C2 == APInt::getLowBitsSet(C2->getBitWidth(), C1->getZExtValue()))
+ return new ICmpInst(ICmpInst::ICMP_EQ, X,
+ ConstantInt::getNullValue(X->getType()));
+ }
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/ceil-shift.ll b/llvm/test/Transforms/InstCombine/ceil-shift.ll
new file mode 100644
index 0000000000000..853985eff258d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ceil-shift.ll
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i1 @ceil_shift4(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift4(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[ARG0]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %1 = lshr i32 %arg0, 4
+ %2 = and i32 %arg0, 15
+ %3 = icmp ne i32 %2, 0
+ %4 = zext i1 %3 to i32
+ %5 = add i32 %1, %4
+ %6 = icmp eq i32 %5, 0
+ ret i1 %6
+}
+
+define i1 @ceil_shift6(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift6(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[ARG0]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %1 = lshr i32 %arg0, 6
+ %2 = and i32 %arg0, 63
+ %3 = icmp ne i32 %2, 0
+ %4 = zext i1 %3 to i32
+ %5 = add i32 %1, %4
+ %6 = icmp eq i32 %5, 0
+ ret i1 %6
+}
+
+define i1 @ceil_shift11(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift11(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[ARG0]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %1 = lshr i32 %arg0, 11
+ %2 = and i32 %arg0, 2047
+ %3 = icmp ne i32 %2, 0
+ %4 = zext i1 %3 to i32
+ %5 = add i32 %1, %4
+ %6 = icmp eq i32 %5, 0
+ ret i1 %6
+}
+
+define i1 @ceil_shift0(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift0(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[ARG0]], 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %1 = lshr i32 %arg0, 0
+ %2 = and i32 %arg0, 0
+ %3 = icmp ne i32 %2, 0
+ %4 = zext i1 %3 to i32
+ %5 = add i32 %1, %4
+ %6 = icmp eq i32 %5, 0
+ ret i1 %6
+}
+
+declare void @use(i32)
+
+define i1 @ceil_shift4_used_1(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift4_used_1(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[ARG0]], 4
+; CHECK-NEXT: call void @use(i32 [[TMP1]])
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[ARG0]], 0
+; CHECK-NEXT: ret i1 [[TMP6]]
+;
+ %1 = lshr i32 %arg0, 4
+ call void @use(i32 %1)
+ %2 = and i32 %arg0, 15
+ %3 = icmp ne i32 %2, 0
+ %4 = zext i1 %3 to i32
+ %5 = add i32 %1, %4
+ %6 = icmp eq i32 %5, 0
+ ret i1 %6
+}
+
+define i1 @ceil_shift4_used_5(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift4_used_5(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[ARG0]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[ARG0]], 15
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP4]]
+; CHECK-NEXT: call void @use(i32 [[TMP5]])
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT: ret i1 [[TMP6]]
+;
+ %1 = lshr i32 %arg0, 4
+ %2 = and i32 %arg0, 15
+ %3 = icmp ne i32 %2, 0
+ %4 = zext i1 %3 to i32
+ %5 = add i32 %1, %4
+ call void @use(i32 %5)
+ %6 = icmp eq i32 %5, 0
+ ret i1 %6
+}
+
+define <4 x i1> @ceil_shift4_v4i32(<4 x i32> %arg0) {
+; CHECK-LABEL: define <4 x i1> @ceil_shift4_v4i32(
+; CHECK-SAME: <4 x i32> [[ARG0:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[ARG0]], zeroinitializer
+; CHECK-NEXT: ret <4 x i1> [[TMP1]]
+;
+ %1 = lshr <4 x i32> %arg0, splat (i32 16)
+ %2 = and <4 x i32> %arg0, splat (i32 65535)
+ %3 = icmp ne <4 x i32> %2, zeroinitializer
+ %4 = zext <4 x i1> %3 to <4 x i32>
+ %5 = add <4 x i32> %1, %4
+ %6 = icmp eq <4 x i32> %5, zeroinitializer
+ ret <4 x i1> %6
+}
+
+define <8 x i1> @ceil_shift4_v8i16(<8 x i16> %arg0) {
+; CHECK-LABEL: define <8 x i1> @ceil_shift4_v8i16(
+; CHECK-SAME: <8 x i16> [[ARG0:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> [[ARG0]], zeroinitializer
+; CHECK-NEXT: ret <8 x i1> [[TMP1]]
+;
+ %1 = lshr <8 x i16> %arg0, splat (i16 4)
+ %2 = and <8 x i16> %arg0, splat (i16 15)
+ %3 = icmp ne <8 x i16> %2, zeroinitializer
+ %4 = zext <8 x i1> %3 to <8 x i16>
+ %5 = add <8 x i16> %1, %4
+ %6 = icmp eq <8 x i16> %5, zeroinitializer
+ ret <8 x i1> %6
+}
+
+; negative tests
+
+define i1 @ceil_shift_not_mask_1(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift_not_mask_1(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[ARG0]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[ARG0]], 31
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT: ret i1 [[TMP6]]
+;
+ %1 = lshr i32 %arg0, 4
+ %2 = and i32 %arg0, 31
+ %3 = icmp ne i32 %2, 0
+ %4 = zext i1 %3 to i32
+ %5 = add i32 %1, %4
+ %6 = icmp eq i32 %5, 0
+ ret i1 %6
+}
+
+define i1 @ceil_shift_not_mask_2(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift_not_mask_2(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[ARG0]], 5
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[ARG0]], 15
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP1]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT: ret i1 [[TMP6]]
+;
+ %1 = lshr i32 %arg0, 5
+ %2 = and i32 %arg0, 15
+ %3 = icmp ne i32 %2, 0
+ %4 = zext i1 %3 to i32
+ %5 = add i32 %1, %4
+ %6 = icmp eq i32 %5, 0
+ ret i1 %6
+}
|
ceil(X >> C) == 0
-> X == 0
ceil(X / (2 ^ C)) == 0
-> X == 0
410b0a1
to
69227c7
Compare
69227c7
to
14eb2e0
Compare
943a95f
to
0c68968
Compare
IIRC the regression can be fixed by handling this pattern in |
0c68968
to
90d1ed7
Compare
Updated. Could you help me rerun the benchmarks? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add tests from https://godbolt.org/z/Wcsv453n6?
c107041
to
4811b7d
Compare
((match(LHS, m_NUWMul(m_Value(), m_APIntAllowPoison(MulC))) && | ||
*MulC != 0 && C->urem(*MulC) != 0) || | ||
(match(LHS, m_NSWMul(m_Value(), m_APIntAllowPoison(MulC))) && | ||
*MulC != 0 && C->srem(*MulC) != 0))) | ||
return ConstantInt::get(ITy, Pred == ICmpInst::ICMP_NE); | ||
|
||
if (ICmpInst::isGE(Pred) && C->isOne() && isKnownNonZero(LHS, Q)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It doesn't work for `icmp sge`. BTW, do you know the reason that `@src1` got folded without this change?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Usually it is canonicalized to `icmp ne i32 %ceil, 0`, but not here.
It doesn't happen because we are simplifying `umax(%x, 1)`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is handled by `isImpliedByDomCondition`.
Can you file a separate patch for 4811b7d? I think we can land this change first :) |
[InstCombine] Fold `ceil(X >> C) == 0 -> X == 0` update test 1 address review comments rm header 1 1
4802be7
to
e854101
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Please wait for additional approval from other reviewers.
✅ With the latest revision this PR passed the C/C++ code formatter. |
Co-authored-by: Yingwei Zheng <dtcxzyw2333@gmail.com>
689446a
to
da1b1d0
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Co-authored-by: Yingwei Zheng <dtcxzyw2333@gmail.com>
Co-authored-by: Yingwei Zheng <dtcxzyw2333@gmail.com>
https://alive2.llvm.org/ce/z/q5-XqY