[InstCombine] Fold `ceil(X / (2 ^ C)) == 0` -> `X == 0` #143683

el-ev · 2025-06-11T11:06:35Z

Closes #143636 (Missed optimization: fold `ceil(x/16) == 0` to `x == 0`).

https://alive2.llvm.org/ce/z/q5-XqY

llvmbot · 2025-06-11T11:07:07Z

@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-llvm-transforms

Author: Iris Shi (el-ev)

Changes

Closes #143636

https://alive2.llvm.org/ce/z/q5-XqY

Full diff: https://github.com/llvm/llvm-project/pull/143683.diff

2 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (+18)
(added) llvm/test/Transforms/InstCombine/ceil-shift.ll (+176)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c112fae351817..4569a2cd82d2a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/Utils/Local.h"
 #include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/CmpPredicate.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InstrTypes.h"
@@ -1298,6 +1299,23 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
     // eq/ne (mul X, Y)) with (icmp eq/ne X/Y) and if X/Y is known non-zero that
     // will fold to a constant elsewhere.
   }
+
+  // (X >> C) + ((X & ((1 << C) - 1)) != 0) == 0 -> X == 0
+  if (Pred == ICmpInst::ICMP_EQ) {
+    Value *X;
+    const APInt *C1, *C2;
+    CmpPredicate PredNE;
+    if (match(Cmp.getOperand(0),
+              m_OneUse(
+                  m_Add(m_LShr(m_Value(X), m_APInt(C1)),
+                        m_ZExt(m_ICmp(PredNE, m_And(m_Deferred(X), m_APInt(C2)),
+                                      m_Zero()))))) &&
+        PredNE == CmpInst::ICMP_NE &&
+        *C2 == APInt::getLowBitsSet(C2->getBitWidth(), C1->getZExtValue()))
+      return new ICmpInst(ICmpInst::ICMP_EQ, X,
+                          ConstantInt::getNullValue(X->getType()));
+  }
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/ceil-shift.ll b/llvm/test/Transforms/InstCombine/ceil-shift.ll
new file mode 100644
index 0000000000000..853985eff258d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ceil-shift.ll
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i1 @ceil_shift4(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift4(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[ARG0]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %1 = lshr i32 %arg0, 4
+  %2 = and i32 %arg0, 15
+  %3 = icmp ne i32 %2, 0
+  %4 = zext i1 %3 to i32
+  %5 = add i32 %1, %4
+  %6 = icmp eq i32 %5, 0
+  ret i1 %6
+}
+
+define i1 @ceil_shift6(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift6(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[ARG0]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %1 = lshr i32 %arg0, 6
+  %2 = and i32 %arg0, 63
+  %3 = icmp ne i32 %2, 0
+  %4 = zext i1 %3 to i32
+  %5 = add i32 %1, %4
+  %6 = icmp eq i32 %5, 0
+  ret i1 %6
+}
+
+define i1 @ceil_shift11(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift11(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[ARG0]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %1 = lshr i32 %arg0, 11
+  %2 = and i32 %arg0, 2047
+  %3 = icmp ne i32 %2, 0
+  %4 = zext i1 %3 to i32
+  %5 = add i32 %1, %4
+  %6 = icmp eq i32 %5, 0
+  ret i1 %6
+}
+
+define i1 @ceil_shift0(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift0(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[ARG0]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %1 = lshr i32 %arg0, 0
+  %2 = and i32 %arg0, 0
+  %3 = icmp ne i32 %2, 0
+  %4 = zext i1 %3 to i32
+  %5 = add i32 %1, %4
+  %6 = icmp eq i32 %5, 0
+  ret i1 %6
+}
+
+declare void @use(i32)
+
+define i1 @ceil_shift4_used_1(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift4_used_1(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[ARG0]], 4
+; CHECK-NEXT:    call void @use(i32 [[TMP1]])
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[ARG0]], 0
+; CHECK-NEXT:    ret i1 [[TMP6]]
+;
+  %1 = lshr i32 %arg0, 4
+  call void @use(i32 %1)
+  %2 = and i32 %arg0, 15
+  %3 = icmp ne i32 %2, 0
+  %4 = zext i1 %3 to i32
+  %5 = add i32 %1, %4
+  %6 = icmp eq i32 %5, 0
+  ret i1 %6
+}
+
+define i1 @ceil_shift4_used_5(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift4_used_5(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[ARG0]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[ARG0]], 15
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    call void @use(i32 [[TMP5]])
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    ret i1 [[TMP6]]
+;
+  %1 = lshr i32 %arg0, 4
+  %2 = and i32 %arg0, 15
+  %3 = icmp ne i32 %2, 0
+  %4 = zext i1 %3 to i32
+  %5 = add i32 %1, %4
+  call void @use(i32 %5)
+  %6 = icmp eq i32 %5, 0
+  ret i1 %6
+}
+
+define <4 x i1> @ceil_shift4_v4i32(<4 x i32> %arg0) {
+; CHECK-LABEL: define <4 x i1> @ceil_shift4_v4i32(
+; CHECK-SAME: <4 x i32> [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <4 x i32> [[ARG0]], zeroinitializer
+; CHECK-NEXT:    ret <4 x i1> [[TMP1]]
+;
+  %1 = lshr <4 x i32> %arg0, splat (i32 16)
+  %2 = and <4 x i32> %arg0, splat (i32 65535)
+  %3 = icmp ne <4 x i32> %2, zeroinitializer
+  %4 = zext <4 x i1> %3 to <4 x i32>
+  %5 = add <4 x i32> %1, %4
+  %6 = icmp eq <4 x i32> %5, zeroinitializer
+  ret <4 x i1> %6
+}
+
+define <8 x i1> @ceil_shift4_v8i16(<8 x i16> %arg0) {
+; CHECK-LABEL: define <8 x i1> @ceil_shift4_v8i16(
+; CHECK-SAME: <8 x i16> [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> [[ARG0]], zeroinitializer
+; CHECK-NEXT:    ret <8 x i1> [[TMP1]]
+;
+  %1 = lshr <8 x i16> %arg0, splat (i16 4)
+  %2 = and <8 x i16> %arg0, splat (i16 15)
+  %3 = icmp ne <8 x i16> %2, zeroinitializer
+  %4 = zext <8 x i1> %3 to <8 x i16>
+  %5 = add <8 x i16> %1, %4
+  %6 = icmp eq <8 x i16> %5, zeroinitializer
+  ret <8 x i1> %6
+}
+
+; negative tests
+
+define i1 @ceil_shift_not_mask_1(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift_not_mask_1(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[ARG0]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[ARG0]], 31
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    ret i1 [[TMP6]]
+;
+  %1 = lshr i32 %arg0, 4
+  %2 = and i32 %arg0, 31
+  %3 = icmp ne i32 %2, 0
+  %4 = zext i1 %3 to i32
+  %5 = add i32 %1, %4
+  %6 = icmp eq i32 %5, 0
+  ret i1 %6
+}
+
+define i1 @ceil_shift_not_mask_2(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift_not_mask_2(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[ARG0]], 5
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[ARG0]], 15
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    ret i1 [[TMP6]]
+;
+  %1 = lshr i32 %arg0, 5
+  %2 = and i32 %arg0, 15
+  %3 = icmp ne i32 %2, 0
+  %4 = zext i1 %3 to i32
+  %5 = add i32 %1, %4
+  %6 = icmp eq i32 %5, 0
+  ret i1 %6
+}

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

llvm/test/Transforms/InstCombine/ceil-shift.ll

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

dtcxzyw · 2025-06-18T15:52:08Z

IIRC the regression can be fixed by handling this pattern in isKnownNonZero. It would be better to add a helper stripNullTest to find patterns in the form of f(x) == 0 <-> x == 0.

el-ev · 2025-06-21T06:27:04Z

> IIRC the regression can be fixed by handling this pattern in `isKnownNonZero`. It would be better to add a helper `stripNullTest` to find patterns of the form `f(x) == 0 <-> x == 0`.

Updated. Could you help me rerun the benchmarks?

llvm/lib/Analysis/ValueTracking.cpp

dtcxzyw

Can you add tests from https://godbolt.org/z/Wcsv453n6?

dtcxzyw · 2025-06-21T13:26:40Z

llvm/lib/Analysis/InstructionSimplify.cpp

      ((match(LHS, m_NUWMul(m_Value(), m_APIntAllowPoison(MulC))) &&
        *MulC != 0 && C->urem(*MulC) != 0) ||
       (match(LHS, m_NSWMul(m_Value(), m_APIntAllowPoison(MulC))) &&
        *MulC != 0 && C->srem(*MulC) != 0)))
    return ConstantInt::get(ITy, Pred == ICmpInst::ICMP_NE);

+  if (ICmpInst::isGE(Pred) && C->isOne() && isKnownNonZero(LHS, Q))


It doesn't work for `icmp sge`. BTW, do you know why `@src1` got folded without this change?

When debugging, I found a call to `simplifyICmpInst` with `icmp uge i32 %ceil, 1`. Usually it is canonicalized to `icmp ne i32 %ceil, 0`, but not here.

> Usually it is canonicalized to `icmp ne i32 %ceil, 0`, but not here.

It doesn't happen because we are simplifying umax(%x, 1).

It is handled by isImpliedByDomCondition.

llvm/test/Transforms/InstCombine/ceil-shift.ll

dtcxzyw · 2025-06-21T16:04:10Z

Can you file a separate patch for 4811b7d? I think we can land this change first :)

[InstCombine] Fold `ceil(X >> C) == 0 -> X == 0` update test 1 address review comments rm header 1 1

dtcxzyw

LGTM. Please wait for additional approval from other reviewers.

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

github-actions · 2025-06-22T09:38:17Z

✅ With the latest revision this PR passed the C/C++ code formatter.

Co-authored-by: Yingwei Zheng <dtcxzyw2333@gmail.com>

nikic

LGTM

Co-authored-by: Yingwei Zheng <dtcxzyw2333@gmail.com>

el-ev requested a review from nikic as a code owner June 11, 2025 11:06

el-ev requested a review from dtcxzyw June 11, 2025 11:06

llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Jun 11, 2025

el-ev changed the title ~~[InstCombine] Fold ceil(X >> C) == 0 -> X == 0~~ [InstCombine] Fold ceil(X / (2 ^ C)) == 0 -> X == 0 Jun 11, 2025

dtcxzyw reviewed Jun 13, 2025

View reviewed changes

el-ev requested a review from dtcxzyw June 13, 2025 12:05

dtcxzyw requested changes Jun 13, 2025

View reviewed changes

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp Outdated Show resolved Hide resolved

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp Outdated Show resolved Hide resolved

el-ev force-pushed the users/el-ev/fold-ceil-shift branch from 410b0a1 to 69227c7 Compare June 14, 2025 02:15

el-ev requested a review from dtcxzyw June 14, 2025 02:15

el-ev force-pushed the users/el-ev/fold-ceil-shift branch from 69227c7 to 14eb2e0 Compare June 14, 2025 02:19

This was referenced Jun 14, 2025

Fuzz PR143683 dtcxzyw/llvm-mutation-based-fuzz-service#63

Closed

Task submission dtcxzyw/llvm-opt-benchmark#1312

Open

pre-commit: PR143683 dtcxzyw/llvm-opt-benchmark#2439

Closed

dtcxzyw reviewed Jun 14, 2025

View reviewed changes

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp Outdated Show resolved Hide resolved

el-ev requested a review from dtcxzyw June 14, 2025 13:10

dtcxzyw mentioned this pull request Jun 14, 2025

pre-commit: PR143683 dtcxzyw/llvm-opt-benchmark#2442

Open

el-ev force-pushed the users/el-ev/fold-ceil-shift branch from 943a95f to 0c68968 Compare June 15, 2025 02:42

el-ev force-pushed the users/el-ev/fold-ceil-shift branch from 0c68968 to 90d1ed7 Compare June 21, 2025 06:25

llvmbot added the llvm:analysis Includes value tracking, cost tables and constant folding label Jun 21, 2025

zyw-bot mentioned this pull request Jun 21, 2025

pre-commit: PR143683 dtcxzyw/llvm-opt-benchmark#2477

Closed

dtcxzyw reviewed Jun 21, 2025

View reviewed changes

llvm/lib/Analysis/ValueTracking.cpp Outdated Show resolved Hide resolved

el-ev requested a review from dtcxzyw June 21, 2025 10:18

zyw-bot mentioned this pull request Jun 21, 2025

pre-commit: PR143683 dtcxzyw/llvm-opt-benchmark#2478

Closed

dtcxzyw reviewed Jun 21, 2025

View reviewed changes

el-ev requested a review from dtcxzyw June 21, 2025 13:23

el-ev force-pushed the users/el-ev/fold-ceil-shift branch from c107041 to 4811b7d Compare June 21, 2025 13:28

dtcxzyw reviewed Jun 21, 2025

View reviewed changes

zyw-bot mentioned this pull request Jun 21, 2025

pre-commit: PR143683 dtcxzyw/llvm-opt-benchmark#2484

Closed

el-ev added 5 commits June 22, 2025 13:33

pre-commit test

8e9969d

[InstCombine] Fold `ceil(X >> C) == 0 -> X == 0` update test 1 address review comments rm header 1 1

[InstCombine] Fold ceil(X >> C) == 0 -> X == 0

1a91c57

add test

8830167

match both or and add

32cf848

add stripNullTest and update isKnownNonZero

e854101

el-ev force-pushed the users/el-ev/fold-ceil-shift branch from 4802be7 to e854101 Compare June 22, 2025 05:43

el-ev requested a review from dtcxzyw June 22, 2025 05:43

zyw-bot mentioned this pull request Jun 22, 2025

pre-commit: PR143683 dtcxzyw/llvm-opt-benchmark#2491

Closed

dtcxzyw approved these changes Jun 22, 2025

View reviewed changes

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp Outdated Show resolved Hide resolved

Update llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

da1b1d0

Co-authored-by: Yingwei Zheng <dtcxzyw2333@gmail.com>

el-ev force-pushed the users/el-ev/fold-ceil-shift branch from 689446a to da1b1d0 Compare June 22, 2025 09:41

nikic approved these changes Jun 22, 2025

View reviewed changes

el-ev merged commit 32f911f into main Jun 23, 2025
7 checks passed

el-ev deleted the users/el-ev/fold-ceil-shift branch June 23, 2025 02:51

miguelcsx pushed a commit to miguelcsx/llvm-project that referenced this pull request Jun 23, 2025

[InstCombine] Fold ceil(X / (2 ^ C)) == 0 -> X == 0 (llvm#143683)

9f90ab2

Co-authored-by: Yingwei Zheng <dtcxzyw2333@gmail.com>

Jaddyen pushed a commit to Jaddyen/llvm-project that referenced this pull request Jun 23, 2025

[InstCombine] Fold ceil(X / (2 ^ C)) == 0 -> X == 0 (llvm#143683)

0ce91c9

Co-authored-by: Yingwei Zheng <dtcxzyw2333@gmail.com>

[InstCombine] Fold ceil(X / (2 ^ C)) == 0 -> X == 0 #143683

[InstCombine] Fold ceil(X / (2 ^ C)) == 0 -> X == 0 #143683

Conversation

el-ev commented Jun 11, 2025

Uh oh!

llvmbot commented Jun 11, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

dtcxzyw commented Jun 18, 2025

Uh oh!

el-ev commented Jun 21, 2025

Uh oh!

Uh oh!

dtcxzyw left a comment

Choose a reason for hiding this comment

Uh oh!

dtcxzyw Jun 21, 2025

Choose a reason for hiding this comment

Uh oh!

el-ev Jun 21, 2025

Choose a reason for hiding this comment

Uh oh!

el-ev Jun 21, 2025

Choose a reason for hiding this comment

Uh oh!

dtcxzyw Jun 21, 2025

Choose a reason for hiding this comment

Uh oh!

dtcxzyw Jun 21, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

dtcxzyw commented Jun 21, 2025

Uh oh!

dtcxzyw left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

github-actions bot commented Jun 22, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

nikic left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

[InstCombine] Fold `ceil(X / (2 ^ C)) == 0` -> `X == 0` #143683

[InstCombine] Fold `ceil(X / (2 ^ C)) == 0` -> `X == 0` #143683

llvmbot commented Jun 11, 2025 •

edited

Loading

github-actions bot commented Jun 22, 2025 •

edited

Loading