-
Notifications
You must be signed in to change notification settings - Fork 13.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VPlan] Simplify select c, x, x -> x #133731
[VPlan] Simplify select c, x, x -> x #133731
Conversation
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Luke Lau (lukel97) ChangesAs noted in 1a9358c, some simplifications can produce a redundant select where the true and false operands are the same. This simplifies these selects and adds m_Deferred to VPlanPatternMatch (just copied over from IR/PatternMatch) to simplify the pattern matching. Some upcoming patches I plan on posting can also be made cleaner with m_Deferred. The is_fpclass was changed so the condition wasn't made dead. I didn't notice any legacy-vplan cost model mismatches on llvm-test-suite or SPEC with this patch. Full diff: https://github.com/llvm/llvm-project/pull/133731.diff 5 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 3a727866a2875..24d1a16d617cf 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -66,6 +66,27 @@ struct specificval_ty {
inline specificval_ty m_Specific(const VPValue *VPV) { return VPV; }
+/// Stores a reference to the VPValue *, not the VPValue * itself,
+/// thus can be used in commutative matchers.
+template <typename Class> struct deferredval_ty {
+ Class *const &Val;
+
+ deferredval_ty(Class *const &V) : Val(V) {}
+
+ template <typename ITy> bool match(ITy *const V) { return V == Val; }
+};
+
+/// Like m_Specific(), but works if the specific value to match is determined
+/// as part of the same match() expression. For example:
+/// m_Mul(m_VPValue(X), m_Specific(X)) is incorrect, because m_Specific() will
+/// bind X before the pattern match starts.
+/// m_Mul(m_VPValue(X), m_Deferred(X)) is correct, and will check against
+/// whichever value m_VPValue(X) populated.
+inline deferredval_ty<VPValue> m_Deferred(VPValue *const &V) { return V; }
+inline deferredval_ty<const VPValue> m_Deferred(const VPValue *const &V) {
+ return V;
+}
+
/// Match a specified integer value or vector of all elements of that
/// value. \p BitWidth optionally specifies the bitwidth the matched constant
/// must have. If it is 0, the matched constant can have any bitwidth.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a2e6182cf16e2..879cc247aec81 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1059,6 +1059,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
}
+ if (match(&R, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X)))) {
+ R.getVPSingleValue()->replaceAllUsesWith(X);
+ R.eraseFromParent();
+ return;
+ }
+
if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
return R.getVPSingleValue()->replaceAllUsesWith(A);
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 1f171414242bc..c39dd6ffe5f01 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -1046,16 +1046,11 @@ define i64 @live_in_known_1_via_scev() {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ <i64 3, i64 1, i64 1, i64 1>, [[VECTOR_PH]] ], [ [[VEC_PHI]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 5)
-; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[VEC_PHI]], <4 x i64> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> [[VEC_PHI]])
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 8, [[MIDDLE_BLOCK]] ], [ 0, [[PH]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/is_fpclass.ll b/llvm/test/Transforms/LoopVectorize/is_fpclass.ll
index c429092a50e6a..9f82c93d3579d 100644
--- a/llvm/test/Transforms/LoopVectorize/is_fpclass.ll
+++ b/llvm/test/Transforms/LoopVectorize/is_fpclass.ll
@@ -9,12 +9,12 @@ define void @d() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr null, align 4
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr null, align 4
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr @d, i64 [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> [[BROADCAST_SPLAT]], i32 0)
-; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x float> zeroinitializer, <2 x float> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x float> zeroinitializer, <2 x float> splat (float 1.000000e+00)
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP1]], i32 0
; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[TMP4]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -30,7 +30,7 @@ define void @d() {
; CHECK-NEXT: [[I3:%.*]] = load float, ptr null, align 4
; CHECK-NEXT: [[I4:%.*]] = getelementptr float, ptr @d, i64 [[I]]
; CHECK-NEXT: [[I5:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[I3]], i32 0)
-; CHECK-NEXT: [[I6:%.*]] = select i1 [[I5]], float 0.000000e+00, float 0.000000e+00
+; CHECK-NEXT: [[I6:%.*]] = select i1 [[I5]], float 0.000000e+00, float 1.000000e+00
; CHECK-NEXT: store float [[I6]], ptr [[I4]], align 4
; CHECK-NEXT: [[I7]] = add i64 [[I]], 1
; CHECK-NEXT: [[I8:%.*]] = icmp eq i64 [[I7]], 128
@@ -46,7 +46,7 @@ loop:
%i3 = load float, ptr null, align 4
%i4 = getelementptr float, ptr @d, i64 %i
%i5 = tail call i1 @llvm.is.fpclass.f32(float %i3, i32 0)
- %i6 = select i1 %i5, float 0.0, float 0.0
+ %i6 = select i1 %i5, float 0.0, float 1.0
store float %i6, ptr %i4, align 4
%i7 = add i64 %i, 1
%i8 = icmp eq i64 %i7, 128
diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll
index 836115f381382..16cc05da17a63 100644
--- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll
@@ -88,16 +88,11 @@ define i32 @pr66895_tail_fold_reduction_exit_inst_gets_simplified(i32 %n) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 1, i32 1>, [[VECTOR_PH]] ], [ [[VEC_PHI]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 12)
-; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_PHI]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[VEC_PHI]])
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -4, [[MIDDLE_BLOCK]] ], [ 12, [[ENTRY:%.*]] ]
|
Actually I've just noticed the combine above can use m_Deferred. I've split off the m_Deferred bit into #133736 to keep it NFC |
As noted in 1a9358c, some simplifications can produce a redundant select where the true and false operands are the same, which this patch removes. The is_fpclass was changed so the condition wasn't made dead. I didn't notice any legacy-vplan cost model mismatches on llvm-test-suite or SPEC with this patch.
14df426
to
7f541f4
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks. Looks like the redundant selects are the result of other simplifications I
As noted in 1a9358c, some simplifications can produce a redundant select where the true and false operands are the same, which this patch removes.
The is_fpclass test was changed so the condition wasn't made dead.
I didn't notice any legacy-vplan cost model mismatches on llvm-test-suite or SPEC with this patch.