-
Notifications
You must be signed in to change notification settings - Fork 13.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VPlan] Process simplifyRecipes via a worklist #133977
base: main
Are you sure you want to change the base?
Changes from all commits
49e9003
7731e0a
7093020
e7a77ea
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -923,7 +923,7 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) { | |
} | ||
|
||
/// Try to simplify recipe \p R. | ||
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { | ||
static VPValue *simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { | ||
using namespace llvm::VPlanPatternMatch; | ||
|
||
// VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for | ||
|
@@ -932,8 +932,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { | |
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&R)) { | ||
if (Steps->getParent()->getPlan()->isUnrolled() && Steps->isPart0() && | ||
vputils::onlyFirstLaneUsed(Steps)) { | ||
Steps->replaceAllUsesWith(Steps->getOperand(0)); | ||
return; | ||
return Steps->getOperand(0); | ||
} | ||
} | ||
|
||
|
@@ -943,11 +942,11 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { | |
Type *TruncTy = TypeInfo.inferScalarType(Trunc); | ||
Type *ATy = TypeInfo.inferScalarType(A); | ||
if (TruncTy == ATy) { | ||
Trunc->replaceAllUsesWith(A); | ||
return A; | ||
} else { | ||
// Don't replace a scalarizing recipe with a widened cast. | ||
if (isa<VPReplicateRecipe>(&R)) | ||
return; | ||
return nullptr; | ||
if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) { | ||
|
||
unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue())) | ||
|
@@ -960,25 +959,13 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { | |
VPC->setUnderlyingValue(UnderlyingExt); | ||
} | ||
VPC->insertBefore(&R); | ||
Trunc->replaceAllUsesWith(VPC); | ||
return VPC; | ||
} else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) { | ||
auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy); | ||
VPC->insertBefore(&R); | ||
Trunc->replaceAllUsesWith(VPC); | ||
return VPC; | ||
} | ||
} | ||
#ifndef NDEBUG | ||
// Verify that the cached type info is for both A and its users is still | ||
// accurate by comparing it to freshly computed types. | ||
VPTypeAnalysis TypeInfo2( | ||
R.getParent()->getPlan()->getCanonicalIV()->getScalarType()); | ||
assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A)); | ||
for (VPUser *U : A->users()) { | ||
auto *R = cast<VPRecipeBase>(U); | ||
for (VPValue *VPV : R->definedValues()) | ||
assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV)); | ||
} | ||
#endif | ||
} | ||
|
||
// Simplify (X && Y) || (X && !Y) -> X. | ||
|
@@ -988,20 +975,17 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { | |
VPValue *X, *Y; | ||
if (match(&R, | ||
m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)), | ||
m_LogicalAnd(m_Deferred(X), m_Not(m_Deferred(Y)))))) { | ||
R.getVPSingleValue()->replaceAllUsesWith(X); | ||
R.eraseFromParent(); | ||
return; | ||
} | ||
m_LogicalAnd(m_Deferred(X), m_Not(m_Deferred(Y)))))) | ||
return X; | ||
|
||
if (match(&R, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X)))) | ||
return R.getVPSingleValue()->replaceAllUsesWith(X); | ||
return X; | ||
|
||
if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1)))) | ||
return R.getVPSingleValue()->replaceAllUsesWith(A); | ||
return A; | ||
|
||
if (match(&R, m_Not(m_Not(m_VPValue(A))))) | ||
return R.getVPSingleValue()->replaceAllUsesWith(A); | ||
return A; | ||
|
||
// Remove redundant DerviedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0. | ||
if ((match(&R, | ||
|
@@ -1010,16 +994,45 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { | |
m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue()))) && | ||
TypeInfo.inferScalarType(R.getOperand(1)) == | ||
TypeInfo.inferScalarType(R.getVPSingleValue())) | ||
return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1)); | ||
return R.getOperand(1); | ||
|
||
return nullptr; | ||
} | ||
|
||
void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) { | ||
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT( | ||
Plan.getEntry()); | ||
VPTypeAnalysis TypeInfo(&CanonicalIVTy); | ||
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) { | ||
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { | ||
simplifyRecipe(R, TypeInfo); | ||
SetVector<VPRecipeBase *> Worklist; | ||
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) | ||
for (VPRecipeBase &R : reverse(*VPBB)) | ||
Worklist.insert(&R); | ||
|
||
while (!Worklist.empty()) { | ||
VPRecipeBase *R = Worklist.pop_back_val(); | ||
if (VPValue *Result = simplifyRecipe(*R, TypeInfo)) { | ||
R->getVPSingleValue()->replaceAllUsesWith(Result); | ||
TypeInfo.erase(R->getVPSingleValue()); | ||
R->eraseFromParent(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think erasing R might invalidate the VTypeAnalysis cache, I'll take a look to see if this is a problem There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure if we need eraseFromParent, as there is recursivelyRemoveDeadRecipes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've manually erased the deleted value from the cache in 4e05ab9. This was an existing issue with the simplification pass but it looks like it didn't trigger in the wild. I think it's probably easiest to fix it in this PR now that we erase all replaced recipes, which should prevent #120252. I've also moved the VTypeAnalysis assertion up so it should run for all transforms, not just the trunc combine. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @artagnon erasing the old instruction decreases the number of uses which can affect what transforms kick in, e.g. the blend simplifications are sensitive to the number of uses. Also if the old recipe uses the new result, it will get added back to the worklist which will loop There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I should mention I took the structure from this from InstructionCombining.cpp! There's a few things there that I think we could eventually copy over, e.g. adding any operands of the recipe to the worklist now that they have one less use. |
||
if (VPRecipeBase *ResultR = Result->getDefiningRecipe()) | ||
Worklist.insert(ResultR); | ||
for (VPUser *U : Result->users()) | ||
if (auto *UR = dyn_cast<VPRecipeBase>(U)) | ||
Worklist.insert(UR); | ||
|
||
#ifndef NDEBUG | ||
// Verify that the cached type info is for both Result and its users is | ||
// still accurate by comparing it to freshly computed types. | ||
VPTypeAnalysis TypeInfo2(&CanonicalIVTy); | ||
assert(TypeInfo.inferScalarType(Result) == | ||
TypeInfo2.inferScalarType(Result)); | ||
for (VPUser *U : Result->users()) { | ||
auto *R = cast<VPRecipeBase>(U); | ||
for (VPValue *VPV : R->definedValues()) | ||
assert(TypeInfo.inferScalarType(VPV) == | ||
TypeInfo2.inferScalarType(VPV)); | ||
} | ||
#endif | ||
} | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this needs to be a std::deque, because we want to pop_front, and push_back. See also: #93998.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh thanks for sharing that, I didn't know about that previous PR. Why do we need it to be pop_front/push_back? I believe InstCombine uses pop_back/push_back, i.e. immediately processes anything added to the list.
I also just realised that InstCombine pushes the initial instructions onto the worklist in order, so the perceived order is actually top to bottom. I've updated that here 3ca3643
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It doesn't matter yet, but matters when you will put up a patch with functional changes: I believe the order of simplification matters unlike in InstCombine, but I'm happy to address the point in the follow-up patch. Let's keep this thread open in case other reviewers have something to say about this.