Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[VPlan] Process simplifyRecipes via a worklist #133977

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
Original file line number Diff line number Diff line change
@@ -63,6 +63,9 @@ class VPTypeAnalysis {

/// Return the LLVMContext used by the analysis.
LLVMContext &getContext() { return Ctx; }

/// Remove \p V from the cache. You must call this after a value is erased.
void erase(VPValue *V) { CachedTypes.erase(V); }
};

// Collect a VPlan's ephemeral recipes (those used only by an assume).
75 changes: 44 additions & 31 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
@@ -923,7 +923,7 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
}

/// Try to simplify recipe \p R.
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
static VPValue *simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
using namespace llvm::VPlanPatternMatch;

// VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for
@@ -932,8 +932,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&R)) {
if (Steps->getParent()->getPlan()->isUnrolled() && Steps->isPart0() &&
vputils::onlyFirstLaneUsed(Steps)) {
Steps->replaceAllUsesWith(Steps->getOperand(0));
return;
return Steps->getOperand(0);
}
}

@@ -943,11 +942,11 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
Type *TruncTy = TypeInfo.inferScalarType(Trunc);
Type *ATy = TypeInfo.inferScalarType(A);
if (TruncTy == ATy) {
Trunc->replaceAllUsesWith(A);
return A;
} else {
// Don't replace a scalarizing recipe with a widened cast.
if (isa<VPReplicateRecipe>(&R))
return;
return nullptr;
if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {

unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
@@ -960,25 +959,13 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
VPC->setUnderlyingValue(UnderlyingExt);
}
VPC->insertBefore(&R);
Trunc->replaceAllUsesWith(VPC);
return VPC;
} else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
VPC->insertBefore(&R);
Trunc->replaceAllUsesWith(VPC);
return VPC;
}
}
#ifndef NDEBUG
// Verify that the cached type info is for both A and its users is still
// accurate by comparing it to freshly computed types.
VPTypeAnalysis TypeInfo2(
R.getParent()->getPlan()->getCanonicalIV()->getScalarType());
assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
for (VPUser *U : A->users()) {
auto *R = cast<VPRecipeBase>(U);
for (VPValue *VPV : R->definedValues())
assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
}
#endif
}

// Simplify (X && Y) || (X && !Y) -> X.
@@ -988,20 +975,17 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
VPValue *X, *Y;
if (match(&R,
m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
m_LogicalAnd(m_Deferred(X), m_Not(m_Deferred(Y)))))) {
R.getVPSingleValue()->replaceAllUsesWith(X);
R.eraseFromParent();
return;
}
m_LogicalAnd(m_Deferred(X), m_Not(m_Deferred(Y))))))
return X;

if (match(&R, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X))))
return R.getVPSingleValue()->replaceAllUsesWith(X);
return X;

if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
return R.getVPSingleValue()->replaceAllUsesWith(A);
return A;

if (match(&R, m_Not(m_Not(m_VPValue(A)))))
return R.getVPSingleValue()->replaceAllUsesWith(A);
return A;

// Remove redundant DerviedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
if ((match(&R,
@@ -1010,16 +994,45 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue()))) &&
TypeInfo.inferScalarType(R.getOperand(1)) ==
TypeInfo.inferScalarType(R.getVPSingleValue()))
return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
return R.getOperand(1);

return nullptr;
}

void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
Plan.getEntry());
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
simplifyRecipe(R, TypeInfo);
SetVector<VPRecipeBase *> Worklist;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this needs to be a std::deque, because we want to pop_front, and push_back. See also: #93998.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh thanks for sharing that, I didn't know about that previous PR. Why do we need it to be pop_front/push_back? I believe InstCombine uses pop_back/push_back, i.e. immediately processes anything added to the list.

I also just realised that InstCombine pushes the initial instructions onto the worklist in order, so the perceived order is actually top to bottom. I've updated that here 3ca3643

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't matter yet, but matters when you will put up a patch with functional changes: I believe the order of simplification matters unlike in InstCombine, but I'm happy to address the point in the follow-up patch. Let's keep this thread open in case other reviewers have something to say about this.

for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))
for (VPRecipeBase &R : reverse(*VPBB))
Worklist.insert(&R);

while (!Worklist.empty()) {
VPRecipeBase *R = Worklist.pop_back_val();
if (VPValue *Result = simplifyRecipe(*R, TypeInfo)) {
R->getVPSingleValue()->replaceAllUsesWith(Result);
TypeInfo.erase(R->getVPSingleValue());
R->eraseFromParent();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think erasing R might invalidate the VTypeAnalysis cache, I'll take a look to see if this is a problem

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if we need eraseFromParent, as there is recursivelyRemoveDeadRecipes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've manually erased the deleted value from the cache in 4e05ab9.

This was an existing issue with the simplification pass but it looks like it didn't trigger in the wild. I think it's probably easiest to fix it in this PR now that we erase all replaced recipes, which should prevent #120252.

I've also moved the VTypeAnalysis assertion up so it should run for all transforms, not just the trunc combine.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@artagnon erasing the old instruction decreases the number of uses which can affect what transforms kick in, e.g. the blend simplifications are sensitive to the number of uses.

Also if the old recipe uses the new result, it will get added back to the worklist which will loop

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I should mention I took the structure from this from InstructionCombining.cpp! There's a few things there that I think we could eventually copy over, e.g. adding any operands of the recipe to the worklist now that they have one less use.

if (VPRecipeBase *ResultR = Result->getDefiningRecipe())
Worklist.insert(ResultR);
for (VPUser *U : Result->users())
if (auto *UR = dyn_cast<VPRecipeBase>(U))
Worklist.insert(UR);

#ifndef NDEBUG
// Verify that the cached type info is for both Result and its users is
// still accurate by comparing it to freshly computed types.
VPTypeAnalysis TypeInfo2(&CanonicalIVTy);
assert(TypeInfo.inferScalarType(Result) ==
TypeInfo2.inferScalarType(Result));
for (VPUser *U : Result->users()) {
auto *R = cast<VPRecipeBase>(U);
for (VPValue *VPV : R->definedValues())
assert(TypeInfo.inferScalarType(VPV) ==
TypeInfo2.inferScalarType(VPV));
}
#endif
}
}
}