-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[NVPTX] Fixup NVPTXPrologEpilogPass for opt-bisect-limit #144136
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NVPTX] Fixup NVPTXPrologEpilogPass for opt-bisect-limit #144136
Conversation
@llvm/pr-subscribers-backend-nvptx Author: Alex MacLean (AlexMaclean) ChangesCurrently, the NVPTXPrologEpilogPass will crash if LIFETIME_START or LIFETIME_END instructions are encountered. Usually this isn't a problem since a couple earlier passes will always remove them. However, when using opt-bisect-limit crashes can occur. This can hinder debugging and reveals a potential future problem if these optimization passes change their behavior. https://cuda.godbolt.org/z/E81xxKGdb This change updates NVPTXPrologEpilogPass and NVPTXRegisterInfo::eliminateFrameIndex to gracefully handle these instructions by simply removing them. While I'm here I also did some general fixup in NVPTXPrologEpilogPass to make it look more like PrologEpilogInserter (from which it was copied). Full diff: https://github.com/llvm/llvm-project/pull/144136.diff 5 Files Affected:
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index b7c5a0a5c9983..b7fd7090299a9 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -76,6 +76,7 @@ void initializeNVPTXAAWrapperPassPass(PassRegistry &);
void initializeNVPTXExternalAAWrapperPass(PassRegistry &);
void initializeNVPTXPeepholePass(PassRegistry &);
void initializeNVPTXTagInvariantLoadLegacyPassPass(PassRegistry &);
+void initializeNVPTXPrologEpilogPassPass(PassRegistry &);
struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
diff --git a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 7929bd2e0df08..04a3c687b18f2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -41,7 +41,7 @@ class NVPTXPrologEpilogPass : public MachineFunctionPass {
private:
void calculateFrameObjectOffsets(MachineFunction &Fn);
};
-}
+} // end anonymous namespace
MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
return new NVPTXPrologEpilogPass();
@@ -49,6 +49,44 @@ MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
char NVPTXPrologEpilogPass::ID = 0;
+INITIALIZE_PASS(NVPTXPrologEpilogPass, DEBUG_TYPE, "NVPTX Prologue/Epilogue Insertion",
+ false, false)
+
+static bool replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
+ unsigned OpIdx) {
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ if (MI.isDebugValue()) {
+
+ MachineOperand &Op = MI.getOperand(OpIdx);
+ assert(MI.isDebugOperand(&Op) &&
+ "Frame indices can only appear as a debug operand in a DBG_VALUE*"
+ " machine instruction");
+ Register Reg;
+ unsigned FrameIdx = Op.getIndex();
+
+ StackOffset Offset = TFI->getFrameIndexReference(MF, FrameIdx, Reg);
+ Op.ChangeToRegister(Reg, false /*isDef*/);
+
+ const DIExpression *DIExpr = MI.getDebugExpression();
+ if (MI.isNonListDebugValue()) {
+ DIExpr = TRI.prependOffsetExpression(MI.getDebugExpression(),
+ DIExpression::ApplyOffset, Offset);
+ } else {
+ // The debug operand at DebugOpIndex was a frame index at offset
+ // `Offset`; now the operand has been replaced with the frame
+ // register, we must add Offset with `register x, plus Offset`.
+ unsigned DebugOpIndex = MI.getDebugOperandIndex(&Op);
+ SmallVector<uint64_t, 3> Ops;
+ TRI.getOffsetOpcodes(Offset, Ops);
+ DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, DebugOpIndex);
+ }
+ MI.getDebugExpressionOp().setMetadata(DIExpr);
+ return true;
+ }
+ return false;
+}
+
bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetFrameLowering &TFI = *STI.getFrameLowering();
@@ -57,41 +95,27 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
calculateFrameObjectOffsets(MF);
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- if (!MI.getOperand(i).isFI())
+ for (MachineBasicBlock &BB : MF) {
+ for (MachineBasicBlock::iterator I = BB.end(); I != BB.begin();) {
+ MachineInstr &MI = *std::prev(I);
+
+ bool RemovedMI = false;
+ for (const auto &[Idx, Op] : enumerate(MI.operands())) {
+ if (!Op.isFI())
continue;
- // Frame indices in debug values are encoded in a target independent
- // way with simply the frame index and offset rather than any
- // target-specific addressing mode.
- if (MI.isDebugValue()) {
- MachineOperand &Op = MI.getOperand(i);
- assert(
- MI.isDebugOperand(&Op) &&
- "Frame indices can only appear as a debug operand in a DBG_VALUE*"
- " machine instruction");
- Register Reg;
- auto Offset =
- TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
- Op.ChangeToRegister(Reg, /*isDef=*/false);
- const DIExpression *DIExpr = MI.getDebugExpression();
- if (MI.isNonListDebugValue()) {
- DIExpr = TRI.prependOffsetExpression(MI.getDebugExpression(), DIExpression::ApplyOffset, Offset);
- } else {
- SmallVector<uint64_t, 3> Ops;
- TRI.getOffsetOpcodes(Offset, Ops);
- unsigned OpIdx = MI.getDebugOperandIndex(&Op);
- DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, OpIdx);
- }
- MI.getDebugExpressionOp().setMetadata(DIExpr);
+ if (replaceFrameIndexDebugInstr(MF, MI, Idx))
continue;
- }
- TRI.eliminateFrameIndex(MI, 0, i, nullptr);
+ // Eliminate this FrameIndex operand.
+ TRI.eliminateFrameIndex(MI, 0, Idx, nullptr);
Modified = true;
+ if (RemovedMI)
+ break;
}
+
+ if (!RemovedMI)
+ --I;
}
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index eb60e1502cf90..ac7025962cc99 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -103,15 +103,20 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
bool NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS) const {
+ RegScavenger *) const {
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
- int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ if (MI.isLifetimeMarker()) {
+ MI.eraseFromParent();
+ return true;
+ }
+
+ const int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
- MachineFunction &MF = *MI.getParent()->getParent();
- int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
- MI.getOperand(FIOperandNum + 1).getImm();
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
+ MI.getOperand(FIOperandNum + 1).getImm();
// Using I0 as the frame pointer
MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false);
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 85d28a703a4cb..087a672f88289 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -114,6 +114,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
initializeNVPTXExternalAAWrapperPass(PR);
initializeNVPTXPeepholePass(PR);
initializeNVPTXTagInvariantLoadLegacyPassPass(PR);
+ initializeNVPTXPrologEpilogPassPass(PR);
}
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
diff --git a/llvm/test/CodeGen/NVPTX/frameindex-lifetime.ll b/llvm/test/CodeGen/NVPTX/frameindex-lifetime.ll
new file mode 100644
index 0000000000000..42655538cc7ad
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/frameindex-lifetime.ll
@@ -0,0 +1,51 @@
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+
+;; This test is intended to verify that we don't crash when -opt-bisect-limit
+;; is used in conjunction with lifetime markers. Previously, later passes
+;; would not handle these intructions correctly and relied on earlier passes
+;; to remove them.
+
+declare void @bar(ptr)
+
+define void @foo() {
+ %p = alloca i32
+ call void @llvm.lifetime.start(i64 4, ptr %p)
+ call void @bar(ptr %p)
+ call void @llvm.lifetime.end(i64 4, ptr %p)
+ ret void
+}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
cba463d
to
99fe5a7
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not familiar with the lifetime semantics, so, for the purposes of this review, I'm just going to trust that removing them is the right design decision.
99fe5a7
to
1840841
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM :) Thanks for the explanations
Currently, the NVPTXPrologEpilogPass will crash if LIFETIME_START or LIFETIME_END instructions are encountered. Usually this isn't a problem since a couple earlier passes will always remove them. However, when using opt-bisect-limit crashes can occur. This can hinder debugging and reveals a potential future problem if these optimization passes change their behavior. https://cuda.godbolt.org/z/E81xxKGdb This change updates NVPTXPrologEpilogPass and NVPTXRegisterInfo::eliminateFrameIndex to gracefully handle these instructions by simply removing them. While I'm here I also did some general fixup in NVPTXPrologEpilogPass to make it look more like PrologEpilogInserter (from which it was copied).
Currently, the NVPTXPrologEpilogPass will crash if LIFETIME_START or LIFETIME_END instructions are encountered. Usually this isn't a problem since a couple earlier passes will always remove them. However, when using opt-bisect-limit crashes can occur. This can hinder debugging and reveals a potential future problem if these optimization passes change their behavior. https://cuda.godbolt.org/z/E81xxKGdb
This change updates NVPTXPrologEpilogPass and NVPTXRegisterInfo::eliminateFrameIndex to gracefully handle these instructions by simply removing them. While I'm here I also did some general fixup in NVPTXPrologEpilogPass to make it look more like PrologEpilogInserter (from which it was copied).