-
Notifications
You must be signed in to change notification settings - Fork 14.1k
[NFC][AMDGPU] print more info when debugging InsertWaitCnts pass #144629
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Sameer Sahasrabuddhe (ssahasra) ChangesFull diff: https://github.com/llvm/llvm-project/pull/144629.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitEventType.def b/llvm/lib/Target/AMDGPU/AMDGPUWaitEventType.def
new file mode 100644
index 0000000000000..271db53c2801d
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitEventType.def
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// An enumeration of all the event types handled by SIInsertWaitcnts.cpp
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+AMDGPU_WAIT_EVENT(VMEM_ACCESS) // vector-memory read & write
+AMDGPU_WAIT_EVENT(VMEM_READ_ACCESS) // vector-memory read
+AMDGPU_WAIT_EVENT(VMEM_SAMPLER_READ_ACCESS) // vector-memory SAMPLER read (gfx12+ only)
+AMDGPU_WAIT_EVENT(VMEM_BVH_READ_ACCESS) // vector-memory BVH read (gfx12+ only)
+AMDGPU_WAIT_EVENT(VMEM_WRITE_ACCESS) // vector-memory write that is not scratch
+AMDGPU_WAIT_EVENT(SCRATCH_WRITE_ACCESS) // vector-memory write that may be scratch
+AMDGPU_WAIT_EVENT(LDS_ACCESS) // lds read & write
+AMDGPU_WAIT_EVENT(GDS_ACCESS) // gds read & write
+AMDGPU_WAIT_EVENT(SQ_MESSAGE) // send message
+AMDGPU_WAIT_EVENT(SMEM_ACCESS) // scalar-memory read & write
+AMDGPU_WAIT_EVENT(EXP_GPR_LOCK) // export holding on its data src
+AMDGPU_WAIT_EVENT(GDS_GPR_LOCK) // GDS holding on its data and addr src
+AMDGPU_WAIT_EVENT(EXP_POS_ACCESS) // write to export position
+AMDGPU_WAIT_EVENT(EXP_PARAM_ACCESS) // write to export parameter
+AMDGPU_WAIT_EVENT(VMW_GPR_LOCK) // vector-memory write holding on its data src
+AMDGPU_WAIT_EVENT(EXP_LDS_ACCESS) // read by ldsdir counting as export
+
+#undef AMDGPU_WAIT_EVENT
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index ca8e3244edd15..03a2dc0302780 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -104,24 +104,17 @@ struct HardwareLimits {
unsigned KmcntMax; // gfx12+ only.
};
+#define AMDGPU_WAIT_EVENT(Name) Name,
+
enum WaitEventType {
- VMEM_ACCESS, // vector-memory read & write
- VMEM_READ_ACCESS, // vector-memory read
- VMEM_SAMPLER_READ_ACCESS, // vector-memory SAMPLER read (gfx12+ only)
- VMEM_BVH_READ_ACCESS, // vector-memory BVH read (gfx12+ only)
- VMEM_WRITE_ACCESS, // vector-memory write that is not scratch
- SCRATCH_WRITE_ACCESS, // vector-memory write that may be scratch
- LDS_ACCESS, // lds read & write
- GDS_ACCESS, // gds read & write
- SQ_MESSAGE, // send message
- SMEM_ACCESS, // scalar-memory read & write
- EXP_GPR_LOCK, // export holding on its data src
- GDS_GPR_LOCK, // GDS holding on its data and addr src
- EXP_POS_ACCESS, // write to export position
- EXP_PARAM_ACCESS, // write to export parameter
- VMW_GPR_LOCK, // vector-memory write holding on its data src
- EXP_LDS_ACCESS, // read by ldsdir counting as export
- NUM_WAIT_EVENTS,
+#include "AMDGPUWaitEventType.def"
+ NUM_WAIT_EVENTS
+};
+
+#define AMDGPU_WAIT_EVENT(Name) #Name,
+
+static constexpr StringLiteral WaitEventTypeName[] = {
+#include "AMDGPUWaitEventType.def"
};
// The mapping is:
@@ -1100,6 +1093,20 @@ void WaitcntBrackets::print(raw_ostream &OS) const {
}
OS << '\n';
}
+
+ OS << "Pending Events: ";
+ if (hasPendingEvent()) {
+ ListSeparator LS;
+ for (unsigned I = 0; I != NUM_WAIT_EVENTS; ++I) {
+ if (hasPendingEvent((WaitEventType)I)) {
+ OS << LS << WaitEventTypeName[I];
+ }
+ }
+ } else {
+ OS << "none";
+ }
+ OS << '\n';
+
OS << '\n';
}
@@ -1265,10 +1272,15 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt(
MachineInstr *WaitcntInstr = nullptr;
MachineInstr *WaitcntVsCntInstr = nullptr;
+ LLVM_DEBUG(dbgs() << "PreGFX12::applyPreexistingWaitcnt at: " << *It << "\n");
+
for (auto &II :
make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) {
- if (II.isMetaInstruction())
+ LLVM_DEBUG(dbgs() << "pre-existing iter: " << II << "\n");
+ if (II.isMetaInstruction()) {
+ LLVM_DEBUG(dbgs() << "------ skipped\n");
continue;
+ }
unsigned Opcode = SIInstrInfo::getNonSoftWaitcntOpcode(II.getOpcode());
bool TrySimplify = Opcode != II.getOpcode() && !OptNone;
@@ -1413,10 +1425,16 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
MachineInstr *CombinedStoreDsCntInstr = nullptr;
MachineInstr *WaitInstrs[NUM_EXTENDED_INST_CNTS] = {};
+ LLVM_DEBUG(dbgs() << "GFX12Plus::applyPreexistingWaitcnt at: " << *It
+ << "\n");
+
for (auto &II :
make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) {
- if (II.isMetaInstruction())
+ LLVM_DEBUG(dbgs() << "pre-existing iter: " << II << "\n");
+ if (II.isMetaInstruction()) {
+ LLVM_DEBUG(dbgs() << "------ skipped\n");
continue;
+ }
MachineInstr **UpdatableInstr;
@@ -2306,7 +2324,9 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
bool Modified = false;
LLVM_DEBUG({
- dbgs() << "*** Block" << Block.getNumber() << " ***";
+ dbgs() << "*** Block " << Block.getNumber() << ": ";
+ Block.printName(dbgs());
+ dbgs() << " ***";
ScoreBrackets.dump();
});
@@ -2437,6 +2457,12 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
Modified |= generateWaitcnt(Wait, Block.instr_end(), Block, ScoreBrackets,
OldWaitcntInstr);
+ LLVM_DEBUG({
+ dbgs() << "*** Block end state: " << Block.getNumber() << ": ";
+ Block.printName(dbgs());
+ ScoreBrackets.dump();
+ });
+
return Modified;
}
@@ -2699,8 +2725,10 @@ bool SIInsertWaitcnts::run(MachineFunction &MF) {
BlockInfo &SuccBI = SuccBII->second;
if (!SuccBI.Incoming) {
SuccBI.Dirty = true;
- if (SuccBII <= BII)
+ if (SuccBII <= BII) {
+ LLVM_DEBUG(dbgs() << "repeat on backedge\n");
Repeat = true;
+ }
if (!MoveBracketsToSucc) {
MoveBracketsToSucc = &SuccBI;
} else {
@@ -2708,8 +2736,10 @@ bool SIInsertWaitcnts::run(MachineFunction &MF) {
}
} else if (SuccBI.Incoming->merge(*Brackets)) {
SuccBI.Dirty = true;
- if (SuccBII <= BII)
+ if (SuccBII <= BII) {
+ LLVM_DEBUG(dbgs() << "repeat on backedge\n");
Repeat = true;
+ }
}
}
if (MoveBracketsToSucc)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
test?
@@ -1265,10 +1272,15 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt( | |||
MachineInstr *WaitcntInstr = nullptr; | |||
MachineInstr *WaitcntVsCntInstr = nullptr; | |||
|
|||
LLVM_DEBUG(dbgs() << "PreGFX12::applyPreexistingWaitcnt at: " << *It << "\n"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LLVM_DEBUG(dbgs() << "PreGFX12::applyPreexistingWaitcnt at: " << *It << "\n"); | |
LLVM_DEBUG(dbgs() << "PreGFX12::applyPreexistingWaitcnt at: " << *It); |
Newline is already included in MachineInstr <<
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That newline puts helpful whitespace ahead of the "pre-existing iter" lines that come next. Sometimes *It is so long that it wraps into multiple lines.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LLVM_DEBUG(dbgs() << "PreGFX12::applyPreexistingWaitcnt at: " << *It << "\n"); | |
LLVM_DEBUG(dbgs() << "PreGFX12::applyPreexistingWaitcnt at: " << *It << '\n'); |
Single characters should print as character and not string
for (auto &II : | ||
make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) { | ||
if (II.isMetaInstruction()) | ||
LLVM_DEBUG(dbgs() << "pre-existing iter: " << II << "\n"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LLVM_DEBUG(dbgs() << "pre-existing iter: " << II << "\n"); | |
LLVM_DEBUG(dbgs() << "pre-existing iter: " << II); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same logic. The newline adds whitespace ahead of whatever follows.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can also move this to the next print
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand. The next print is inside a conditional ...
dbgs() << "*** Block end state: " << Block.getNumber() << ": "; | ||
Block.printName(dbgs()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use printMBBReference
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like printMBBReference does not include the name of the original LLVM IR block. Using printName() is more useful that way.
for (auto &II : | ||
make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) { | ||
if (II.isMetaInstruction()) | ||
LLVM_DEBUG(dbgs() << "pre-existing iter: " << II << "\n"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LLVM_DEBUG(dbgs() << "pre-existing iter: " << II << "\n"); | |
LLVM_DEBUG(dbgs() << "pre-existing iter: " << II); |
It's debug printing only? |
so we generally don't test debug print? |
Nope |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Small cleanup in how BB names are printed. But overall, I would prefer to keep the extra newlines and the use of BB.printName() mentioned in the comments.
@@ -1265,10 +1272,15 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt( | |||
MachineInstr *WaitcntInstr = nullptr; | |||
MachineInstr *WaitcntVsCntInstr = nullptr; | |||
|
|||
LLVM_DEBUG(dbgs() << "PreGFX12::applyPreexistingWaitcnt at: " << *It << "\n"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That newline puts helpful whitespace ahead of the "pre-existing iter" lines that come next. Sometimes *It is so long that it wraps into multiple lines.
for (auto &II : | ||
make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) { | ||
if (II.isMetaInstruction()) | ||
LLVM_DEBUG(dbgs() << "pre-existing iter: " << II << "\n"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same logic. The newline adds whitespace ahead of whatever follows.
dbgs() << "*** Block end state: " << Block.getNumber() << ": "; | ||
Block.printName(dbgs()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like printMBBReference does not include the name of the original LLVM IR block. Using printName() is more useful that way.
|
||
// NOTE: NO INCLUDE GUARD DESIRED! | ||
|
||
AMDGPU_WAIT_EVENT(VMEM_ACCESS) // vector-memory read & write |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's no need to move this list into a separate file if you define it like:
#define AMDGPU_WAIT_EVENTS(AMDGPU_WAIT_EVENT) \
AMDGPU_WAIT_EVENT(VMEM_ACCESS) \
AMDGPU_WAIT_EVENT(VMEM_READ_ACCESS) \
...
No description provided.