-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[AMDGPU] Support v_lshl_add_u64 in gfx1250 #145591
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Support v_lshl_add_u64 in gfx1250 #145591
Conversation
It also brings in some DPP changes needed to define it.
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-mc Author: Stanislav Mekhanoshin (rampitec) ChangesIt also brings in some DPP changes needed to define it. Full diff: https://github.com/llvm/llvm-project/pull/145591.diff 7 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 07a4292ef28bc..27b3d6bc9440c 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -661,6 +661,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
break;
+ // FIXME: DecoderTableGFX125064 is not defined yet.
+ if (isGFX1250() &&
+ tryDecodeInst(DecoderTableGFX1250_FAKE1664, MI, QW, Address, CS))
+ break;
+
if (isGFX12() &&
tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
Address, CS))
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 89a9ecc27c6ed..9ed054449c264 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -32,6 +32,7 @@ class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
let HasExtDPP = 0;
}
+let HasExt64BitDPP = 1 in {
def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32>;
def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64>;
@@ -48,10 +49,13 @@ class V_MUL_PROF<VOPProfile P> : VOP3_Profile<P> {
let HasExtDPP = 0;
}
+def V_LSHL_ADD_U64_PROF : VOP3_Profile<VOP_I64_I64_I32_I64>;
+
def DIV_FIXUP_F32_PROF : VOP3_Profile<VOP_F32_F32_F32_F32> {
let HasExtVOP3DPP = 0;
let HasExtDPP = 0;
}
+} // End HasExt64BitDPP = 1;
//===----------------------------------------------------------------------===//
// VOP3 INTERP
@@ -722,7 +726,7 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32
// V_LSHL_ADD_U64: D0.u64 = (S0.u64 << S1.u[2:0]) + S2.u64
// src0 is shifted left by 0-4 (use “0” to get ADD_U64).
let SubtargetPredicate = HasLshlAddU64Inst in
-defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>;
+defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", V_LSHL_ADD_U64_PROF>;
let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0,
SchedRW = [WriteFloatCvt] in {
@@ -1889,6 +1893,9 @@ let AssemblerPredicate = isGFX11Plus in {
def : AMDGPUMnemonicAlias<"v_xor_add_u32", "v_xad_u32">;
}
+// These instructions differ from GFX12 variant by supporting DPP:
+defm V_LSHL_ADD_U64 : VOP3Only_Realtriple_gfx1250<0x252>;
+
//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 19d490465f163..0b64b504466c8 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1552,12 +1552,17 @@ class VOP3InstBase<string OpName, VOPProfile P, SDPatternOperator node = null_fr
""));
}
-multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> {
+multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag,
+ list<Predicate> predicates = []> {
def _e64 : VOP3InstBase<OpName, P, node>;
- let SubtargetPredicate = isGFX11Plus in {
- if P.HasExtVOP3DPP then
- def _e64_dpp : VOP3_DPP_Pseudo <OpName, P>;
- } // end SubtargetPredicate = isGFX11Plus
+ if P.HasExtVOP3DPP then
+ def _e64_dpp : VOP3_DPP_Pseudo <OpName, P> {
+ let SubtargetPredicate = isGFX11Plus;
+ }
+ else if P.HasExt64BitDPP then
+ def _e64_dpp : VOP3_DPP_Pseudo <OpName, P> {
+ let OtherPredicates = !listconcat(predicates, [HasDPALU_DPP]);
+ }
}
class UniformUnaryFragOrOp<SDPatternOperator Op> {
@@ -1961,6 +1966,17 @@ multiclass VOP3Only_Realtriple_gfx12<bits<10> op, bit isSingle = 0> :
multiclass VOP3Only_Real_Base_gfx12<bits<10> op> :
VOP3_Real_Base<GFX12Gen, op, NAME, 1/*IsSingle*/>;
+multiclass VOP3Only_Realtriple_with_name_gfx12_not_gfx1250<bits<10> op, string opName,
+ string asmName, string pseudo_mnemonic = "",
+ bit isSingle = 0> :
+ VOP3_Realtriple_with_name<GFX12Not12_50Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
+
+multiclass VOP3Only_Real_Base_gfx1250<bits<10> op> :
+ VOP3_Real_Base<GFX1250Gen, op, NAME, 1/*IsSingle*/>;
+
+multiclass VOP3Only_Realtriple_gfx1250<bits<10> op, bit isSingle = 0> :
+ VOP3_Realtriple<GFX1250Gen, op, isSingle>;
+
multiclass VOP3_Realtriple_t16_gfx12<bits<10> op, string asmName, string opName = NAME,
string pseudo_mnemonic = "", bit isSingle = 0> :
VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
new file mode 100644
index 0000000000000..0070c8ab9ee78
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
@@ -0,0 +1,17 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
+
+v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9]
+// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04]
+
+v_lshl_add_u64 v[2:3], v[4:5], 0, 1
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 0, 1 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02]
+
+v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3]
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00]
+
+v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3]
+// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04]
+
+v_lshl_add_u64 v[2:3], v[4:5], v7, 12345
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
new file mode 100644
index 0000000000000..553eacc8e7b61
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
@@ -0,0 +1,17 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
+
+v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9]
+// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04]
+
+v_lshl_add_u64 v[2:3], v[4:5], 0, 1
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 0, 1 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02]
+
+v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3]
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00]
+
+v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3]
+// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04]
+
+v_lshl_add_u64 v[2:3], v[4:5], v7, 12345
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s
new file mode 100644
index 0000000000000..e2fafe415ff7f
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s
@@ -0,0 +1,11 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX125X-ERR,GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s
+
+v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] dpp8:[7,6,5,4,3,2,1,0]
+// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX125X-ERR-NEXT:{{^}}v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] dpp8:[7,6,5,4,3,2,1,0]
+// GFX125X-ERR-NEXT:{{^}} ^
+
+v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] quad_perm:[3,2,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX125X-ERR-NEXT:{{^}}v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] quad_perm:[3,2,1,0]
+// GFX125X-ERR-NEXT:{{^}} ^
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
new file mode 100644
index 0000000000000..d9d8f60fe3d17
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
@@ -0,0 +1,21 @@
+# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
+
+0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04
+# GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04]
+
+0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02
+# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 0, 1 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02]
+
+0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00
+# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00]
+
+0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04
+# GFX1250: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04]
+
+0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
+# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# GFX1250-FAKE16: {{.*}}
+# GFX1250-REAL16: {{.*}}
|
else if P.HasExt64BitDPP then | ||
def _e64_dpp : VOP3_DPP_Pseudo <OpName, P> { | ||
let OtherPredicates = !listconcat(predicates, [HasDPALU_DPP]); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not comfortable that VOP3Inst here uses both SubtargetPredicate and OtherPredicates.
What if we define a VOP3Inst under another Predicate later?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The problem is you would get it on all forms, while it is only needed on _64_dpp. I remember I was really struggling, before came to this.
# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] | ||
## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: | ||
# GFX1250-FAKE16: {{.*}} | ||
# GFX1250-REAL16: {{.*}} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess these prefixes will be used as more instructions added.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, there are 16-bit instructions there. Just not added yet.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with concern for the future with both SubtargetPredicate and OtherPredicates used in VOP3Inst.
} // end SubtargetPredicate = isGFX11Plus | ||
if P.HasExtVOP3DPP then | ||
def _e64_dpp : VOP3_DPP_Pseudo <OpName, P> { | ||
let SubtargetPredicate = isGFX11Plus; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can (or should) we use "let OtherPredicates = [isGFX11Plus]" instead here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
isGFX11Plus is really a subtarget.
It also brings in some DPP changes needed to define it.
It also brings in some DPP changes needed to define it.
It also brings in some DPP changes needed to define it.