[AMDGPU] Support v_lshl_add_u64 in gfx1250 #145591

rampitec · 2025-06-24T20:39:41Z

It also brings in some DPP changes needed to define it.

rampitec · 2025-06-24T20:40:03Z

[AMDGPU] Support v_lshl_add_u64 in gfx1250 #145591 👈 (View in Graphite)
main

This stack of pull requests is managed by Graphite. Learn more about stacking.

llvmbot · 2025-06-24T20:41:21Z

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-mc

Author: Stanislav Mekhanoshin (rampitec)

Changes

It also brings in some DPP changes needed to define it.

Full diff: https://github.com/llvm/llvm-project/pull/145591.diff

7 Files Affected:

(modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+5)
(modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+8-1)
(modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+21-5)
(added) llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s (+17)
(added) llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s (+17)
(added) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s (+11)
(added) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt (+21)

diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 07a4292ef28bc..27b3d6bc9440c 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -661,6 +661,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
       if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
         break;
 
+      // FIXME: DecoderTableGFX125064 is not defined yet.
+      if (isGFX1250() &&
+          tryDecodeInst(DecoderTableGFX1250_FAKE1664, MI, QW, Address, CS))
+        break;
+
       if (isGFX12() &&
           tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                         Address, CS))
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 89a9ecc27c6ed..9ed054449c264 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -32,6 +32,7 @@ class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
   let HasExtDPP = 0;
 }
 
+let HasExt64BitDPP = 1 in {
 def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32>;
 def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64>;
 
@@ -48,10 +49,13 @@ class V_MUL_PROF<VOPProfile P> : VOP3_Profile<P> {
   let HasExtDPP = 0;
 }
 
+def V_LSHL_ADD_U64_PROF : VOP3_Profile<VOP_I64_I64_I32_I64>;
+
 def DIV_FIXUP_F32_PROF : VOP3_Profile<VOP_F32_F32_F32_F32> {
   let HasExtVOP3DPP = 0;
   let HasExtDPP = 0;
 }
+} // End HasExt64BitDPP = 1;
 
 //===----------------------------------------------------------------------===//
 // VOP3 INTERP
@@ -722,7 +726,7 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32
 // V_LSHL_ADD_U64: D0.u64 = (S0.u64 << S1.u[2:0]) + S2.u64
 // src0 is shifted left by 0-4 (use “0” to get ADD_U64).
 let SubtargetPredicate = HasLshlAddU64Inst in
-defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>;
+defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", V_LSHL_ADD_U64_PROF>;
 
 let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0,
     SchedRW = [WriteFloatCvt] in {
@@ -1889,6 +1893,9 @@ let AssemblerPredicate = isGFX11Plus in {
   def : AMDGPUMnemonicAlias<"v_xor_add_u32", "v_xad_u32">;
 }
 
+// These instructions differ from GFX12 variant by supporting DPP:
+defm V_LSHL_ADD_U64                  : VOP3Only_Realtriple_gfx1250<0x252>;
+
 //===----------------------------------------------------------------------===//
 // GFX10.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 19d490465f163..0b64b504466c8 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1552,12 +1552,17 @@ class VOP3InstBase<string OpName, VOPProfile P, SDPatternOperator node = null_fr
             ""));
 }
 
-multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> {
+multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag,
+                    list<Predicate> predicates = []> {
   def _e64 : VOP3InstBase<OpName, P, node>;
-  let SubtargetPredicate = isGFX11Plus in {
-    if P.HasExtVOP3DPP then
-      def _e64_dpp : VOP3_DPP_Pseudo <OpName, P>;
-  } // end SubtargetPredicate = isGFX11Plus
+  if P.HasExtVOP3DPP then
+    def _e64_dpp  : VOP3_DPP_Pseudo <OpName, P> {
+      let SubtargetPredicate = isGFX11Plus;
+    }
+  else if P.HasExt64BitDPP then
+    def _e64_dpp  : VOP3_DPP_Pseudo <OpName, P> {
+      let OtherPredicates = !listconcat(predicates, [HasDPALU_DPP]);
+    }
 }
 
 class UniformUnaryFragOrOp<SDPatternOperator Op> {
@@ -1961,6 +1966,17 @@ multiclass VOP3Only_Realtriple_gfx12<bits<10> op, bit isSingle = 0> :
 multiclass VOP3Only_Real_Base_gfx12<bits<10> op> :
   VOP3_Real_Base<GFX12Gen, op, NAME, 1/*IsSingle*/>;
 
+multiclass VOP3Only_Realtriple_with_name_gfx12_not_gfx1250<bits<10> op, string opName,
+                                                           string asmName, string pseudo_mnemonic = "",
+                                                           bit isSingle = 0> :
+  VOP3_Realtriple_with_name<GFX12Not12_50Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
+
+multiclass VOP3Only_Real_Base_gfx1250<bits<10> op> :
+  VOP3_Real_Base<GFX1250Gen, op, NAME, 1/*IsSingle*/>;
+
+multiclass VOP3Only_Realtriple_gfx1250<bits<10> op, bit isSingle = 0> :
+  VOP3_Realtriple<GFX1250Gen, op, isSingle>;
+
 multiclass VOP3_Realtriple_t16_gfx12<bits<10> op, string asmName, string opName = NAME,
                                      string pseudo_mnemonic = "", bit isSingle = 0> :
   VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
new file mode 100644
index 0000000000000..0070c8ab9ee78
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
@@ -0,0 +1,17 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
+
+v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9]
+// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04]
+
+v_lshl_add_u64 v[2:3], v[4:5], 0, 1
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 0, 1     ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02]
+
+v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3]
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00]
+
+v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3]
+// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04]
+
+v_lshl_add_u64 v[2:3], v[4:5], v7, 12345
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
new file mode 100644
index 0000000000000..553eacc8e7b61
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
@@ -0,0 +1,17 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s
+
+v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9]
+// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04]
+
+v_lshl_add_u64 v[2:3], v[4:5], 0, 1
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 0, 1     ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02]
+
+v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3]
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00]
+
+v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3]
+// GFX1250: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04]
+
+v_lshl_add_u64 v[2:3], v[4:5], v7, 12345
+// GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s
new file mode 100644
index 0000000000000..e2fafe415ff7f
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s
@@ -0,0 +1,11 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX125X-ERR,GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s
+
+v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] dpp8:[7,6,5,4,3,2,1,0]
+// GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX125X-ERR-NEXT:{{^}}v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] dpp8:[7,6,5,4,3,2,1,0]
+// GFX125X-ERR-NEXT:{{^}}                                          ^
+
+v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] quad_perm:[3,2,1,0]
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+// GFX125X-ERR-NEXT:{{^}}v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] quad_perm:[3,2,1,0]
+// GFX125X-ERR-NEXT:{{^}}                                          ^
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
new file mode 100644
index 0000000000000..d9d8f60fe3d17
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
@@ -0,0 +1,21 @@
+# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
+
+0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04
+# GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04]
+
+0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02
+# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 0, 1     ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x01,0x05,0x02]
+
+0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00
+# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], 3, s[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x07,0x09,0x00]
+
+0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04
+# GFX1250: v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x08,0x09,0x04]
+
+0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00
+# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# GFX1250-FAKE16: {{.*}}
+# GFX1250-REAL16: {{.*}}

changpeng · 2025-06-24T22:40:20Z

llvm/lib/Target/AMDGPU/VOPInstructions.td

+  else if P.HasExt64BitDPP then
+    def _e64_dpp  : VOP3_DPP_Pseudo <OpName, P> {
+      let OtherPredicates = !listconcat(predicates, [HasDPALU_DPP]);
+    }


I am not comfortable that VOP3Inst here uses both SubtargetPredicate and OtherPredicates.
What if we define a VOP3Inst under another Predicate later?

The problem is that you would get it on all forms, while it is only needed on _e64_dpp. I remember really struggling before I arrived at this.

changpeng · 2025-06-24T22:45:46Z

llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt

+# GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# GFX1250-FAKE16: {{.*}}
+# GFX1250-REAL16: {{.*}}


I guess these prefixes will be used as more instructions are added.

Yes, there are 16-bit instructions there. Just not added yet.

changpeng

LGTM, with a concern for the future about VOP3Inst using both SubtargetPredicate and OtherPredicates.

changpeng · 2025-06-24T22:54:17Z

llvm/lib/Target/AMDGPU/VOPInstructions.td

-  } // end SubtargetPredicate = isGFX11Plus
+  if P.HasExtVOP3DPP then
+    def _e64_dpp  : VOP3_DPP_Pseudo <OpName, P> {
+      let SubtargetPredicate = isGFX11Plus;


Can (or should) we use "let OtherPredicates = [isGFX11Plus]" instead here?

isGFX11Plus is really a subtarget.

It also brings in some DPP changes needed to define it.

[AMDGPU] Support v_lshl_add_u64 in gfx1250

e280059

It also brings in some DPP changes needed to define it.

rampitec requested review from shiltian and cdevadas June 24, 2025 20:40

rampitec marked this pull request as ready for review June 24, 2025 20:40

llvmbot added backend:AMDGPU mc Machine (object) code labels Jun 24, 2025

rampitec requested a review from changpeng June 24, 2025 22:05

changpeng reviewed Jun 24, 2025

View reviewed changes

changpeng approved these changes Jun 24, 2025

View reviewed changes

rampitec merged commit d06c2ef into main Jun 24, 2025
12 checks passed

rampitec deleted the users/rampitec/06-24-_amdgpu_support_v_lshl_add_u64_in_gfx1250 branch June 24, 2025 22:49

changpeng reviewed Jun 24, 2025

View reviewed changes

anthonyhatran pushed a commit to anthonyhatran/llvm-project that referenced this pull request Jun 26, 2025

[AMDGPU] Support v_lshl_add_u64 in gfx1250 (llvm#145591)

815b0ac

It also brings in some DPP changes needed to define it.

rlavaee pushed a commit to rlavaee/llvm-project that referenced this pull request Jul 1, 2025

[AMDGPU] Support v_lshl_add_u64 in gfx1250 (llvm#145591)

be4f0df

It also brings in some DPP changes needed to define it.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU] Support v_lshl_add_u64 in gfx1250 #145591

[AMDGPU] Support v_lshl_add_u64 in gfx1250 #145591

Uh oh!

rampitec commented Jun 24, 2025

Uh oh!

rampitec commented Jun 24, 2025

Uh oh!

llvmbot commented Jun 24, 2025 •

edited

Loading

Uh oh!

changpeng Jun 24, 2025

Uh oh!

rampitec Jun 24, 2025

Uh oh!

changpeng Jun 24, 2025

Uh oh!

rampitec Jun 24, 2025

Uh oh!

changpeng left a comment

Uh oh!

Uh oh!

changpeng Jun 24, 2025

Uh oh!

rampitec Jun 24, 2025

Uh oh!

Uh oh!

[AMDGPU] Support v_lshl_add_u64 in gfx1250 #145591

[AMDGPU] Support v_lshl_add_u64 in gfx1250 #145591

Uh oh!

Conversation

rampitec commented Jun 24, 2025

Uh oh!

rampitec commented Jun 24, 2025

Uh oh!

llvmbot commented Jun 24, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

changpeng Jun 24, 2025

Choose a reason for hiding this comment

Uh oh!

rampitec Jun 24, 2025

Choose a reason for hiding this comment

Uh oh!

changpeng Jun 24, 2025

Choose a reason for hiding this comment

Uh oh!

rampitec Jun 24, 2025

Choose a reason for hiding this comment

Uh oh!

changpeng left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

changpeng Jun 24, 2025

Choose a reason for hiding this comment

Uh oh!

rampitec Jun 24, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvmbot commented Jun 24, 2025 •

edited

Loading