[RISCV] Support scalable vector vp.reverse/splice with Zvfhmin/Zvfbfmin. #145588
Conversation
@llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/145588.diff
3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 2b3f8d1cdf60f..712f6154732a2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1156,6 +1156,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE,
ISD::VECTOR_COMPRESS},
VT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
MVT EltVT = VT.getVectorElementType();
if (isTypeLegal(EltVT))
setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
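For context, this hunk is the substance of the change: EXPERIMENTAL_VP_SPLICE and EXPERIMENTAL_VP_REVERSE are now marked Custom for f16/bf16 vector types that are only legal via Zvfhmin/Zvfbfmin. Both operations are pure data movement (slides and gathers), so no half/bfloat arithmetic support is needed. A minimal IR sketch of the kind of input this enables (the intrinsic signature matches the tests below; the function name is illustrative):

declare <vscale x 2 x bfloat> @llvm.experimental.vp.reverse.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)

; Reverse the first %evl lanes of a scalable bf16 vector. With this patch,
; llc with Zvfbfmin (see the RUN lines in the tests below) lowers this via
; vid.v/vrsub.vx/vrgather.vv rather than requiring +zvfh.
define <vscale x 2 x bfloat> @reverse_bf16(<vscale x 2 x bfloat> %src, i32 zeroext %evl) {
  %rev = call <vscale x 2 x bfloat> @llvm.experimental.vp.reverse.nxv2bf16(<vscale x 2 x bfloat> %src, <vscale x 2 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 2 x bfloat> %rev
}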
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll
index 4bbd10df5254f..9fafc4ac0667e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v,+zvfh -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
define <vscale x 1 x half> @test_vp_reverse_nxv1f16_masked(<vscale x 1 x half> %src, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv1f16_masked:
@@ -435,3 +436,177 @@ define <vscale x 32 x half> @test_vp_reverse_nxv32f16(<vscale x 32 x half> %src,
%dst = call <vscale x 32 x half> @llvm.experimental.vp.reverse.nxv32f16(<vscale x 32 x half> %src, <vscale x 32 x i1> splat (i1 1), i32 %evl)
ret <vscale x 32 x half> %dst
}
+
+define <vscale x 1 x bfloat> @test_vp_reverse_nxv1bf16_masked(<vscale x 1 x bfloat> %src, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv1bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %dst = call <vscale x 1 x bfloat> @llvm.experimental.vp.reverse.nxv1bf16(<vscale x 1 x bfloat> %src, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x bfloat> %dst
+}
+
+define <vscale x 1 x bfloat> @test_vp_reverse_nxv1bf16(<vscale x 1 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+
+ %dst = call <vscale x 1 x bfloat> @llvm.experimental.vp.reverse.nxv1bf16(<vscale x 1 x bfloat> %src, <vscale x 1 x i1> splat (i1 1), i32 %evl)
+ ret <vscale x 1 x bfloat> %dst
+}
+
+define <vscale x 2 x bfloat> @test_vp_reverse_nxv2bf16_masked(<vscale x 2 x bfloat> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv2bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %dst = call <vscale x 2 x bfloat> @llvm.experimental.vp.reverse.nxv2bf16(<vscale x 2 x bfloat> %src, <vscale x 2 x i1> %mask, i32 %evl)
+ ret <vscale x 2 x bfloat> %dst
+}
+
+define <vscale x 2 x bfloat> @test_vp_reverse_nxv2bf16(<vscale x 2 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+
+ %dst = call <vscale x 2 x bfloat> @llvm.experimental.vp.reverse.nxv2bf16(<vscale x 2 x bfloat> %src, <vscale x 2 x i1> splat (i1 1), i32 %evl)
+ ret <vscale x 2 x bfloat> %dst
+}
+
+define <vscale x 4 x bfloat> @test_vp_reverse_nxv4bf16_masked(<vscale x 4 x bfloat> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %dst = call <vscale x 4 x bfloat> @llvm.experimental.vp.reverse.nxv4bf16(<vscale x 4 x bfloat> %src, <vscale x 4 x i1> %mask, i32 %evl)
+ ret <vscale x 4 x bfloat> %dst
+}
+
+define <vscale x 4 x bfloat> @test_vp_reverse_nxv4bf16(<vscale x 4 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+
+ %dst = call <vscale x 4 x bfloat> @llvm.experimental.vp.reverse.nxv4bf16(<vscale x 4 x bfloat> %src, <vscale x 4 x i1> splat (i1 1), i32 %evl)
+ ret <vscale x 4 x bfloat> %dst
+}
+
+define <vscale x 8 x bfloat> @test_vp_reverse_nxv8bf16_masked(<vscale x 8 x bfloat> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t
+; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %dst = call <vscale x 8 x bfloat> @llvm.experimental.vp.reverse.nxv8bf16(<vscale x 8 x bfloat> %src, <vscale x 8 x i1> %mask, i32 %evl)
+ ret <vscale x 8 x bfloat> %dst
+}
+
+define <vscale x 8 x bfloat> @test_vp_reverse_nxv8bf16(<vscale x 8 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v12, v10, a1
+; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+
+ %dst = call <vscale x 8 x bfloat> @llvm.experimental.vp.reverse.nxv8bf16(<vscale x 8 x bfloat> %src, <vscale x 8 x i1> splat (i1 1), i32 %evl)
+ ret <vscale x 8 x bfloat> %dst
+}
+
+define <vscale x 16 x bfloat> @test_vp_reverse_nxv16bf16_masked(<vscale x 16 x bfloat> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv16bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vid.v v12, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t
+; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %dst = call <vscale x 16 x bfloat> @llvm.experimental.vp.reverse.nxv16bf16(<vscale x 16 x bfloat> %src, <vscale x 16 x i1> %mask, i32 %evl)
+ ret <vscale x 16 x bfloat> %dst
+}
+
+define <vscale x 16 x bfloat> @test_vp_reverse_nxv16bf16(<vscale x 16 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vid.v v12
+; CHECK-NEXT: vrsub.vx v16, v12, a1
+; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+
+ %dst = call <vscale x 16 x bfloat> @llvm.experimental.vp.reverse.nxv16bf16(<vscale x 16 x bfloat> %src, <vscale x 16 x i1> splat (i1 1), i32 %evl)
+ ret <vscale x 16 x bfloat> %dst
+}
+
+define <vscale x 32 x bfloat> @test_vp_reverse_nxv32bf16_masked(<vscale x 32 x bfloat> %src, <vscale x 32 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv32bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v16, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t
+; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+ %dst = call <vscale x 32 x bfloat> @llvm.experimental.vp.reverse.nxv32bf16(<vscale x 32 x bfloat> %src, <vscale x 32 x i1> %mask, i32 %evl)
+ ret <vscale x 32 x bfloat> %dst
+}
+
+define <vscale x 32 x bfloat> @test_vp_reverse_nxv32bf16(<vscale x 32 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vx v24, v16, a1
+; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+
+ %dst = call <vscale x 32 x bfloat> @llvm.experimental.vp.reverse.nxv32bf16(<vscale x 32 x bfloat> %src, <vscale x 32 x i1> splat (i1 1), i32 %evl)
+ ret <vscale x 32 x bfloat> %dst
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll
index 792afb48fadda..6008ea43e9158 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfh -verify-machineinstrs \
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfh,+zvfbfmin -verify-machineinstrs \
+; RUN: < %s | FileCheck %s
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs \
; RUN: < %s | FileCheck %s
define <vscale x 2 x i64> @test_vp_splice_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
@@ -464,3 +466,42 @@ define <vscale x 2 x half> @test_vp_splice_nxv2f16_masked(<vscale x 2 x half> %v
%v = call <vscale x 2 x half> @llvm.experimental.vp.splice.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, i32 5, <vscale x 2 x i1> %mask, i32 %evla, i32 %evlb)
ret <vscale x 2 x half> %v
}
+
+define <vscale x 2 x bfloat> @test_vp_splice_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, -5
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 5
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v9, a0
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.experimental.vp.splice.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 5, <vscale x 2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @test_vp_splice_nxv2bf16_negative_offset(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_nxv2bf16_negative_offset:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, -5
+; CHECK-NEXT: vsetivli zero, 5, e16, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 5
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.experimental.vp.splice.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 -5, <vscale x 2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @test_vp_splice_nxv2bf16_masked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) {
+; CHECK-LABEL: test_vp_splice_nxv2bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, -5
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.experimental.vp.splice.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 5, <vscale x 2 x i1> %mask, i32 %evla, i32 %evlb)
+ ret <vscale x 2 x bfloat> %v
+}
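The splice lowering is likewise element-type-agnostic: it is a vslidedown of the first operand followed by a vslideup of the second, with vector lengths derived from the two EVL operands. A small sketch of the intrinsic as exercised above (declaration inferred from the calls in the tests; the i32 offset is an immediate, and all names here are illustrative):

declare <vscale x 2 x bfloat> @llvm.experimental.vp.splice.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i32 immarg, <vscale x 2 x i1>, i32, i32)

; Splice %va and %vb: take %va's EVL window starting at element 5, then
; append elements from %vb's EVL window.
define <vscale x 2 x bfloat> @splice_bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
  %v = call <vscale x 2 x bfloat> @llvm.experimental.vp.splice.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 5, <vscale x 2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
  ret <vscale x 2 x bfloat> %v
}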
LGTM
@@ -1156,6 +1156,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE,
ISD::VECTOR_COMPRESS},
VT, Custom);
setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
Not really specific to this change, but the naming of this routine doesn't match what it's used for. We're not always promoting to f32.
Agreed