-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[RISCV] Support LLVM IR intrinsics for XAndesVBFHCvt #145321
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This patch adds LLVM IR intrinsic support for XAndesVBFHCvt. The document for the intrinsics can be found at: https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#vector-widening-convert-intrinsicsxandesvbfhcvt https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#vector-narrowing-convert-intrinsicsxandesvbfhcvt Vector bf16 load/store intrinsics are also enabled when +xandesvbfhcvt is specified. The corresponding LLVM IR intrinsic test cases will be added in follow-up patches. The clang part will be added in a later patch. Co-authored-by: Tony Chuan-Yue Yuan <yuan593@andestech.com>
@llvm/pr-subscribers-backend-risc-v Author: Jim Lin (tclin914) ChangesThis patch adds LLVM IR intrinsic support for XAndesVBFHCvt. The document for the intrinsics can be found at: Vector bf16 load/store intrinsics are also enabled when +xandesvbfhcvt is specified. The corresponding LLVM IR intrinsic test cases will be added in follow-up patches. The clang part will be added in a later patch. Full diff: https://github.com/llvm/llvm-project/pull/145321.diff 5 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td b/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td
index 270066f815d8b..92a07e9a6a5d0 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td
@@ -11,6 +11,10 @@
//===----------------------------------------------------------------------===//
let TargetPrefix = "riscv" in {
+ // Andes Vector BFloat16 Conversion Extension
+ def int_riscv_nds_vfwcvt_s_bf16 : RISCVConversionUnMasked;
+ def int_riscv_nds_vfncvt_bf16_s : RISCVConversionUnMaskedRoundingMode;
+
// Andes Vector Packed FP16 Extension
defm nds_vfpmadt : RISCVBinaryAAXRoundingMode;
defm nds_vfpmadb : RISCVBinaryAAXRoundingMode;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9e568052079ce..8f428ff6a54f3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -215,7 +215,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
for (MVT VT : F16VecVTs)
addRegClassForRVV(VT);
- if (Subtarget.hasVInstructionsBF16Minimal())
+ if (Subtarget.hasVInstructionsBF16Minimal() ||
+ Subtarget.hasVendorXAndesVBFHCvt())
for (MVT VT : BF16VecVTs)
addRegClassForRVV(VT);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
index 4b8d40d1429aa..0e2fcd2336151 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
@@ -384,6 +384,46 @@ class NDSRVInstVBFHCvt<bits<7> funct7, bits<5> vs1, string opcodestr>
// Multiclass
//===----------------------------------------------------------------------===//
+multiclass VPseudoVWCVT_S_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in {
+ let VLMul = m.value, SEW=16 in
+ def "_" # m.MX : VPseudoUnaryNoMask<m.wvrclass, m.vrclass, constraint>,
+ SchedUnary<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV", m.MX, 16,
+ forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVNCVT_BF16_S {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in {
+ let VLMul = m.value, SEW=16 in
+ def "_" # m.MX : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.wvrclass, constraint>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, 16,
+ forcePassthruRead=true>;
+ }
+}
+
+multiclass VPatConversionS_BF16<string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = [HasVendorXAndesVBFHCvt] in
+ def : VPatUnaryNoMask<intrinsic, instruction, "BF16", fwti.Vector, fvti.Vector,
+ fvti.Log2SEW, fvti.LMul, fwti.RegClass, fvti.RegClass>;
+ }
+}
+
+multiclass VPatConversionBF16_S<string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = [HasVendorXAndesVBFHCvt] in
+ def : VPatUnaryNoMaskRoundingMode<intrinsic, instruction, "S", fvti.Vector, fwti.Vector,
+ fvti.Log2SEW, fvti.LMul, fvti.RegClass, fwti.RegClass>;
+ }
+}
+
let fprclass = !cast<RegisterClass>("FPR32") in
def SCALAR_F16_FPR32 : FPR_Info<16>;
@@ -547,6 +587,14 @@ def : Sh2AddPat<NDS_LEA_W_ZE>;
def : Sh3AddPat<NDS_LEA_D_ZE>;
} // Predicates = [HasVendorXAndesPerf, IsRV64]
+let Predicates = [HasVendorXAndesVBFHCvt] in {
+defm PseudoNDS_VFWCVT_S_BF16 : VPseudoVWCVT_S_BF16;
+defm PseudoNDS_VFNCVT_BF16_S : VPseudoVNCVT_BF16_S;
+} // Predicates = [HasVendorXAndesVBFHCvt]
+
+defm : VPatConversionS_BF16<"int_riscv_nds_vfwcvt_s_bf16", "PseudoNDS_VFWCVT_S">;
+defm : VPatConversionBF16_S<"int_riscv_nds_vfncvt_bf16_s", "PseudoNDS_VFNCVT_BF16">;
+
let Predicates = [HasVendorXAndesVPackFPH],
mayRaiseFPException = true in {
defm PseudoNDS_VFPMADT : VPseudoVFPMAD_VF_RM;
diff --git a/llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfncvt-bf16-s.ll b/llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfncvt-bf16-s.ll
new file mode 100644
index 0000000000000..3cde575db8f05
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfncvt-bf16-s.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+xandesvbfhcvt \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+xandesvbfhcvt \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+define <vscale x 1 x bfloat> @intrinsic_vfncvt_bf16.s_nxv1bf16_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv1bf16_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: nds.vfncvt.bf16.s v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat> undef,
+ <vscale x 1 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 2 x bfloat> @intrinsic_vfncvt_bf16.s_nxv2bf16_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv2bf16_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: nds.vfncvt.bf16.s v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat> undef,
+ <vscale x 2 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+define <vscale x 4 x bfloat> @intrinsic_vfncvt_bf16.s_nxv4bf16_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv4bf16_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: nds.vfncvt.bf16.s v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat> undef,
+ <vscale x 4 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+define <vscale x 8 x bfloat> @intrinsic_vfncvt_bf16.s_nxv8bf16_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv8bf16_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: nds.vfncvt.bf16.s v12, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat> undef,
+ <vscale x 8 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+define <vscale x 16 x bfloat> @intrinsic_vfncvt_bf16.s_nxv16bf16_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv16bf16_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: nds.vfncvt.bf16.s v16, v8
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat> undef,
+ <vscale x 16 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfwcvt-s-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfwcvt-s-bf16.ll
new file mode 100644
index 0000000000000..d44295d2480c1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfwcvt-s-bf16.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+xandesvbfhcvt \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+xandesvbfhcvt \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+define <vscale x 1 x float> @intrinsic_vfwcvt_s.bf16_nxv1f32_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: nds.vfwcvt.s.bf16 v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwcvt_s.bf16_nxv2f32_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: nds.vfwcvt.s.bf16 v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> undef,
+ <vscale x 2 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwcvt_s.bf16_nxv4f32_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: nds.vfwcvt.s.bf16 v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> undef,
+ <vscale x 4 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwcvt_s.bf16_nxv8f32_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: nds.vfwcvt.s.bf16 v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> undef,
+ <vscale x 8 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 16 x float> @intrinsic_vfwcvt_s.bf16_nxv16f32_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: nds.vfwcvt.s.bf16 v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> undef,
+ <vscale x 16 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x float> %a
+}
|
@llvm/pr-subscribers-llvm-ir Author: Jim Lin (tclin914) ChangesThis patch adds LLVM IR intrinsic support for XAndesVBFHCvt. The document for the intrinsics can be found at: Vector bf16 load/store intrinsics are also enabled when +xandesvbfhcvt is specified. The corresponding LLVM IR intrinsic test cases will be added in follow-up patches. The clang part will be added in a later patch. Full diff: https://github.com/llvm/llvm-project/pull/145321.diff 5 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td b/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td
index 270066f815d8b..92a07e9a6a5d0 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td
@@ -11,6 +11,10 @@
//===----------------------------------------------------------------------===//
let TargetPrefix = "riscv" in {
+ // Andes Vector BFloat16 Conversion Extension
+ def int_riscv_nds_vfwcvt_s_bf16 : RISCVConversionUnMasked;
+ def int_riscv_nds_vfncvt_bf16_s : RISCVConversionUnMaskedRoundingMode;
+
// Andes Vector Packed FP16 Extension
defm nds_vfpmadt : RISCVBinaryAAXRoundingMode;
defm nds_vfpmadb : RISCVBinaryAAXRoundingMode;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9e568052079ce..8f428ff6a54f3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -215,7 +215,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
for (MVT VT : F16VecVTs)
addRegClassForRVV(VT);
- if (Subtarget.hasVInstructionsBF16Minimal())
+ if (Subtarget.hasVInstructionsBF16Minimal() ||
+ Subtarget.hasVendorXAndesVBFHCvt())
for (MVT VT : BF16VecVTs)
addRegClassForRVV(VT);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
index 4b8d40d1429aa..0e2fcd2336151 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
@@ -384,6 +384,46 @@ class NDSRVInstVBFHCvt<bits<7> funct7, bits<5> vs1, string opcodestr>
// Multiclass
//===----------------------------------------------------------------------===//
+multiclass VPseudoVWCVT_S_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in {
+ let VLMul = m.value, SEW=16 in
+ def "_" # m.MX : VPseudoUnaryNoMask<m.wvrclass, m.vrclass, constraint>,
+ SchedUnary<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV", m.MX, 16,
+ forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVNCVT_BF16_S {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in {
+ let VLMul = m.value, SEW=16 in
+ def "_" # m.MX : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.wvrclass, constraint>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, 16,
+ forcePassthruRead=true>;
+ }
+}
+
+multiclass VPatConversionS_BF16<string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = [HasVendorXAndesVBFHCvt] in
+ def : VPatUnaryNoMask<intrinsic, instruction, "BF16", fwti.Vector, fvti.Vector,
+ fvti.Log2SEW, fvti.LMul, fwti.RegClass, fvti.RegClass>;
+ }
+}
+
+multiclass VPatConversionBF16_S<string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = [HasVendorXAndesVBFHCvt] in
+ def : VPatUnaryNoMaskRoundingMode<intrinsic, instruction, "S", fvti.Vector, fwti.Vector,
+ fvti.Log2SEW, fvti.LMul, fvti.RegClass, fwti.RegClass>;
+ }
+}
+
let fprclass = !cast<RegisterClass>("FPR32") in
def SCALAR_F16_FPR32 : FPR_Info<16>;
@@ -547,6 +587,14 @@ def : Sh2AddPat<NDS_LEA_W_ZE>;
def : Sh3AddPat<NDS_LEA_D_ZE>;
} // Predicates = [HasVendorXAndesPerf, IsRV64]
+let Predicates = [HasVendorXAndesVBFHCvt] in {
+defm PseudoNDS_VFWCVT_S_BF16 : VPseudoVWCVT_S_BF16;
+defm PseudoNDS_VFNCVT_BF16_S : VPseudoVNCVT_BF16_S;
+} // Predicates = [HasVendorXAndesVBFHCvt]
+
+defm : VPatConversionS_BF16<"int_riscv_nds_vfwcvt_s_bf16", "PseudoNDS_VFWCVT_S">;
+defm : VPatConversionBF16_S<"int_riscv_nds_vfncvt_bf16_s", "PseudoNDS_VFNCVT_BF16">;
+
let Predicates = [HasVendorXAndesVPackFPH],
mayRaiseFPException = true in {
defm PseudoNDS_VFPMADT : VPseudoVFPMAD_VF_RM;
diff --git a/llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfncvt-bf16-s.ll b/llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfncvt-bf16-s.ll
new file mode 100644
index 0000000000000..3cde575db8f05
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfncvt-bf16-s.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+xandesvbfhcvt \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+xandesvbfhcvt \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+define <vscale x 1 x bfloat> @intrinsic_vfncvt_bf16.s_nxv1bf16_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv1bf16_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: nds.vfncvt.bf16.s v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat> undef,
+ <vscale x 1 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 2 x bfloat> @intrinsic_vfncvt_bf16.s_nxv2bf16_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv2bf16_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: nds.vfncvt.bf16.s v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat> undef,
+ <vscale x 2 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+define <vscale x 4 x bfloat> @intrinsic_vfncvt_bf16.s_nxv4bf16_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv4bf16_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: nds.vfncvt.bf16.s v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat> undef,
+ <vscale x 4 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+define <vscale x 8 x bfloat> @intrinsic_vfncvt_bf16.s_nxv8bf16_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv8bf16_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: nds.vfncvt.bf16.s v12, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat> undef,
+ <vscale x 8 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+define <vscale x 16 x bfloat> @intrinsic_vfncvt_bf16.s_nxv16bf16_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv16bf16_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: nds.vfncvt.bf16.s v16, v8
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat> undef,
+ <vscale x 16 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfwcvt-s-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfwcvt-s-bf16.ll
new file mode 100644
index 0000000000000..d44295d2480c1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfwcvt-s-bf16.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+xandesvbfhcvt \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+xandesvbfhcvt \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+define <vscale x 1 x float> @intrinsic_vfwcvt_s.bf16_nxv1f32_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: nds.vfwcvt.s.bf16 v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwcvt_s.bf16_nxv2f32_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: nds.vfwcvt.s.bf16 v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> undef,
+ <vscale x 2 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwcvt_s.bf16_nxv4f32_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: nds.vfwcvt.s.bf16 v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> undef,
+ <vscale x 4 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwcvt_s.bf16_nxv8f32_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: nds.vfwcvt.s.bf16 v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> undef,
+ <vscale x 8 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 16 x float> @intrinsic_vfwcvt_s.bf16_nxv16f32_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: nds.vfwcvt.s.bf16 v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> undef,
+ <vscale x 16 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x float> %a
+}
|
You can test this locally with the following command:git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfncvt-bf16-s.ll llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfwcvt-s-bf16.ll llvm/lib/Target/RISCV/RISCVISelLowering.cpp The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields In tests, avoid using For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
} Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
} Please refer to the Undefined Behavior Manual for more information. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This patch adds LLVM IR intrinsic support for XAndesVBFHCvt. The document for the intrinsics can be found at: https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#vector-widening-convert-intrinsicsxandesvbfhcvt https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#vector-narrowing-convert-intrinsicsxandesvbfhcvt Vector bf16 load/store intrinsics are also enabled when +xandesvbfhcvt is specified. The corresponding LLVM IR intrinsic test cases will be added in follow-up patches. The clang part will be added in a later patch. Co-authored-by: Tony Chuan-Yue Yuan <yuan593@andestech.com>
This patch adds LLVM IR intrinsic support for XAndesVBFHCvt. The document for the intrinsics can be found at: https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#vector-widening-convert-intrinsicsxandesvbfhcvt https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#vector-narrowing-convert-intrinsicsxandesvbfhcvt Vector bf16 load/store intrinsics are also enabled when +xandesvbfhcvt is specified. The corresponding LLVM IR intrinsic test cases will be added in follow-up patches. The clang part will be added in a later patch. Co-authored-by: Tony Chuan-Yue Yuan <yuan593@andestech.com>
This patch adds LLVM IR intrinsic support for XAndesVBFHCvt.
The document for the intrinsics can be found at:
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#vector-widening-convert-intrinsicsxandesvbfhcvt https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#vector-narrowing-convert-intrinsicsxandesvbfhcvt
Vector bf16 load/store intrinsics are also enabled when +xandesvbfhcvt is specified. The corresponding LLVM IR intrinsic test cases will be added in follow-up patches.
The clang part will be added in a later patch.