-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[ARM] Add neon vector support for floor #142559
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This marks ffloor as legal provided that armv8 and neon are present (or fullfp16 for the fp16 instructions). The existing arm_neon_vrintm intrinsics are auto-upgraded to llvm.floor.
@llvm/pr-subscribers-backend-arm @llvm/pr-subscribers-clang Author: David Green (davemgreen) Changes: This marks ffloor as legal provided that armv8 and neon are present (or fullfp16 for the fp16 instructions). The existing arm_neon_vrintm intrinsics are auto-upgraded to llvm.floor. If this is OK I will update the other vrint intrinsics. Full diff: https://github.com/llvm/llvm-project/pull/142559.diff 8 Files Affected:
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 1cf8f6819b75a..e0bc2fb144e04 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -843,8 +843,8 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
NEONMAP0(vrndi_v),
NEONMAP0(vrndiq_v),
- NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
- NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
+ NEONMAP1(vrndm_v, floor, Add1ArgType),
+ NEONMAP1(vrndmq_v, floor, Add1ArgType),
NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
diff --git a/clang/test/CodeGen/arm-neon-directed-rounding.c b/clang/test/CodeGen/arm-neon-directed-rounding.c
index be587ea8e697a..6ef05544667b2 100644
--- a/clang/test/CodeGen/arm-neon-directed-rounding.c
+++ b/clang/test/CodeGen/arm-neon-directed-rounding.c
@@ -66,7 +66,7 @@ float32x4_t test_vrndaq_f32(float32x4_t a) {
// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK-A32-NEXT: [[VRNDM_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-A32-NEXT: [[VRNDM_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintm.v2f32(<2 x float> [[VRNDM_V_I]])
+// CHECK-A32-NEXT: [[VRNDM_V1_I:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[VRNDM_V_I]])
// CHECK-A32-NEXT: [[VRNDM_V2_I:%.*]] = bitcast <2 x float> [[VRNDM_V1_I]] to <8 x i8>
// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRNDM_V2_I]] to <2 x i32>
// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
@@ -91,7 +91,7 @@ float32x2_t test_vrndm_f32(float32x2_t a) {
// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
// CHECK-A32-NEXT: [[VRNDMQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-A32-NEXT: [[VRNDMQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintm.v4f32(<4 x float> [[VRNDMQ_V_I]])
+// CHECK-A32-NEXT: [[VRNDMQ_V1_I:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[VRNDMQ_V_I]])
// CHECK-A32-NEXT: [[VRNDMQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDMQ_V1_I]] to <16 x i8>
// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRNDMQ_V2_I]] to <4 x i32>
// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
diff --git a/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c b/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c
index f85deeeca757f..2ea80d5b8389b 100644
--- a/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c
+++ b/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c
@@ -586,7 +586,7 @@ float16x8_t test_vrndaq_f16(float16x8_t a) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK-NEXT: [[VRNDM_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT: [[VRNDM_V1_I:%.*]] = call <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half> [[VRNDM_V_I]])
+// CHECK-NEXT: [[VRNDM_V1_I:%.*]] = call <4 x half> @llvm.floor.v4f16(<4 x half> [[VRNDM_V_I]])
// CHECK-NEXT: [[VRNDM_V2_I:%.*]] = bitcast <4 x half> [[VRNDM_V1_I]] to <8 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRNDM_V2_I]] to <4 x i16>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
@@ -602,7 +602,7 @@ float16x4_t test_vrndm_f16(float16x4_t a) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
// CHECK-NEXT: [[VRNDMQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT: [[VRNDMQ_V1_I:%.*]] = call <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half> [[VRNDMQ_V_I]])
+// CHECK-NEXT: [[VRNDMQ_V1_I:%.*]] = call <8 x half> @llvm.floor.v8f16(<8 x half> [[VRNDMQ_V_I]])
// CHECK-NEXT: [[VRNDMQ_V2_I:%.*]] = bitcast <8 x half> [[VRNDMQ_V1_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRNDMQ_V2_I]] to <8 x i16>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <8 x half>
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index 9b7dd8099368d..a38d201cb623c 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -682,7 +682,6 @@ def int_arm_neon_vrintn : Neon_1FloatArg_Intrinsic;
def int_arm_neon_vrintx : Neon_1Arg_Intrinsic;
def int_arm_neon_vrinta : Neon_1Arg_Intrinsic;
def int_arm_neon_vrintz : Neon_1Arg_Intrinsic;
-def int_arm_neon_vrintm : Neon_1Arg_Intrinsic;
def int_arm_neon_vrintp : Neon_1Arg_Intrinsic;
// De-interleaving vector loads from N-element structures.
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 7ba6d411bc7b5..b96de6410cb13 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -719,6 +719,7 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
.StartsWith("vqaddu.", Intrinsic::uadd_sat)
.StartsWith("vqsubs.", Intrinsic::ssub_sat)
.StartsWith("vqsubu.", Intrinsic::usub_sat)
+ .StartsWith("vrintm.", Intrinsic::floor)
.Default(Intrinsic::not_intrinsic);
if (ID != Intrinsic::not_intrinsic) {
NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index be4876d0667ab..d4f874e647869 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1598,6 +1598,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
+ if (Subtarget->hasV8Ops()) {
+ setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ }
+
if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal);
@@ -1608,6 +1613,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal);
setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::v4f16, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v8f16, Legal);
}
}
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 3335f52f15555..7e6b71501fff9 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -7316,7 +7316,7 @@ defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
-defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
+defm VRINTMN : VRINT_FPI<"m", 0b101, ffloor>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
// Cryptography instructions
diff --git a/llvm/test/CodeGen/ARM/vrint.ll b/llvm/test/CodeGen/ARM/vrint.ll
index 2391e9499479b..6675900f4f448 100644
--- a/llvm/test/CodeGen/ARM/vrint.ll
+++ b/llvm/test/CodeGen/ARM/vrint.ll
@@ -813,21 +813,7 @@ define <4 x half> @frintm_4h(<4 x half> %A) nounwind {
;
; CHECK-FP16-LABEL: frintm_4h:
; CHECK-FP16: @ %bb.0:
-; CHECK-FP16-NEXT: vmovx.f16 s2, s0
-; CHECK-FP16-NEXT: vrintm.f16 s2, s2
-; CHECK-FP16-NEXT: vmov r0, s2
-; CHECK-FP16-NEXT: vrintm.f16 s2, s0
-; CHECK-FP16-NEXT: vmov r1, s2
-; CHECK-FP16-NEXT: vrintm.f16 s2, s1
-; CHECK-FP16-NEXT: vmovx.f16 s0, s1
-; CHECK-FP16-NEXT: vrintm.f16 s0, s0
-; CHECK-FP16-NEXT: vmov.16 d16[0], r1
-; CHECK-FP16-NEXT: vmov.16 d16[1], r0
-; CHECK-FP16-NEXT: vmov r0, s2
-; CHECK-FP16-NEXT: vmov.16 d16[2], r0
-; CHECK-FP16-NEXT: vmov r0, s0
-; CHECK-FP16-NEXT: vmov.16 d16[3], r0
-; CHECK-FP16-NEXT: vorr d0, d16, d16
+; CHECK-FP16-NEXT: vrintm.f16 d0, d0
; CHECK-FP16-NEXT: bx lr
%tmp3 = call <4 x half> @llvm.floor.v4f16(<4 x half> %A)
ret <4 x half> %tmp3
@@ -977,35 +963,7 @@ define <8 x half> @frintm_8h(<8 x half> %A) nounwind {
;
; CHECK-FP16-LABEL: frintm_8h:
; CHECK-FP16: @ %bb.0:
-; CHECK-FP16-NEXT: vmovx.f16 s4, s2
-; CHECK-FP16-NEXT: vrintm.f16 s4, s4
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vrintm.f16 s4, s2
-; CHECK-FP16-NEXT: vmov r1, s4
-; CHECK-FP16-NEXT: vrintm.f16 s4, s3
-; CHECK-FP16-NEXT: vmov.16 d17[0], r1
-; CHECK-FP16-NEXT: vmov.16 d17[1], r0
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vmovx.f16 s4, s3
-; CHECK-FP16-NEXT: vrintm.f16 s4, s4
-; CHECK-FP16-NEXT: vmov.16 d17[2], r0
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vmovx.f16 s4, s0
-; CHECK-FP16-NEXT: vrintm.f16 s4, s4
-; CHECK-FP16-NEXT: vmov.16 d17[3], r0
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vrintm.f16 s4, s0
-; CHECK-FP16-NEXT: vmovx.f16 s0, s1
-; CHECK-FP16-NEXT: vmov r1, s4
-; CHECK-FP16-NEXT: vrintm.f16 s4, s1
-; CHECK-FP16-NEXT: vrintm.f16 s0, s0
-; CHECK-FP16-NEXT: vmov.16 d16[0], r1
-; CHECK-FP16-NEXT: vmov.16 d16[1], r0
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vmov.16 d16[2], r0
-; CHECK-FP16-NEXT: vmov r0, s0
-; CHECK-FP16-NEXT: vmov.16 d16[3], r0
-; CHECK-FP16-NEXT: vorr q0, q8, q8
+; CHECK-FP16-NEXT: vrintm.f16 q0, q0
; CHECK-FP16-NEXT: bx lr
%tmp3 = call <8 x half> @llvm.floor.v8f16(<8 x half> %A)
ret <8 x half> %tmp3
@@ -1031,9 +989,7 @@ define <2 x float> @frintm_2s(<2 x float> %A) nounwind {
;
; CHECK-LABEL: frintm_2s:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vrintm.f32 s3, s1
-; CHECK-NEXT: vrintm.f32 s2, s0
-; CHECK-NEXT: vmov.f64 d0, d1
+; CHECK-NEXT: vrintm.f32 d0, d0
; CHECK-NEXT: bx lr
%tmp3 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A)
ret <2 x float> %tmp3
@@ -1065,11 +1021,7 @@ define <4 x float> @frintm_4s(<4 x float> %A) nounwind {
;
; CHECK-LABEL: frintm_4s:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vrintm.f32 s7, s3
-; CHECK-NEXT: vrintm.f32 s6, s2
-; CHECK-NEXT: vrintm.f32 s5, s1
-; CHECK-NEXT: vrintm.f32 s4, s0
-; CHECK-NEXT: vorr q0, q1, q1
+; CHECK-NEXT: vrintm.f32 q0, q0
; CHECK-NEXT: bx lr
%tmp3 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A)
ret <4 x float> %tmp3
|
@llvm/pr-subscribers-llvm-ir Author: David Green (davemgreen) Changes: This marks ffloor as legal provided that armv8 and neon are present (or fullfp16 for the fp16 instructions). The existing arm_neon_vrintm intrinsics are auto-upgraded to llvm.floor. If this is OK I will update the other vrint intrinsics. Full diff: https://github.com/llvm/llvm-project/pull/142559.diff 8 Files Affected:
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 1cf8f6819b75a..e0bc2fb144e04 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -843,8 +843,8 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
NEONMAP0(vrndi_v),
NEONMAP0(vrndiq_v),
- NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
- NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
+ NEONMAP1(vrndm_v, floor, Add1ArgType),
+ NEONMAP1(vrndmq_v, floor, Add1ArgType),
NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
diff --git a/clang/test/CodeGen/arm-neon-directed-rounding.c b/clang/test/CodeGen/arm-neon-directed-rounding.c
index be587ea8e697a..6ef05544667b2 100644
--- a/clang/test/CodeGen/arm-neon-directed-rounding.c
+++ b/clang/test/CodeGen/arm-neon-directed-rounding.c
@@ -66,7 +66,7 @@ float32x4_t test_vrndaq_f32(float32x4_t a) {
// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK-A32-NEXT: [[VRNDM_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-A32-NEXT: [[VRNDM_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintm.v2f32(<2 x float> [[VRNDM_V_I]])
+// CHECK-A32-NEXT: [[VRNDM_V1_I:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[VRNDM_V_I]])
// CHECK-A32-NEXT: [[VRNDM_V2_I:%.*]] = bitcast <2 x float> [[VRNDM_V1_I]] to <8 x i8>
// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRNDM_V2_I]] to <2 x i32>
// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
@@ -91,7 +91,7 @@ float32x2_t test_vrndm_f32(float32x2_t a) {
// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
// CHECK-A32-NEXT: [[VRNDMQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-A32-NEXT: [[VRNDMQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintm.v4f32(<4 x float> [[VRNDMQ_V_I]])
+// CHECK-A32-NEXT: [[VRNDMQ_V1_I:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[VRNDMQ_V_I]])
// CHECK-A32-NEXT: [[VRNDMQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDMQ_V1_I]] to <16 x i8>
// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRNDMQ_V2_I]] to <4 x i32>
// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
diff --git a/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c b/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c
index f85deeeca757f..2ea80d5b8389b 100644
--- a/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c
+++ b/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c
@@ -586,7 +586,7 @@ float16x8_t test_vrndaq_f16(float16x8_t a) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK-NEXT: [[VRNDM_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT: [[VRNDM_V1_I:%.*]] = call <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half> [[VRNDM_V_I]])
+// CHECK-NEXT: [[VRNDM_V1_I:%.*]] = call <4 x half> @llvm.floor.v4f16(<4 x half> [[VRNDM_V_I]])
// CHECK-NEXT: [[VRNDM_V2_I:%.*]] = bitcast <4 x half> [[VRNDM_V1_I]] to <8 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRNDM_V2_I]] to <4 x i16>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
@@ -602,7 +602,7 @@ float16x4_t test_vrndm_f16(float16x4_t a) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
// CHECK-NEXT: [[VRNDMQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT: [[VRNDMQ_V1_I:%.*]] = call <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half> [[VRNDMQ_V_I]])
+// CHECK-NEXT: [[VRNDMQ_V1_I:%.*]] = call <8 x half> @llvm.floor.v8f16(<8 x half> [[VRNDMQ_V_I]])
// CHECK-NEXT: [[VRNDMQ_V2_I:%.*]] = bitcast <8 x half> [[VRNDMQ_V1_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRNDMQ_V2_I]] to <8 x i16>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <8 x half>
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index 9b7dd8099368d..a38d201cb623c 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -682,7 +682,6 @@ def int_arm_neon_vrintn : Neon_1FloatArg_Intrinsic;
def int_arm_neon_vrintx : Neon_1Arg_Intrinsic;
def int_arm_neon_vrinta : Neon_1Arg_Intrinsic;
def int_arm_neon_vrintz : Neon_1Arg_Intrinsic;
-def int_arm_neon_vrintm : Neon_1Arg_Intrinsic;
def int_arm_neon_vrintp : Neon_1Arg_Intrinsic;
// De-interleaving vector loads from N-element structures.
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 7ba6d411bc7b5..b96de6410cb13 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -719,6 +719,7 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
.StartsWith("vqaddu.", Intrinsic::uadd_sat)
.StartsWith("vqsubs.", Intrinsic::ssub_sat)
.StartsWith("vqsubu.", Intrinsic::usub_sat)
+ .StartsWith("vrintm.", Intrinsic::floor)
.Default(Intrinsic::not_intrinsic);
if (ID != Intrinsic::not_intrinsic) {
NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index be4876d0667ab..d4f874e647869 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1598,6 +1598,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
+ if (Subtarget->hasV8Ops()) {
+ setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ }
+
if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal);
@@ -1608,6 +1613,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal);
setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::v4f16, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v8f16, Legal);
}
}
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 3335f52f15555..7e6b71501fff9 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -7316,7 +7316,7 @@ defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
-defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
+defm VRINTMN : VRINT_FPI<"m", 0b101, ffloor>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
// Cryptography instructions
diff --git a/llvm/test/CodeGen/ARM/vrint.ll b/llvm/test/CodeGen/ARM/vrint.ll
index 2391e9499479b..6675900f4f448 100644
--- a/llvm/test/CodeGen/ARM/vrint.ll
+++ b/llvm/test/CodeGen/ARM/vrint.ll
@@ -813,21 +813,7 @@ define <4 x half> @frintm_4h(<4 x half> %A) nounwind {
;
; CHECK-FP16-LABEL: frintm_4h:
; CHECK-FP16: @ %bb.0:
-; CHECK-FP16-NEXT: vmovx.f16 s2, s0
-; CHECK-FP16-NEXT: vrintm.f16 s2, s2
-; CHECK-FP16-NEXT: vmov r0, s2
-; CHECK-FP16-NEXT: vrintm.f16 s2, s0
-; CHECK-FP16-NEXT: vmov r1, s2
-; CHECK-FP16-NEXT: vrintm.f16 s2, s1
-; CHECK-FP16-NEXT: vmovx.f16 s0, s1
-; CHECK-FP16-NEXT: vrintm.f16 s0, s0
-; CHECK-FP16-NEXT: vmov.16 d16[0], r1
-; CHECK-FP16-NEXT: vmov.16 d16[1], r0
-; CHECK-FP16-NEXT: vmov r0, s2
-; CHECK-FP16-NEXT: vmov.16 d16[2], r0
-; CHECK-FP16-NEXT: vmov r0, s0
-; CHECK-FP16-NEXT: vmov.16 d16[3], r0
-; CHECK-FP16-NEXT: vorr d0, d16, d16
+; CHECK-FP16-NEXT: vrintm.f16 d0, d0
; CHECK-FP16-NEXT: bx lr
%tmp3 = call <4 x half> @llvm.floor.v4f16(<4 x half> %A)
ret <4 x half> %tmp3
@@ -977,35 +963,7 @@ define <8 x half> @frintm_8h(<8 x half> %A) nounwind {
;
; CHECK-FP16-LABEL: frintm_8h:
; CHECK-FP16: @ %bb.0:
-; CHECK-FP16-NEXT: vmovx.f16 s4, s2
-; CHECK-FP16-NEXT: vrintm.f16 s4, s4
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vrintm.f16 s4, s2
-; CHECK-FP16-NEXT: vmov r1, s4
-; CHECK-FP16-NEXT: vrintm.f16 s4, s3
-; CHECK-FP16-NEXT: vmov.16 d17[0], r1
-; CHECK-FP16-NEXT: vmov.16 d17[1], r0
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vmovx.f16 s4, s3
-; CHECK-FP16-NEXT: vrintm.f16 s4, s4
-; CHECK-FP16-NEXT: vmov.16 d17[2], r0
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vmovx.f16 s4, s0
-; CHECK-FP16-NEXT: vrintm.f16 s4, s4
-; CHECK-FP16-NEXT: vmov.16 d17[3], r0
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vrintm.f16 s4, s0
-; CHECK-FP16-NEXT: vmovx.f16 s0, s1
-; CHECK-FP16-NEXT: vmov r1, s4
-; CHECK-FP16-NEXT: vrintm.f16 s4, s1
-; CHECK-FP16-NEXT: vrintm.f16 s0, s0
-; CHECK-FP16-NEXT: vmov.16 d16[0], r1
-; CHECK-FP16-NEXT: vmov.16 d16[1], r0
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vmov.16 d16[2], r0
-; CHECK-FP16-NEXT: vmov r0, s0
-; CHECK-FP16-NEXT: vmov.16 d16[3], r0
-; CHECK-FP16-NEXT: vorr q0, q8, q8
+; CHECK-FP16-NEXT: vrintm.f16 q0, q0
; CHECK-FP16-NEXT: bx lr
%tmp3 = call <8 x half> @llvm.floor.v8f16(<8 x half> %A)
ret <8 x half> %tmp3
@@ -1031,9 +989,7 @@ define <2 x float> @frintm_2s(<2 x float> %A) nounwind {
;
; CHECK-LABEL: frintm_2s:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vrintm.f32 s3, s1
-; CHECK-NEXT: vrintm.f32 s2, s0
-; CHECK-NEXT: vmov.f64 d0, d1
+; CHECK-NEXT: vrintm.f32 d0, d0
; CHECK-NEXT: bx lr
%tmp3 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A)
ret <2 x float> %tmp3
@@ -1065,11 +1021,7 @@ define <4 x float> @frintm_4s(<4 x float> %A) nounwind {
;
; CHECK-LABEL: frintm_4s:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vrintm.f32 s7, s3
-; CHECK-NEXT: vrintm.f32 s6, s2
-; CHECK-NEXT: vrintm.f32 s5, s1
-; CHECK-NEXT: vrintm.f32 s4, s0
-; CHECK-NEXT: vorr q0, q1, q1
+; CHECK-NEXT: vrintm.f32 q0, q0
; CHECK-NEXT: bx lr
%tmp3 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A)
ret <4 x float> %tmp3
|
@llvm/pr-subscribers-clang-codegen Author: David Green (davemgreen) Changes: This marks ffloor as legal provided that armv8 and neon are present (or fullfp16 for the fp16 instructions). The existing arm_neon_vrintm intrinsics are auto-upgraded to llvm.floor. If this is OK I will update the other vrint intrinsics. Full diff: https://github.com/llvm/llvm-project/pull/142559.diff 8 Files Affected:
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 1cf8f6819b75a..e0bc2fb144e04 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -843,8 +843,8 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
NEONMAP0(vrndi_v),
NEONMAP0(vrndiq_v),
- NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
- NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
+ NEONMAP1(vrndm_v, floor, Add1ArgType),
+ NEONMAP1(vrndmq_v, floor, Add1ArgType),
NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
diff --git a/clang/test/CodeGen/arm-neon-directed-rounding.c b/clang/test/CodeGen/arm-neon-directed-rounding.c
index be587ea8e697a..6ef05544667b2 100644
--- a/clang/test/CodeGen/arm-neon-directed-rounding.c
+++ b/clang/test/CodeGen/arm-neon-directed-rounding.c
@@ -66,7 +66,7 @@ float32x4_t test_vrndaq_f32(float32x4_t a) {
// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK-A32-NEXT: [[VRNDM_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
-// CHECK-A32-NEXT: [[VRNDM_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrintm.v2f32(<2 x float> [[VRNDM_V_I]])
+// CHECK-A32-NEXT: [[VRNDM_V1_I:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[VRNDM_V_I]])
// CHECK-A32-NEXT: [[VRNDM_V2_I:%.*]] = bitcast <2 x float> [[VRNDM_V1_I]] to <8 x i8>
// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRNDM_V2_I]] to <2 x i32>
// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
@@ -91,7 +91,7 @@ float32x2_t test_vrndm_f32(float32x2_t a) {
// CHECK-A32-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
// CHECK-A32-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
// CHECK-A32-NEXT: [[VRNDMQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
-// CHECK-A32-NEXT: [[VRNDMQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrintm.v4f32(<4 x float> [[VRNDMQ_V_I]])
+// CHECK-A32-NEXT: [[VRNDMQ_V1_I:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[VRNDMQ_V_I]])
// CHECK-A32-NEXT: [[VRNDMQ_V2_I:%.*]] = bitcast <4 x float> [[VRNDMQ_V1_I]] to <16 x i8>
// CHECK-A32-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRNDMQ_V2_I]] to <4 x i32>
// CHECK-A32-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
diff --git a/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c b/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c
index f85deeeca757f..2ea80d5b8389b 100644
--- a/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c
+++ b/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c
@@ -586,7 +586,7 @@ float16x8_t test_vrndaq_f16(float16x8_t a) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <4 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK-NEXT: [[VRNDM_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
-// CHECK-NEXT: [[VRNDM_V1_I:%.*]] = call <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half> [[VRNDM_V_I]])
+// CHECK-NEXT: [[VRNDM_V1_I:%.*]] = call <4 x half> @llvm.floor.v4f16(<4 x half> [[VRNDM_V_I]])
// CHECK-NEXT: [[VRNDM_V2_I:%.*]] = bitcast <4 x half> [[VRNDM_V1_I]] to <8 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[VRNDM_V2_I]] to <4 x i16>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
@@ -602,7 +602,7 @@ float16x4_t test_vrndm_f16(float16x4_t a) {
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <8 x i16>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8>
// CHECK-NEXT: [[VRNDMQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
-// CHECK-NEXT: [[VRNDMQ_V1_I:%.*]] = call <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half> [[VRNDMQ_V_I]])
+// CHECK-NEXT: [[VRNDMQ_V1_I:%.*]] = call <8 x half> @llvm.floor.v8f16(<8 x half> [[VRNDMQ_V_I]])
// CHECK-NEXT: [[VRNDMQ_V2_I:%.*]] = bitcast <8 x half> [[VRNDMQ_V1_I]] to <16 x i8>
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[VRNDMQ_V2_I]] to <8 x i16>
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <8 x half>
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index 9b7dd8099368d..a38d201cb623c 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -682,7 +682,6 @@ def int_arm_neon_vrintn : Neon_1FloatArg_Intrinsic;
def int_arm_neon_vrintx : Neon_1Arg_Intrinsic;
def int_arm_neon_vrinta : Neon_1Arg_Intrinsic;
def int_arm_neon_vrintz : Neon_1Arg_Intrinsic;
-def int_arm_neon_vrintm : Neon_1Arg_Intrinsic;
def int_arm_neon_vrintp : Neon_1Arg_Intrinsic;
// De-interleaving vector loads from N-element structures.
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 7ba6d411bc7b5..b96de6410cb13 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -719,6 +719,7 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
.StartsWith("vqaddu.", Intrinsic::uadd_sat)
.StartsWith("vqsubs.", Intrinsic::ssub_sat)
.StartsWith("vqsubu.", Intrinsic::usub_sat)
+ .StartsWith("vrintm.", Intrinsic::floor)
.Default(Intrinsic::not_intrinsic);
if (ID != Intrinsic::not_intrinsic) {
NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index be4876d0667ab..d4f874e647869 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1598,6 +1598,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
+ if (Subtarget->hasV8Ops()) {
+ setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ }
+
if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal);
@@ -1608,6 +1613,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal);
setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::v4f16, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v8f16, Legal);
}
}
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 3335f52f15555..7e6b71501fff9 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -7316,7 +7316,7 @@ defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
-defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
+defm VRINTMN : VRINT_FPI<"m", 0b101, ffloor>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
// Cryptography instructions
diff --git a/llvm/test/CodeGen/ARM/vrint.ll b/llvm/test/CodeGen/ARM/vrint.ll
index 2391e9499479b..6675900f4f448 100644
--- a/llvm/test/CodeGen/ARM/vrint.ll
+++ b/llvm/test/CodeGen/ARM/vrint.ll
@@ -813,21 +813,7 @@ define <4 x half> @frintm_4h(<4 x half> %A) nounwind {
;
; CHECK-FP16-LABEL: frintm_4h:
; CHECK-FP16: @ %bb.0:
-; CHECK-FP16-NEXT: vmovx.f16 s2, s0
-; CHECK-FP16-NEXT: vrintm.f16 s2, s2
-; CHECK-FP16-NEXT: vmov r0, s2
-; CHECK-FP16-NEXT: vrintm.f16 s2, s0
-; CHECK-FP16-NEXT: vmov r1, s2
-; CHECK-FP16-NEXT: vrintm.f16 s2, s1
-; CHECK-FP16-NEXT: vmovx.f16 s0, s1
-; CHECK-FP16-NEXT: vrintm.f16 s0, s0
-; CHECK-FP16-NEXT: vmov.16 d16[0], r1
-; CHECK-FP16-NEXT: vmov.16 d16[1], r0
-; CHECK-FP16-NEXT: vmov r0, s2
-; CHECK-FP16-NEXT: vmov.16 d16[2], r0
-; CHECK-FP16-NEXT: vmov r0, s0
-; CHECK-FP16-NEXT: vmov.16 d16[3], r0
-; CHECK-FP16-NEXT: vorr d0, d16, d16
+; CHECK-FP16-NEXT: vrintm.f16 d0, d0
; CHECK-FP16-NEXT: bx lr
%tmp3 = call <4 x half> @llvm.floor.v4f16(<4 x half> %A)
ret <4 x half> %tmp3
@@ -977,35 +963,7 @@ define <8 x half> @frintm_8h(<8 x half> %A) nounwind {
;
; CHECK-FP16-LABEL: frintm_8h:
; CHECK-FP16: @ %bb.0:
-; CHECK-FP16-NEXT: vmovx.f16 s4, s2
-; CHECK-FP16-NEXT: vrintm.f16 s4, s4
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vrintm.f16 s4, s2
-; CHECK-FP16-NEXT: vmov r1, s4
-; CHECK-FP16-NEXT: vrintm.f16 s4, s3
-; CHECK-FP16-NEXT: vmov.16 d17[0], r1
-; CHECK-FP16-NEXT: vmov.16 d17[1], r0
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vmovx.f16 s4, s3
-; CHECK-FP16-NEXT: vrintm.f16 s4, s4
-; CHECK-FP16-NEXT: vmov.16 d17[2], r0
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vmovx.f16 s4, s0
-; CHECK-FP16-NEXT: vrintm.f16 s4, s4
-; CHECK-FP16-NEXT: vmov.16 d17[3], r0
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vrintm.f16 s4, s0
-; CHECK-FP16-NEXT: vmovx.f16 s0, s1
-; CHECK-FP16-NEXT: vmov r1, s4
-; CHECK-FP16-NEXT: vrintm.f16 s4, s1
-; CHECK-FP16-NEXT: vrintm.f16 s0, s0
-; CHECK-FP16-NEXT: vmov.16 d16[0], r1
-; CHECK-FP16-NEXT: vmov.16 d16[1], r0
-; CHECK-FP16-NEXT: vmov r0, s4
-; CHECK-FP16-NEXT: vmov.16 d16[2], r0
-; CHECK-FP16-NEXT: vmov r0, s0
-; CHECK-FP16-NEXT: vmov.16 d16[3], r0
-; CHECK-FP16-NEXT: vorr q0, q8, q8
+; CHECK-FP16-NEXT: vrintm.f16 q0, q0
; CHECK-FP16-NEXT: bx lr
%tmp3 = call <8 x half> @llvm.floor.v8f16(<8 x half> %A)
ret <8 x half> %tmp3
@@ -1031,9 +989,7 @@ define <2 x float> @frintm_2s(<2 x float> %A) nounwind {
;
; CHECK-LABEL: frintm_2s:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vrintm.f32 s3, s1
-; CHECK-NEXT: vrintm.f32 s2, s0
-; CHECK-NEXT: vmov.f64 d0, d1
+; CHECK-NEXT: vrintm.f32 d0, d0
; CHECK-NEXT: bx lr
%tmp3 = call <2 x float> @llvm.floor.v2f32(<2 x float> %A)
ret <2 x float> %tmp3
@@ -1065,11 +1021,7 @@ define <4 x float> @frintm_4s(<4 x float> %A) nounwind {
;
; CHECK-LABEL: frintm_4s:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vrintm.f32 s7, s3
-; CHECK-NEXT: vrintm.f32 s6, s2
-; CHECK-NEXT: vrintm.f32 s5, s1
-; CHECK-NEXT: vrintm.f32 s4, s0
-; CHECK-NEXT: vorr q0, q1, q1
+; CHECK-NEXT: vrintm.f32 q0, q0
; CHECK-NEXT: bx lr
%tmp3 = call <4 x float> @llvm.floor.v4f32(<4 x float> %A)
ret <4 x float> %tmp3
|
You can test this locally with the following command:git-clang-format --diff HEAD~1 HEAD --extensions cpp,c -- clang/lib/CodeGen/TargetBuiltins/ARM.cpp clang/test/CodeGen/arm-neon-directed-rounding.c clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c llvm/lib/IR/AutoUpgrade.cpp llvm/lib/Target/ARM/ARMISelLowering.cpp View the diff from clang-format here.diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index e0bc2fb14..25d5aa9dc 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -590,324 +590,346 @@ struct ARMVectorIntrinsicInfo {
Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
TypeModifier }
-static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
- NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
- NEONMAP0(splat_lane_v),
- NEONMAP0(splat_laneq_v),
- NEONMAP0(splatq_lane_v),
- NEONMAP0(splatq_laneq_v),
- NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
- NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
- NEONMAP1(vabs_v, arm_neon_vabs, 0),
- NEONMAP1(vabsq_v, arm_neon_vabs, 0),
- NEONMAP0(vadd_v),
- NEONMAP0(vaddhn_v),
- NEONMAP0(vaddq_v),
- NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
- NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
- NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
- NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
- NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
- NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
- NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
- NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
- NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
- NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
- NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
- NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
- NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
- NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
- NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
- NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
- NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
- NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
- NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
- NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
- NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
- NEONMAP1(vcage_v, arm_neon_vacge, 0),
- NEONMAP1(vcageq_v, arm_neon_vacge, 0),
- NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
- NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
- NEONMAP1(vcale_v, arm_neon_vacge, 0),
- NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
- NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
- NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
- NEONMAP0(vceqz_v),
- NEONMAP0(vceqzq_v),
- NEONMAP0(vcgez_v),
- NEONMAP0(vcgezq_v),
- NEONMAP0(vcgtz_v),
- NEONMAP0(vcgtzq_v),
- NEONMAP0(vclez_v),
- NEONMAP0(vclezq_v),
- NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
- NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
- NEONMAP0(vcltz_v),
- NEONMAP0(vcltzq_v),
- NEONMAP1(vclz_v, ctlz, Add1ArgType),
- NEONMAP1(vclzq_v, ctlz, Add1ArgType),
- NEONMAP1(vcnt_v, ctpop, Add1ArgType),
- NEONMAP1(vcntq_v, ctpop, Add1ArgType),
- NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
- NEONMAP0(vcvt_f16_s16),
- NEONMAP0(vcvt_f16_u16),
- NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
- NEONMAP0(vcvt_f32_v),
- NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
- NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
- NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
- NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
- NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
- NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
- NEONMAP0(vcvt_s16_f16),
- NEONMAP0(vcvt_s32_v),
- NEONMAP0(vcvt_s64_v),
- NEONMAP0(vcvt_u16_f16),
- NEONMAP0(vcvt_u32_v),
- NEONMAP0(vcvt_u64_v),
- NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
- NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
- NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
- NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
- NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
- NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
- NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
- NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
- NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
- NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
- NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
- NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
- NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
- NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
- NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
- NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
- NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
- NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
- NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
- NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
- NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
- NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
- NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
- NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
- NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
- NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
- NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
- NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
- NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
- NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
- NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
- NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
- NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
- NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
- NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
- NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
- NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
- NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
- NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
- NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
- NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
- NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
- NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
- NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
- NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
- NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
- NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
- NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
- NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
- NEONMAP0(vcvtq_f16_s16),
- NEONMAP0(vcvtq_f16_u16),
- NEONMAP0(vcvtq_f32_v),
- NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
- NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
- NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
- NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
- NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
- NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
- NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
- NEONMAP0(vcvtq_s16_f16),
- NEONMAP0(vcvtq_s32_v),
- NEONMAP0(vcvtq_s64_v),
- NEONMAP0(vcvtq_u16_f16),
- NEONMAP0(vcvtq_u32_v),
- NEONMAP0(vcvtq_u64_v),
- NEONMAP1(vdot_s32, arm_neon_sdot, 0),
- NEONMAP1(vdot_u32, arm_neon_udot, 0),
- NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
- NEONMAP1(vdotq_u32, arm_neon_udot, 0),
- NEONMAP0(vext_v),
- NEONMAP0(vextq_v),
- NEONMAP0(vfma_v),
- NEONMAP0(vfmaq_v),
- NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
- NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
- NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
- NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
- NEONMAP0(vld1_dup_v),
- NEONMAP1(vld1_v, arm_neon_vld1, 0),
- NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
- NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
- NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
- NEONMAP0(vld1q_dup_v),
- NEONMAP1(vld1q_v, arm_neon_vld1, 0),
- NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
- NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
- NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
- NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
- NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
- NEONMAP1(vld2_v, arm_neon_vld2, 0),
- NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
- NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
- NEONMAP1(vld2q_v, arm_neon_vld2, 0),
- NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
- NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
- NEONMAP1(vld3_v, arm_neon_vld3, 0),
- NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
- NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
- NEONMAP1(vld3q_v, arm_neon_vld3, 0),
- NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
- NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
- NEONMAP1(vld4_v, arm_neon_vld4, 0),
- NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
- NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
- NEONMAP1(vld4q_v, arm_neon_vld4, 0),
- NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
- NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
- NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
- NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
- NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
- NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
- NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
- NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
- NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
- NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
- NEONMAP0(vmovl_v),
- NEONMAP0(vmovn_v),
- NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
- NEONMAP0(vmull_v),
- NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
- NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
- NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
- NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
- NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
- NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
- NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
- NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
- NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
- NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
- NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
- NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
- NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
- NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
- NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
- NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
- NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
- NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
- NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
- NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
- NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
- NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
- NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
- NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
- NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
- NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
- NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
- NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
- NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
- NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
- NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
- NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
- NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
- NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
- NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
- NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
- NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
- NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
- NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
- NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
- NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
- NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
- NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
- NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
- NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
- NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
- NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
- NEONMAP0(vrndi_v),
- NEONMAP0(vrndiq_v),
- NEONMAP1(vrndm_v, floor, Add1ArgType),
- NEONMAP1(vrndmq_v, floor, Add1ArgType),
- NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
- NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
- NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
- NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
- NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
- NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
- NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
- NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
- NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
- NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
- NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
- NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
- NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
- NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
- NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
- NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
- NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
- NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
- NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
- NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
- NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
- NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
- NEONMAP0(vshl_n_v),
- NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
- NEONMAP0(vshll_n_v),
- NEONMAP0(vshlq_n_v),
- NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
- NEONMAP0(vshr_n_v),
- NEONMAP0(vshrn_n_v),
- NEONMAP0(vshrq_n_v),
- NEONMAP1(vst1_v, arm_neon_vst1, 0),
- NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
- NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
- NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
- NEONMAP1(vst1q_v, arm_neon_vst1, 0),
- NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
- NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
- NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
- NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
- NEONMAP1(vst2_v, arm_neon_vst2, 0),
- NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
- NEONMAP1(vst2q_v, arm_neon_vst2, 0),
- NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
- NEONMAP1(vst3_v, arm_neon_vst3, 0),
- NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
- NEONMAP1(vst3q_v, arm_neon_vst3, 0),
- NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
- NEONMAP1(vst4_v, arm_neon_vst4, 0),
- NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
- NEONMAP1(vst4q_v, arm_neon_vst4, 0),
- NEONMAP0(vsubhn_v),
- NEONMAP0(vtrn_v),
- NEONMAP0(vtrnq_v),
- NEONMAP0(vtst_v),
- NEONMAP0(vtstq_v),
- NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
- NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
- NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
- NEONMAP0(vuzp_v),
- NEONMAP0(vuzpq_v),
- NEONMAP0(vzip_v),
- NEONMAP0(vzipq_v)
-};
+static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[] = {
+ NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
+ NEONMAP0(splat_lane_v),
+ NEONMAP0(splat_laneq_v),
+ NEONMAP0(splatq_lane_v),
+ NEONMAP0(splatq_laneq_v),
+ NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vabs_v, arm_neon_vabs, 0),
+ NEONMAP1(vabsq_v, arm_neon_vabs, 0),
+ NEONMAP0(vadd_v),
+ NEONMAP0(vaddhn_v),
+ NEONMAP0(vaddq_v),
+ NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
+ NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
+ NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
+ NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
+ NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
+ NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
+ NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
+ NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
+ NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
+ NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
+ NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
+ NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
+ NEONMAP1(vcage_v, arm_neon_vacge, 0),
+ NEONMAP1(vcageq_v, arm_neon_vacge, 0),
+ NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
+ NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
+ NEONMAP1(vcale_v, arm_neon_vacge, 0),
+ NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
+ NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
+ NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
+ NEONMAP0(vceqz_v),
+ NEONMAP0(vceqzq_v),
+ NEONMAP0(vcgez_v),
+ NEONMAP0(vcgezq_v),
+ NEONMAP0(vcgtz_v),
+ NEONMAP0(vcgtzq_v),
+ NEONMAP0(vclez_v),
+ NEONMAP0(vclezq_v),
+ NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
+ NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
+ NEONMAP0(vcltz_v),
+ NEONMAP0(vcltzq_v),
+ NEONMAP1(vclz_v, ctlz, Add1ArgType),
+ NEONMAP1(vclzq_v, ctlz, Add1ArgType),
+ NEONMAP1(vcnt_v, ctpop, Add1ArgType),
+ NEONMAP1(vcntq_v, ctpop, Add1ArgType),
+ NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
+ NEONMAP0(vcvt_f16_s16),
+ NEONMAP0(vcvt_f16_u16),
+ NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
+ NEONMAP0(vcvt_f32_v),
+ NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
+ NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP0(vcvt_s16_f16),
+ NEONMAP0(vcvt_s32_v),
+ NEONMAP0(vcvt_s64_v),
+ NEONMAP0(vcvt_u16_f16),
+ NEONMAP0(vcvt_u32_v),
+ NEONMAP0(vcvt_u64_v),
+ NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
+ NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
+ NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
+ NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
+ NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
+ NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
+ NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
+ NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
+ NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
+ NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
+ NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
+ NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
+ NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
+ NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
+ NEONMAP0(vcvtq_f16_s16),
+ NEONMAP0(vcvtq_f16_u16),
+ NEONMAP0(vcvtq_f32_v),
+ NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
+ NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
+ NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
+ NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
+ NEONMAP0(vcvtq_s16_f16),
+ NEONMAP0(vcvtq_s32_v),
+ NEONMAP0(vcvtq_s64_v),
+ NEONMAP0(vcvtq_u16_f16),
+ NEONMAP0(vcvtq_u32_v),
+ NEONMAP0(vcvtq_u64_v),
+ NEONMAP1(vdot_s32, arm_neon_sdot, 0),
+ NEONMAP1(vdot_u32, arm_neon_udot, 0),
+ NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
+ NEONMAP1(vdotq_u32, arm_neon_udot, 0),
+ NEONMAP0(vext_v),
+ NEONMAP0(vextq_v),
+ NEONMAP0(vfma_v),
+ NEONMAP0(vfmaq_v),
+ NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP0(vld1_dup_v),
+ NEONMAP1(vld1_v, arm_neon_vld1, 0),
+ NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
+ NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
+ NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
+ NEONMAP0(vld1q_dup_v),
+ NEONMAP1(vld1q_v, arm_neon_vld1, 0),
+ NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
+ NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
+ NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
+ NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
+ NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
+ NEONMAP1(vld2_v, arm_neon_vld2, 0),
+ NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
+ NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
+ NEONMAP1(vld2q_v, arm_neon_vld2, 0),
+ NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
+ NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
+ NEONMAP1(vld3_v, arm_neon_vld3, 0),
+ NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
+ NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
+ NEONMAP1(vld3q_v, arm_neon_vld3, 0),
+ NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
+ NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
+ NEONMAP1(vld4_v, arm_neon_vld4, 0),
+ NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
+ NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
+ NEONMAP1(vld4q_v, arm_neon_vld4, 0),
+ NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
+ NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
+ NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
+ NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
+ NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
+ NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
+ NEONMAP0(vmovl_v),
+ NEONMAP0(vmovn_v),
+ NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
+ NEONMAP0(vmull_v),
+ NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
+ NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
+ NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
+ NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
+ NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
+ NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
+ NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
+ NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
+ NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
+ NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
+ NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
+ NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
+ NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
+ NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
+ NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
+ NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
+ NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
+ NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
+ NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
+ NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
+ NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
+ NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
+ NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
+ NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
+ NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
+ NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
+ NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
+ NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
+ NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
+ NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
+ NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
+ NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
+ NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
+ NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
+ NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
+ NEONMAP0(vrndi_v),
+ NEONMAP0(vrndiq_v),
+ NEONMAP1(vrndm_v, floor, Add1ArgType),
+ NEONMAP1(vrndmq_v, floor, Add1ArgType),
+ NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
+ NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
+ NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
+ NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
+ NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
+ NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
+ NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
+ NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
+ NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
+ NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
+ NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
+ NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
+ NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
+ NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
+ NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
+ NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
+ NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
+ NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
+ NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
+ NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
+ NEONMAP0(vshl_n_v),
+ NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP0(vshll_n_v),
+ NEONMAP0(vshlq_n_v),
+ NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts,
+ Add1ArgType | UnsignedAlts),
+ NEONMAP0(vshr_n_v),
+ NEONMAP0(vshrn_n_v),
+ NEONMAP0(vshrq_n_v),
+ NEONMAP1(vst1_v, arm_neon_vst1, 0),
+ NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
+ NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
+ NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
+ NEONMAP1(vst1q_v, arm_neon_vst1, 0),
+ NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
+ NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
+ NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
+ NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
+ NEONMAP1(vst2_v, arm_neon_vst2, 0),
+ NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
+ NEONMAP1(vst2q_v, arm_neon_vst2, 0),
+ NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
+ NEONMAP1(vst3_v, arm_neon_vst3, 0),
+ NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
+ NEONMAP1(vst3q_v, arm_neon_vst3, 0),
+ NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
+ NEONMAP1(vst4_v, arm_neon_vst4, 0),
+ NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
+ NEONMAP1(vst4q_v, arm_neon_vst4, 0),
+ NEONMAP0(vsubhn_v),
+ NEONMAP0(vtrn_v),
+ NEONMAP0(vtrnq_v),
+ NEONMAP0(vtst_v),
+ NEONMAP0(vtstq_v),
+ NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
+ NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
+ NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
+ NEONMAP0(vuzp_v),
+ NEONMAP0(vuzpq_v),
+ NEONMAP0(vzip_v),
+ NEONMAP0(vzipq_v)};
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP0(splat_lane_v),
|
NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), | ||
NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), | ||
NEONMAP1(vrndm_v, floor, Add1ArgType), | ||
NEONMAP1(vrndmq_v, floor, Add1ArgType), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For AArch64, we have C++ code that chooses between llvm.floor and llvm.experimental.constrained.floor. Do we want the same for 32-bit?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't believe anyone has made constrained intrinsics work for AArch32.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, I guess this is fine, then.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), | ||
NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), | ||
NEONMAP1(vrndm_v, floor, Add1ArgType), | ||
NEONMAP1(vrndmq_v, floor, Add1ArgType), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, I guess this is fine, then.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/3/builds/18242 Here is the relevant piece of the build log for the reference
|
This marks ffloor as legal provided that Armv8 and NEON are present (or fullfp16 for the fp16 instructions). The existing arm_neon_vrintm intrinsics are auto-upgraded to llvm.floor.
If this is OK I will update the other vrint intrinsics.