diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 777e376d3c4ff9..a9db423caec2d3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34749,6 +34749,7 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
     return SDValue();
 
   // Shuffle the constant bits according to the mask.
+  SDLoc DL(Root);
   APInt UndefElts(NumMaskElts, 0);
   APInt ZeroElts(NumMaskElts, 0);
   APInt ConstantElts(NumMaskElts, 0);
@@ -34786,6 +34787,10 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
   }
   assert((UndefElts | ZeroElts | ConstantElts).isAllOnesValue());
 
+  // Attempt to create a zero vector.
+  if ((UndefElts | ZeroElts).isAllOnesValue())
+    return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, DL);
+
   // Create the constant data.
   MVT MaskSVT;
   if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64))
@@ -34794,8 +34799,9 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
     MaskSVT = MVT::getIntegerVT(MaskSizeInBits);
 
   MVT MaskVT = MVT::getVectorVT(MaskSVT, NumMaskElts);
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT))
+    return SDValue();
 
-  SDLoc DL(Root);
   SDValue CstOp = getConstVector(ConstantBitData, UndefElts, MaskVT, DAG, DL);
   return DAG.getBitcast(VT, CstOp);
 }
diff --git a/llvm/test/CodeGen/X86/pr45443.ll b/llvm/test/CodeGen/X86/pr45443.ll
new file mode 100644
index 00000000000000..1e40ab94e9ca45
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr45443.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64
+
+define <16 x float> @PR45443() {
+; CHECK-LABEL: PR45443:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    vfmadd231ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+bb:
+  %tmp = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> , <4 x i32> )
+  %tmp4 = tail call fast <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> , <16 x float> undef)
+  %tmp5 = icmp ult <16 x i32> %tmp,
+  %tmp6 = and <16 x i32> %tmp,
+  %tmp7 = icmp ne <16 x i32> %tmp6, zeroinitializer
+  %tmp8 = and <16 x i1> %tmp7, %tmp5
+  %tmp9 = select fast <16 x i1> %tmp8, <16 x float> , <16 x float> %tmp4
+  ret <16 x float> %tmp9
+}
+declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)
+declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>)