Skip to content

Commit

Permalink
[X86][SSE] combineX86ShufflesConstants - early out for zeroable vecto…
Browse files Browse the repository at this point in the history
…rs (PR45443)

Shuffle combining can insert zero byte sized elements into the shuffle mask, which combineX86ShufflesConstants will attempt to fold without taking into account whether the byte-sized type is legal (e.g. AVX512F only targets).

If we have a full-zeroable vector then we should just return a zero version of the root type, otherwise if the type isn't valid we should bail.

Fixes PR45443
  • Loading branch information
RKSimon committed Apr 7, 2020
1 parent 6b3353e commit e3b6059
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 1 deletion.
8 changes: 7 additions & 1 deletion llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34749,6 +34749,7 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
return SDValue();

// Shuffle the constant bits according to the mask.
SDLoc DL(Root);
APInt UndefElts(NumMaskElts, 0);
APInt ZeroElts(NumMaskElts, 0);
APInt ConstantElts(NumMaskElts, 0);
Expand Down Expand Up @@ -34786,6 +34787,10 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
}
assert((UndefElts | ZeroElts | ConstantElts).isAllOnesValue());

// Attempt to create a zero vector.
if ((UndefElts | ZeroElts).isAllOnesValue())
return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, DL);

// Create the constant data.
MVT MaskSVT;
if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64))
Expand All @@ -34794,8 +34799,9 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
MaskSVT = MVT::getIntegerVT(MaskSizeInBits);

MVT MaskVT = MVT::getVectorVT(MaskSVT, NumMaskElts);
if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT))
return SDValue();

SDLoc DL(Root);
SDValue CstOp = getConstVector(ConstantBitData, UndefElts, MaskVT, DAG, DL);
return DAG.getBitcast(VT, CstOp);
}
Expand Down
21 changes: 21 additions & 0 deletions llvm/test/CodeGen/X86/pr45443.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64

define <16 x float> @PR45443() {
; CHECK-LABEL: PR45443:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: vfmadd231ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm0
; CHECK-NEXT: ret{{[l|q]}}
bb:
%tmp = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> <i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040, i32 1090519040>, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>)
%tmp4 = tail call fast <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> <float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000, float 0x3FE6300000000000>, <16 x float> undef)
%tmp5 = icmp ult <16 x i32> %tmp, <i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216>
%tmp6 = and <16 x i32> %tmp, <i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215, i32 16777215>
%tmp7 = icmp ne <16 x i32> %tmp6, zeroinitializer
%tmp8 = and <16 x i1> %tmp7, %tmp5
%tmp9 = select fast <16 x i1> %tmp8, <16 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000>, <16 x float> %tmp4
ret <16 x float> %tmp9
}
declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)
declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>)

0 comments on commit e3b6059

Please sign in to comment.