Open
Description
This test case, which mixes bf16 and f16 operands, is miscompiled to a vfwmacc.vv instruction when both Zvfh and Zvfbfwma are enabled:
; Reproducer: a <4 x float> fused multiply-add whose two multiplicands are
; widened from DIFFERENT 16-bit floating-point types.
;   %rd - <4 x float> addend
;   %a  - <4 x half>   multiplicand
;   %b  - <4 x bfloat> multiplicand
; Returns fma(fpext(%a), fpext(%b), %rd).
; NOTE(review): presumably no single widening multiply-accumulate can be
; correct here, since the two sources use different 16-bit formats — confirm
; against the expected lowering.
define <4 x float> @fixed(<4 x float> %rd, <4 x half> %a, <4 x bfloat> %b) local_unnamed_addr #0 {
; Widen the half operand to float.
%a_ext = fpext <4 x half> %a to <4 x float>
; Widen the bfloat operand to float (a different source format than %a).
%b_ext = fpext <4 x bfloat> %b to <4 x float>
; %fma = (%a_ext * %b_ext) + %rd
%fma = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a_ext, <4 x float> %b_ext, <4 x float> %rd)
ret <4 x float> %fma
}
This test case crashes when Zvfbfwma is enabled:
; Reproducer: a <4 x float> fused multiply-add where one multiplicand is a
; bfloat scalar (taken from a vector element) widened and splatted across all
; lanes, and the other is a widened <4 x bfloat> vector.
;   %rd - <4 x float> addend
;   %a  - <2 x bfloat>; only element 1 is used
;   %b  - <4 x bfloat> multiplicand
; Returns fma(splat(fpext(%a[1])), fpext(%b), %rd).
define <4 x float> @fixed(<4 x float> %rd, <2 x bfloat> %a, <4 x bfloat> %b) local_unnamed_addr #0 {
; Widen the bfloat vector operand to float.
%b_ext = fpext <4 x bfloat> %b to <4 x float>
; Pull out element 1 of %a as a scalar bfloat.
%1 = extractelement <2 x bfloat> %a, i64 1
; Widen the scalar to float.
%a_extract = fpext bfloat %1 to float
; Place the scalar in lane 0 of an otherwise-poison vector...
%a_insert = insertelement <4 x float> poison, float %a_extract, i64 0
; ...and broadcast lane 0 to all four lanes (the all-zero mask selects element 0 for every lane).
%a_shuffle = shufflevector <4 x float> %a_insert, <4 x float> poison, <4 x i32> zeroinitializer
; %fma = (%a_shuffle * %b_ext) + %rd
%fma = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a_shuffle, <4 x float> %b_ext, <4 x float> %rd)
ret <4 x float> %fma
}