Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DAG] visitEXTRACT_SUBVECTOR - don't return early on failure of EXTRACT_SUBVECTOR(INSERT_SUBVECTOR()) -> BITCAST fold #133695

Merged
merged 1 commit into from
Mar 31, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 17 additions & 19 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
@@ -25532,26 +25532,24 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// Handle only simple case where vector being inserted and vector
// being extracted are of same size.
EVT SmallVT = V.getOperand(1).getValueType();
if (!NVT.bitsEq(SmallVT))
return SDValue();

// Combine:
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
// Into:
// indices are equal or bit offsets are equal => (bitcast V2)
// otherwise => (extract_subvec V1, ExtIdx)
uint64_t InsIdx = V.getConstantOperandVal(2);
if (InsIdx * SmallVT.getScalarSizeInBits() ==
ExtIdx * NVT.getScalarSizeInBits()) {
if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
return SDValue();

return DAG.getBitcast(NVT, V.getOperand(1));
if (NVT.bitsEq(SmallVT)) {
// Combine:
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
// Into:
// indices are equal or bit offsets are equal => (bitcast V2)
// otherwise => (extract_subvec V1, ExtIdx)
uint64_t InsIdx = V.getConstantOperandVal(2);
if (InsIdx * SmallVT.getScalarSizeInBits() ==
ExtIdx * NVT.getScalarSizeInBits()) {
if (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))
return DAG.getBitcast(NVT, V.getOperand(1));
} else {
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, DL, NVT,
DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
N->getOperand(1));
}
}
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, DL, NVT,
DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
N->getOperand(1));
}

if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
45 changes: 22 additions & 23 deletions llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll
Original file line number Diff line number Diff line change
@@ -66,39 +66,38 @@ define amdgpu_vs void @test_3(i32 inreg %arg1, i32 inreg %arg2, ptr addrspace(8)
; CHECK-NEXT: s_mov_b32 s6, s4
; CHECK-NEXT: s_mov_b32 s5, s3
; CHECK-NEXT: s_mov_b32 s4, s2
; CHECK-NEXT: v_add_i32_e32 v0, vcc, 20, v1
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 16, v1
; CHECK-NEXT: v_add_i32_e32 v4, vcc, 12, v1
; CHECK-NEXT: v_add_i32_e32 v5, vcc, 8, v1
; CHECK-NEXT: v_add_i32_e32 v8, vcc, 4, v1
; CHECK-NEXT: v_add_i32_e32 v0, vcc, 12, v1
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 8, v1
; CHECK-NEXT: v_add_i32_e32 v4, vcc, 4, v1
; CHECK-NEXT: v_add_i32_e32 v6, vcc, 20, v1
; CHECK-NEXT: v_add_i32_e32 v7, vcc, 16, v1
; CHECK-NEXT: v_mov_b32_e32 v9, s0
; CHECK-NEXT: v_add_i32_e32 v10, vcc, 20, v2
; CHECK-NEXT: v_add_i32_e32 v11, vcc, 16, v2
; CHECK-NEXT: v_add_i32_e32 v10, vcc, 12, v2
; CHECK-NEXT: v_add_i32_e32 v11, vcc, 8, v2
; CHECK-NEXT: s_mov_b32 m0, -1
; CHECK-NEXT: ds_read_b32 v7, v3
; CHECK-NEXT: ds_read_b32 v6, v4
; CHECK-NEXT: ds_read_b32 v5, v5
; CHECK-NEXT: ds_read_b32 v4, v8
; CHECK-NEXT: ds_read_b32 v8, v0
; CHECK-NEXT: ds_read_b32 v5, v3
; CHECK-NEXT: ds_read_b32 v4, v4
; CHECK-NEXT: ds_read_b32 v8, v6
; CHECK-NEXT: ds_read_b32 v7, v7
; CHECK-NEXT: ds_read_b32 v6, v0
; CHECK-NEXT: ds_read_b32 v3, v1
; CHECK-NEXT: v_add_i32_e32 v1, vcc, 12, v2
; CHECK-NEXT: v_add_i32_e32 v12, vcc, 8, v2
; CHECK-NEXT: v_add_i32_e32 v13, vcc, 4, v2
; CHECK-NEXT: v_add_i32_e32 v0, vcc, 4, v2
; CHECK-NEXT: v_add_i32_e32 v1, vcc, 20, v2
; CHECK-NEXT: v_add_i32_e32 v12, vcc, 16, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: tbuffer_store_format_xyzw v[3:6], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:264 glc slc
; CHECK-NEXT: tbuffer_store_format_xy v[7:8], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:280 glc slc
; CHECK-NEXT: ds_read_b32 v0, v11
; CHECK-NEXT: s_waitcnt expcnt(1)
; CHECK-NEXT: ds_read_b32 v5, v1
; CHECK-NEXT: ds_read_b32 v4, v12
; CHECK-NEXT: ds_read_b32 v3, v13
; CHECK-NEXT: ds_read_b32 v4, v11
; CHECK-NEXT: ds_read_b32 v3, v0
; CHECK-NEXT: ds_read_b32 v1, v1
; CHECK-NEXT: ds_read_b32 v0, v12
; CHECK-NEXT: ds_read_b32 v5, v10
; CHECK-NEXT: ds_read_b32 v2, v2
; CHECK-NEXT: ds_read_b32 v1, v10
; CHECK-NEXT: s_waitcnt lgkmcnt(5)
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
; CHECK-NEXT: exp mrt0 off, off, off, off
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: tbuffer_store_format_xyzw v[2:5], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:240 glc slc
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: tbuffer_store_format_xyzw v[2:5], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:240 glc slc
; CHECK-NEXT: tbuffer_store_format_xy v[0:1], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:256 glc slc
; CHECK-NEXT: s_endpgm
%load1 = load <6 x float>, ptr addrspace(3) %arg5, align 4
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll
Original file line number Diff line number Diff line change
@@ -449,9 +449,8 @@ define void @store_i16_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,3,1,3,4,5,6,7]
; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3],xmm0[4,5,6,7]
; AVX512-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm1
; AVX512-NEXT: vmovq %xmm0, 32(%r9)
; AVX512-NEXT: vmovdqa %ymm1, (%r9)
; AVX512-NEXT: vmovdqa %ymm2, (%r9)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
@@ -476,9 +475,8 @@ define void @store_i16_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512-FCP-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,6,7,14,15,u,u,8,9,10,11,12,13,14,15]
; AVX512-FCP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX512-FCP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3],xmm0[4,5,6,7]
; AVX512-FCP-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm1
; AVX512-FCP-NEXT: vmovq %xmm0, 32(%r9)
; AVX512-FCP-NEXT: vmovdqa %ymm1, (%r9)
; AVX512-FCP-NEXT: vmovdqa %ymm2, (%r9)
; AVX512-FCP-NEXT: vzeroupper
; AVX512-FCP-NEXT: retq
;
@@ -504,9 +502,8 @@ define void @store_i16_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,3,1,3,4,5,6,7]
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3],xmm0[4,5,6,7]
; AVX512DQ-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm1
; AVX512DQ-NEXT: vmovq %xmm0, 32(%r9)
; AVX512DQ-NEXT: vmovdqa %ymm1, (%r9)
; AVX512DQ-NEXT: vmovdqa %ymm2, (%r9)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
@@ -531,9 +528,8 @@ define void @store_i16_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,6,7,14,15,u,u,8,9,10,11,12,13,14,15]
; AVX512DQ-FCP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX512DQ-FCP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3],xmm0[4,5,6,7]
; AVX512DQ-FCP-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm1
; AVX512DQ-FCP-NEXT: vmovq %xmm0, 32(%r9)
; AVX512DQ-FCP-NEXT: vmovdqa %ymm1, (%r9)
; AVX512DQ-FCP-NEXT: vmovdqa %ymm2, (%r9)
; AVX512DQ-FCP-NEXT: vzeroupper
; AVX512DQ-FCP-NEXT: retq
;
Loading
Oops, something went wrong.
Loading
Oops, something went wrong.