Skip to content

Commit 7495f92

Browse files
mssefat and arsenm authored
[AMDGPU] Fix undefined scc register in successor block of SI_KILL terminators (llvm#134718)
Fix issue 131298, where an undefined $scc register causes verifier errors when using SI_KILL_F32_COND_IMM_TERMINATOR instructions. The problem occurs because the $scc register defined by a comparison before the kill terminator is used in successor blocks, but was not properly marked as live-in.

This patch:
- Adds code to check if SCC is used in the successor block
- Adds SCC as a live-in to successor blocks
- Handles both explicit and implicit uses of SCC

With this patch, the machine verifier no longer reports undefined $scc errors in blocks following a kill terminator instruction.

Fixes llvm#131298

---------

Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
1 parent 6aafd5e commit 7495f92

File tree

4 files changed

+235
-4
lines changed

4 files changed

+235
-4
lines changed

llvm/lib/CodeGen/FinalizeISel.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ static std::pair<bool, bool> runImpl(MachineFunction &MF) {
4747
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4848
const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
4949

50+
TLI->finalizeLowering(MF);
51+
5052
// Iterate through each instruction in the function, looking for pseudos.
5153
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
5254
MachineBasicBlock *MBB = &*I;
@@ -74,9 +76,6 @@ static std::pair<bool, bool> runImpl(MachineFunction &MF) {
7476
}
7577
}
7678
}
77-
78-
TLI->finalizeLowering(MF);
79-
8079
return {Changed, PreserveCFG};
8180
}
8281

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4513,7 +4513,7 @@ Register SITargetLowering::getRegisterByName(const char *RegName, LLT VT,
45134513
MachineBasicBlock *
45144514
SITargetLowering::splitKillBlock(MachineInstr &MI,
45154515
MachineBasicBlock *BB) const {
4516-
MachineBasicBlock *SplitBB = BB->splitAt(MI, false /*UpdateLiveIns*/);
4516+
MachineBasicBlock *SplitBB = BB->splitAt(MI, /*UpdateLiveIns=*/true);
45174517
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
45184518
MI.setDesc(TII->getKillTerminatorFromPseudo(MI.getOpcode()));
45194519
return SplitBB;
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn -run-pass finalize-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs %s -o - | FileCheck %s
3+
---
4+
name: phi_use_def_before_kill
5+
tracksRegLiveness: true
6+
body: |
7+
; CHECK-LABEL: name: phi_use_def_before_kill
8+
; CHECK: bb.0:
9+
; CHECK-NEXT: successors: %bb.3(0x80000000)
10+
; CHECK-NEXT: liveins: $sgpr0, $sgpr1
11+
; CHECK-NEXT: {{ $}}
12+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
13+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
14+
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
15+
; CHECK-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, killed [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
16+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
17+
; CHECK-NEXT: [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F32_e64 0, [[V_ADD_F32_e64_]], 0, [[S_MOV_B32_1]], 0, implicit $mode, implicit $exec
18+
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 -1082130432
19+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_2]]
20+
; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[S_MOV_B32_1]], 0, [[COPY2]], killed [[V_CMP_GT_F32_e64_]], implicit $exec
21+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY [[V_CNDMASK_B32_e64_]]
22+
; CHECK-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
23+
; CHECK-NEXT: S_CMP_LG_U32 [[COPY]], killed [[S_MOV_B32_3]], implicit-def $scc
24+
; CHECK-NEXT: SI_KILL_F32_COND_IMM_TERMINATOR [[V_ADD_F32_e64_]], 0, 2, implicit-def $vcc_lo, implicit $exec
25+
; CHECK-NEXT: {{ $}}
26+
; CHECK-NEXT: bb.3:
27+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
28+
; CHECK-NEXT: liveins: $vcc_lo, $scc
29+
; CHECK-NEXT: {{ $}}
30+
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
31+
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo
32+
; CHECK-NEXT: S_BRANCH %bb.2
33+
; CHECK-NEXT: {{ $}}
34+
; CHECK-NEXT: bb.1:
35+
; CHECK-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
36+
; CHECK-NEXT: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_32 = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, killed [[S_MOV_B32_4]], 0, implicit $mode, implicit $exec
37+
; CHECK-NEXT: S_ENDPGM 0
38+
; CHECK-NEXT: {{ $}}
39+
; CHECK-NEXT: bb.2:
40+
; CHECK-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
41+
; CHECK-NEXT: [[V_CMP_EQ_F32_e64_1:%[0-9]+]]:sreg_32 = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY]], 0, killed [[S_MOV_B32_5]], 0, implicit $mode, implicit $exec
42+
; CHECK-NEXT: S_ENDPGM 0
43+
44+
bb.0:
45+
liveins: $sgpr0, $sgpr1
46+
%3:sgpr_32 = COPY $sgpr1
47+
%2:sgpr_32 = COPY $sgpr0
48+
%5:sgpr_32 = S_MOV_B32 1065353216
49+
%6:vgpr_32 = nofpexcept V_ADD_F32_e64 0, %2:sgpr_32, 0, killed %5:sgpr_32, 0, 0, implicit $mode, implicit $exec
50+
%7:sgpr_32 = S_MOV_B32 0
51+
%8:sreg_32_xm0_xexec = nofpexcept V_CMP_GT_F32_e64 0, %6:vgpr_32, 0, %7:sgpr_32, 0, implicit $mode, implicit $exec
52+
%9:sgpr_32 = S_MOV_B32 -1082130432
53+
%11:vgpr_32 = COPY killed %9:sgpr_32
54+
%10:vgpr_32 = V_CNDMASK_B32_e64 0, %7:sgpr_32, 0, %11:vgpr_32, killed %8:sreg_32_xm0_xexec, implicit $exec
55+
%0:sgpr_32 = COPY %10:vgpr_32
56+
%12:sreg_32 = S_MOV_B32 0
57+
S_CMP_LG_U32 %3:sgpr_32, killed %12:sreg_32, implicit-def $scc
58+
SI_KILL_F32_COND_IMM_PSEUDO %6:vgpr_32, 0, 2, implicit-def $vcc, implicit $exec
59+
S_CBRANCH_SCC1 %bb.1, implicit $scc
60+
S_CBRANCH_VCCNZ %bb.2, implicit $vcc
61+
S_BRANCH %bb.2
62+
63+
bb.1:
64+
%13:sgpr_32 = S_MOV_B32 0
65+
%14:sreg_32 = nofpexcept V_CMP_EQ_F32_e64 0, %3:sgpr_32, 0, killed %13:sgpr_32, 0, implicit $mode, implicit $exec
66+
S_ENDPGM 0
67+
68+
bb.2:
69+
%15:sgpr_32 = S_MOV_B32 0
70+
%16:sreg_32 = nofpexcept V_CMP_EQ_F32_e64 0, %3:sgpr_32, 0, killed %15:sgpr_32, 0, implicit $mode, implicit $exec
71+
S_ENDPGM 0
72+
73+
...

llvm/test/CodeGen/AMDGPU/skip-if-dead.ll

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1956,6 +1956,165 @@ bb.1:
19561956
ret void
19571957
}
19581958

1959+
define amdgpu_ps void @scc_use_after_kill_inst(float inreg %x, i32 inreg %y) #0 {
1960+
; SI-LABEL: scc_use_after_kill_inst:
1961+
; SI: ; %bb.0: ; %bb
1962+
; SI-NEXT: v_add_f32_e64 v1, s0, 1.0
1963+
; SI-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
1964+
; SI-NEXT: s_mov_b64 s[2:3], exec
1965+
; SI-NEXT: s_cmp_lg_u32 s1, 0
1966+
; SI-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
1967+
; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
1968+
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
1969+
; SI-NEXT: s_cbranch_scc0 .LBB17_6
1970+
; SI-NEXT: ; %bb.1: ; %bb
1971+
; SI-NEXT: s_andn2_b64 exec, exec, vcc
1972+
; SI-NEXT: s_cbranch_scc0 .LBB17_3
1973+
; SI-NEXT: ; %bb.2: ; %bb8
1974+
; SI-NEXT: s_mov_b32 s3, 0xf000
1975+
; SI-NEXT: s_mov_b32 s2, -1
1976+
; SI-NEXT: v_mov_b32_e32 v0, 8
1977+
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1978+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1979+
; SI-NEXT: v_mov_b32_e32 v0, 4.0
1980+
; SI-NEXT: .LBB17_3: ; %phibb
1981+
; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
1982+
; SI-NEXT: s_cbranch_vccz .LBB17_5
1983+
; SI-NEXT: ; %bb.4: ; %bb10
1984+
; SI-NEXT: s_mov_b32 s3, 0xf000
1985+
; SI-NEXT: s_mov_b32 s2, -1
1986+
; SI-NEXT: v_mov_b32_e32 v0, 9
1987+
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1988+
; SI-NEXT: s_waitcnt vmcnt(0)
1989+
; SI-NEXT: .LBB17_5: ; %end
1990+
; SI-NEXT: s_endpgm
1991+
; SI-NEXT: .LBB17_6:
1992+
; SI-NEXT: s_mov_b64 exec, 0
1993+
; SI-NEXT: exp null off, off, off, off done vm
1994+
; SI-NEXT: s_endpgm
1995+
;
1996+
; GFX10-WAVE64-LABEL: scc_use_after_kill_inst:
1997+
; GFX10-WAVE64: ; %bb.0: ; %bb
1998+
; GFX10-WAVE64-NEXT: v_add_f32_e64 v1, s0, 1.0
1999+
; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
2000+
; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s1, 0
2001+
; GFX10-WAVE64-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
2002+
; GFX10-WAVE64-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
2003+
; GFX10-WAVE64-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
2004+
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
2005+
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB17_6
2006+
; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb
2007+
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
2008+
; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB17_3
2009+
; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb8
2010+
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v1, 8
2011+
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 4.0
2012+
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v1, off
2013+
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
2014+
; GFX10-WAVE64-NEXT: .LBB17_3: ; %phibb
2015+
; GFX10-WAVE64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
2016+
; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB17_5
2017+
; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb10
2018+
; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9
2019+
; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off
2020+
; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
2021+
; GFX10-WAVE64-NEXT: .LBB17_5: ; %end
2022+
; GFX10-WAVE64-NEXT: s_endpgm
2023+
; GFX10-WAVE64-NEXT: .LBB17_6:
2024+
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
2025+
; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
2026+
; GFX10-WAVE64-NEXT: s_endpgm
2027+
;
2028+
; GFX10-WAVE32-LABEL: scc_use_after_kill_inst:
2029+
; GFX10-WAVE32: ; %bb.0: ; %bb
2030+
; GFX10-WAVE32-NEXT: v_add_f32_e64 v1, s0, 1.0
2031+
; GFX10-WAVE32-NEXT: s_mov_b32 s2, exec_lo
2032+
; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s1, 0
2033+
; GFX10-WAVE32-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0, v1
2034+
; GFX10-WAVE32-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo
2035+
; GFX10-WAVE32-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0, v1
2036+
; GFX10-WAVE32-NEXT: s_andn2_b32 s2, s2, vcc_lo
2037+
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB17_6
2038+
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb
2039+
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
2040+
; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB17_3
2041+
; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb8
2042+
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v1, 8
2043+
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 4.0
2044+
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v1, off
2045+
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
2046+
; GFX10-WAVE32-NEXT: .LBB17_3: ; %phibb
2047+
; GFX10-WAVE32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
2048+
; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB17_5
2049+
; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb10
2050+
; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9
2051+
; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off
2052+
; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
2053+
; GFX10-WAVE32-NEXT: .LBB17_5: ; %end
2054+
; GFX10-WAVE32-NEXT: s_endpgm
2055+
; GFX10-WAVE32-NEXT: .LBB17_6:
2056+
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
2057+
; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
2058+
; GFX10-WAVE32-NEXT: s_endpgm
2059+
;
2060+
; GFX11-LABEL: scc_use_after_kill_inst:
2061+
; GFX11: ; %bb.0: ; %bb
2062+
; GFX11-NEXT: v_add_f32_e64 v1, s0, 1.0
2063+
; GFX11-NEXT: s_mov_b64 s[2:3], exec
2064+
; GFX11-NEXT: s_cmp_lg_u32 s1, 0
2065+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2066+
; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
2067+
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
2068+
; GFX11-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
2069+
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc
2070+
; GFX11-NEXT: s_cbranch_scc0 .LBB17_6
2071+
; GFX11-NEXT: ; %bb.1: ; %bb
2072+
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
2073+
; GFX11-NEXT: s_cbranch_scc0 .LBB17_3
2074+
; GFX11-NEXT: ; %bb.2: ; %bb8
2075+
; GFX11-NEXT: v_mov_b32_e32 v1, 8
2076+
; GFX11-NEXT: v_mov_b32_e32 v0, 4.0
2077+
; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
2078+
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2079+
; GFX11-NEXT: .LBB17_3: ; %phibb
2080+
; GFX11-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
2081+
; GFX11-NEXT: s_cbranch_vccz .LBB17_5
2082+
; GFX11-NEXT: ; %bb.4: ; %bb10
2083+
; GFX11-NEXT: v_mov_b32_e32 v0, 9
2084+
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
2085+
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2086+
; GFX11-NEXT: .LBB17_5: ; %end
2087+
; GFX11-NEXT: s_endpgm
2088+
; GFX11-NEXT: .LBB17_6:
2089+
; GFX11-NEXT: s_mov_b64 exec, 0
2090+
; GFX11-NEXT: exp mrt0 off, off, off, off done
2091+
; GFX11-NEXT: s_endpgm
2092+
bb:
2093+
%tmp = fadd float %x, 1.000000e+00
2094+
%tmp1 = fcmp olt float 0.000000e+00, %tmp
2095+
%tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
2096+
%cmp.tmp2 = fcmp olt float %tmp2, 0.000000e+00
2097+
%uniform.cond = icmp eq i32 %y, 0
2098+
call void @llvm.amdgcn.kill(i1 %cmp.tmp2)
2099+
br i1 %uniform.cond, label %phibb, label %bb8
2100+
2101+
phibb: ; preds = %bb8, %bb
2102+
%tmp5 = phi float [ %tmp2, %bb ], [ 4.000000e+00, %bb8 ]
2103+
%tmp6 = fcmp oeq float %tmp5, 0.000000e+00
2104+
br i1 %tmp6, label %bb10, label %end
2105+
2106+
bb8: ; preds = %bb
2107+
store volatile i32 8, ptr addrspace(1) poison, align 4
2108+
br label %phibb
2109+
2110+
bb10: ; preds = %phibb
2111+
store volatile i32 9, ptr addrspace(1) poison, align 4
2112+
br label %end
2113+
2114+
end: ; preds = %bb10, %phibb
2115+
ret void
2116+
}
2117+
19592118
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3
19602119
declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
19612120
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

0 commit comments

Comments
 (0)