@@ -1956,6 +1956,165 @@ bb.1:
1956
1956
ret void
1957
1957
}
1958
1958
1959
; Test case: the uniform branch condition %uniform.cond (an integer compare of
; the inreg argument %y against 0) is computed BEFORE the call to
; llvm.amdgcn.kill and is consumed by the conditional branch AFTER it.
; Expected output is checked under four prefixes: SI, GFX10-WAVE64,
; GFX10-WAVE32 and GFX11.
; NOTE(review): the check lines look autogenerated (presumably by
; update_llc_test_checks.py) -- regenerate rather than hand-edit; TODO confirm.
; NOTE(review): under every prefix, the s_cmp_lg_u32 ... 0 is followed by
; s_andn2 / s_and_not1 instructions before the second s_cbranch_scc0 (the one
; targeting %phibb). Those scalar ops presumably also write SCC, in which case
; that branch no longer tests the compare result -- confirm whether these
; checks intentionally record a known miscompile or should change.
+ define amdgpu_ps void @scc_use_after_kill_inst (float inreg %x , i32 inreg %y ) #0 {
1960
+ ; SI-LABEL: scc_use_after_kill_inst:
1961
+ ; SI: ; %bb.0: ; %bb
1962
+ ; SI-NEXT: v_add_f32_e64 v1, s0, 1.0
1963
+ ; SI-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
1964
+ ; SI-NEXT: s_mov_b64 s[2:3], exec
1965
+ ; SI-NEXT: s_cmp_lg_u32 s1, 0
1966
+ ; SI-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
1967
+ ; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
1968
+ ; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
1969
+ ; SI-NEXT: s_cbranch_scc0 .LBB17_6
1970
+ ; SI-NEXT: ; %bb.1: ; %bb
1971
+ ; SI-NEXT: s_andn2_b64 exec, exec, vcc
1972
+ ; SI-NEXT: s_cbranch_scc0 .LBB17_3
1973
+ ; SI-NEXT: ; %bb.2: ; %bb8
1974
+ ; SI-NEXT: s_mov_b32 s3, 0xf000
1975
+ ; SI-NEXT: s_mov_b32 s2, -1
1976
+ ; SI-NEXT: v_mov_b32_e32 v0, 8
1977
+ ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1978
+ ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1979
+ ; SI-NEXT: v_mov_b32_e32 v0, 4.0
1980
+ ; SI-NEXT: .LBB17_3: ; %phibb
1981
+ ; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
1982
+ ; SI-NEXT: s_cbranch_vccz .LBB17_5
1983
+ ; SI-NEXT: ; %bb.4: ; %bb10
1984
+ ; SI-NEXT: s_mov_b32 s3, 0xf000
1985
+ ; SI-NEXT: s_mov_b32 s2, -1
1986
+ ; SI-NEXT: v_mov_b32_e32 v0, 9
1987
+ ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1988
+ ; SI-NEXT: s_waitcnt vmcnt(0)
1989
+ ; SI-NEXT: .LBB17_5: ; %end
1990
+ ; SI-NEXT: s_endpgm
1991
+ ; SI-NEXT: .LBB17_6:
1992
+ ; SI-NEXT: s_mov_b64 exec, 0
1993
+ ; SI-NEXT: exp null off, off, off, off done vm
1994
+ ; SI-NEXT: s_endpgm
1995
+ ;
1996
+ ; GFX10-WAVE64-LABEL: scc_use_after_kill_inst:
1997
+ ; GFX10-WAVE64: ; %bb.0: ; %bb
1998
+ ; GFX10-WAVE64-NEXT: v_add_f32_e64 v1, s0, 1.0
1999
+ ; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
2000
+ ; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s1, 0
2001
+ ; GFX10-WAVE64-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
2002
+ ; GFX10-WAVE64-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
2003
+ ; GFX10-WAVE64-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
2004
+ ; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
2005
+ ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB17_6
2006
+ ; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb
2007
+ ; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
2008
+ ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB17_3
2009
+ ; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb8
2010
+ ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v1, 8
2011
+ ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 4.0
2012
+ ; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v1, off
2013
+ ; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
2014
+ ; GFX10-WAVE64-NEXT: .LBB17_3: ; %phibb
2015
+ ; GFX10-WAVE64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
2016
+ ; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB17_5
2017
+ ; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb10
2018
+ ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9
2019
+ ; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off
2020
+ ; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
2021
+ ; GFX10-WAVE64-NEXT: .LBB17_5: ; %end
2022
+ ; GFX10-WAVE64-NEXT: s_endpgm
2023
+ ; GFX10-WAVE64-NEXT: .LBB17_6:
2024
+ ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
2025
+ ; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
2026
+ ; GFX10-WAVE64-NEXT: s_endpgm
2027
+ ;
2028
+ ; GFX10-WAVE32-LABEL: scc_use_after_kill_inst:
2029
+ ; GFX10-WAVE32: ; %bb.0: ; %bb
2030
+ ; GFX10-WAVE32-NEXT: v_add_f32_e64 v1, s0, 1.0
2031
+ ; GFX10-WAVE32-NEXT: s_mov_b32 s2, exec_lo
2032
+ ; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s1, 0
2033
+ ; GFX10-WAVE32-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0, v1
2034
+ ; GFX10-WAVE32-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo
2035
+ ; GFX10-WAVE32-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0, v1
2036
+ ; GFX10-WAVE32-NEXT: s_andn2_b32 s2, s2, vcc_lo
2037
+ ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB17_6
2038
+ ; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb
2039
+ ; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
2040
+ ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB17_3
2041
+ ; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb8
2042
+ ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v1, 8
2043
+ ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 4.0
2044
+ ; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v1, off
2045
+ ; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
2046
+ ; GFX10-WAVE32-NEXT: .LBB17_3: ; %phibb
2047
+ ; GFX10-WAVE32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
2048
+ ; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB17_5
2049
+ ; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb10
2050
+ ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9
2051
+ ; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off
2052
+ ; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
2053
+ ; GFX10-WAVE32-NEXT: .LBB17_5: ; %end
2054
+ ; GFX10-WAVE32-NEXT: s_endpgm
2055
+ ; GFX10-WAVE32-NEXT: .LBB17_6:
2056
+ ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
2057
+ ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
2058
+ ; GFX10-WAVE32-NEXT: s_endpgm
2059
+ ;
2060
+ ; GFX11-LABEL: scc_use_after_kill_inst:
2061
+ ; GFX11: ; %bb.0: ; %bb
2062
+ ; GFX11-NEXT: v_add_f32_e64 v1, s0, 1.0
2063
+ ; GFX11-NEXT: s_mov_b64 s[2:3], exec
2064
+ ; GFX11-NEXT: s_cmp_lg_u32 s1, 0
2065
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2066
+ ; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
2067
+ ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
2068
+ ; GFX11-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
2069
+ ; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc
2070
+ ; GFX11-NEXT: s_cbranch_scc0 .LBB17_6
2071
+ ; GFX11-NEXT: ; %bb.1: ; %bb
2072
+ ; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
2073
+ ; GFX11-NEXT: s_cbranch_scc0 .LBB17_3
2074
+ ; GFX11-NEXT: ; %bb.2: ; %bb8
2075
+ ; GFX11-NEXT: v_mov_b32_e32 v1, 8
2076
+ ; GFX11-NEXT: v_mov_b32_e32 v0, 4.0
2077
+ ; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
2078
+ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2079
+ ; GFX11-NEXT: .LBB17_3: ; %phibb
2080
+ ; GFX11-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
2081
+ ; GFX11-NEXT: s_cbranch_vccz .LBB17_5
2082
+ ; GFX11-NEXT: ; %bb.4: ; %bb10
2083
+ ; GFX11-NEXT: v_mov_b32_e32 v0, 9
2084
+ ; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
2085
+ ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2086
+ ; GFX11-NEXT: .LBB17_5: ; %end
2087
+ ; GFX11-NEXT: s_endpgm
2088
+ ; GFX11-NEXT: .LBB17_6:
2089
+ ; GFX11-NEXT: s_mov_b64 exec, 0
2090
+ ; GFX11-NEXT: exp mrt0 off, off, off, off done
2091
+ ; GFX11-NEXT: s_endpgm
2092
+ bb:
; %tmp2 is -1.0 when 0.0 < (%x + 1.0) holds as an ordered compare, else 0.0.
2093
+ %tmp = fadd float %x , 1 .000000e+00
2094
+ %tmp1 = fcmp olt float 0 .000000e+00 , %tmp
2095
+ %tmp2 = select i1 %tmp1 , float -1 .000000e+00 , float 0 .000000e+00
; Argument for the kill intrinsic: true exactly when %tmp2 is the -1.0 arm.
2096
+ %cmp.tmp2 = fcmp olt float %tmp2 , 0 .000000e+00
; Deliberately defined before the kill call and only used by the branch after
; it, so the compare result has to stay live across the kill.
2097
+ %uniform.cond = icmp eq i32 %y , 0
2098
+ call void @llvm.amdgcn.kill (i1 %cmp.tmp2 )
; %y == 0 skips %bb8 and goes straight to %phibb.
2099
+ br i1 %uniform.cond , label %phibb , label %bb8
2100
+
2101
+ phibb: ; preds = %bb8, %bb
; %tmp5 is %tmp2 when arriving from %bb, or the constant 4.0 from %bb8.
2102
+ %tmp5 = phi float [ %tmp2 , %bb ], [ 4 .000000e+00 , %bb8 ]
2103
+ %tmp6 = fcmp oeq float %tmp5 , 0 .000000e+00
2104
+ br i1 %tmp6 , label %bb10 , label %end
2105
+
2106
+ bb8: ; preds = %bb
; Volatile store of the marker value 8 through a poison addrspace(1) pointer.
2107
+ store volatile i32 8 , ptr addrspace (1 ) poison, align 4
2108
+ br label %phibb
2109
+
2110
+ bb10: ; preds = %phibb
; Volatile store of the marker value 9 through a poison addrspace(1) pointer.
2111
+ store volatile i32 9 , ptr addrspace (1 ) poison, align 4
2112
+ br label %end
2113
+
2114
+ end: ; preds = %bb10, %phibb
2115
+ ret void
2116
+ }
2117
+
1959
2118
declare void @llvm.amdgcn.exp.f32 (i32 immarg, i32 immarg, float , float , float , float , i1 immarg, i1 immarg) #3
1960
2119
declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32 (i32 immarg, float , float , float , float , <8 x i32 >, <4 x i32 >, i1 immarg, i32 immarg, i32 immarg) #1
1961
2120
declare <4 x float > @llvm.amdgcn.image.sample.c.1d.v4f32.f32 (i32 , float , float , <8 x i32 >, <4 x i32 >, i1 , i32 , i32 ) #1
0 commit comments