[AMDGPU] upgrade test to use auto-generated llc checks #145832
Conversation
@llvm/pr-subscribers-backend-amdgpu

Author: Sameer Sahasrabuddhe (ssahasra)

Changes: This makes it easier to fix the failure in #145720.

Patch is 24.96 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/145832.diff

1 file affected:
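For reference, a sketch of how llc-based assertions like these are regenerated with utils/update_llc_test_checks.py; the build directory name `build` is an assumption about the local setup:

```sh
# Regenerate the GCN check lines in place. The script invokes the llc named in
# the RUN lines, so put your LLVM build's bin directory on PATH first
# (the build/bin path here is assumed, not taken from the PR).
PATH="$PWD/build/bin:$PATH" \
  llvm/utils/update_llc_test_checks.py \
    llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
```

The pre-existing opt-based CHECK lines come from utils/update_test_checks.py, which is why the file now carries both NOTE headers.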
diff --git a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
index a8fab161e1ffb..34260c49ff92c 100644
--- a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn -mcpu=gfx900 -amdgpu-aa -amdgpu-aa-wrapper -amdgpu-annotate-uniform -S < %s | FileCheck %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefix=GCN %s
@@ -7,16 +8,6 @@
@LDS = linkonce_odr hidden local_unnamed_addr addrspace(3) global i32 poison
-; GCN-LABEL: {{^}}simple_barrier:
-; GCN: s_load_dword s
-; GCN: s_waitcnt lgkmcnt(0)
-; GCN: s_barrier
-; GCN: s_waitcnt lgkmcnt(0)
-; GCN: ; wave barrier
-; GCN-NOT: global_load_dword
-; GCN: s_load_dword s
-; GCN-NOT: global_load_dword
-; GCN: global_store_dword
define amdgpu_kernel void @simple_barrier(ptr addrspace(1) %arg) {
; CHECK-LABEL: @simple_barrier(
; CHECK-NEXT: bb:
@@ -32,6 +23,22 @@ define amdgpu_kernel void @simple_barrier(ptr addrspace(1) %arg) {
; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4
; CHECK-NEXT: ret void
;
+; GCN-LABEL: simple_barrier:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_load_dword s2, s[0:1], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_barrier
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: ; wave barrier
+; GCN-NEXT: s_load_dword s3, s[0:1], 0x4
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_add_i32 s2, s3, s2
+; GCN-NEXT: v_mov_b32_e32 v1, s2
+; GCN-NEXT: global_store_dword v0, v1, s[0:1] offset:8
+; GCN-NEXT: s_endpgm
bb:
%i = load i32, ptr addrspace(1) %arg, align 4
fence syncscope("workgroup") release
@@ -46,15 +53,6 @@ bb:
ret void
}
-; GCN-LABEL: {{^}}memory_phi_no_clobber:
-; GCN: s_load_dword s
-; GCN: s_waitcnt lgkmcnt(0)
-; GCN: s_waitcnt lgkmcnt(0)
-; GCN: s_barrier
-; GCN-NOT: global_load_dword
-; GCN: s_load_dword s
-; GCN-NOT: global_load_dword
-; GCN: global_store_dword
define amdgpu_kernel void @memory_phi_no_clobber(ptr addrspace(1) %arg, i1 %cond) {
; CHECK-LABEL: @memory_phi_no_clobber(
; CHECK-NEXT: bb:
@@ -74,6 +72,32 @@ define amdgpu_kernel void @memory_phi_no_clobber(ptr addrspace(1) %arg, i1 %cond
; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4
; CHECK-NEXT: ret void
;
+; GCN-LABEL: memory_phi_no_clobber:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GCN-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_bitcmp0_b32 s2, 0
+; GCN-NEXT: s_load_dword s4, s[0:1], 0x0
+; GCN-NEXT: s_mov_b64 s[2:3], -1
+; GCN-NEXT: s_cbranch_scc0 .LBB1_2
+; GCN-NEXT: ; %bb.1: ; %if.else
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[2:3], 0
+; GCN-NEXT: .LBB1_2: ; %Flow
+; GCN-NEXT: s_andn2_b64 vcc, exec, s[2:3]
+; GCN-NEXT: s_cbranch_vccnz .LBB1_4
+; GCN-NEXT: ; %bb.3: ; %if.then
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_barrier
+; GCN-NEXT: .LBB1_4: ; %if.end
+; GCN-NEXT: s_load_dword s2, s[0:1], 0x4
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_add_i32 s2, s2, s4
+; GCN-NEXT: v_mov_b32_e32 v1, s2
+; GCN-NEXT: global_store_dword v0, v1, s[0:1] offset:8
+; GCN-NEXT: s_endpgm
bb:
%i = load i32, ptr addrspace(1) %arg, align 4
br i1 %cond, label %if.then, label %if.else
@@ -95,12 +119,6 @@ if.end:
ret void
}
-; GCN-LABEL: {{^}}memory_phi_clobber1:
-; GCN: s_load_dword s
-; GCN: s_barrier
-; GCN: global_store_dword
-; GCN: global_load_dword
-; GCN: global_store_dword
define amdgpu_kernel void @memory_phi_clobber1(ptr addrspace(1) %arg, i1 %cond) {
; CHECK-LABEL: @memory_phi_clobber1(
; CHECK-NEXT: bb:
@@ -121,6 +139,33 @@ define amdgpu_kernel void @memory_phi_clobber1(ptr addrspace(1) %arg, i1 %cond)
; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4
; CHECK-NEXT: ret void
;
+; GCN-LABEL: memory_phi_clobber1:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GCN-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_bitcmp0_b32 s2, 0
+; GCN-NEXT: s_load_dword s4, s[0:1], 0x0
+; GCN-NEXT: s_mov_b64 s[2:3], -1
+; GCN-NEXT: s_cbranch_scc0 .LBB2_2
+; GCN-NEXT: ; %bb.1: ; %if.else
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_barrier
+; GCN-NEXT: s_mov_b64 s[2:3], 0
+; GCN-NEXT: .LBB2_2: ; %Flow
+; GCN-NEXT: s_andn2_b64 vcc, exec, s[2:3]
+; GCN-NEXT: s_cbranch_vccnz .LBB2_4
+; GCN-NEXT: ; %bb.3: ; %if.then
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: v_mov_b32_e32 v1, 1
+; GCN-NEXT: global_store_dword v0, v1, s[0:1] offset:12
+; GCN-NEXT: .LBB2_4: ; %if.end
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: global_load_dword v1, v0, s[0:1] offset:4
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_add_u32_e32 v1, s4, v1
+; GCN-NEXT: global_store_dword v0, v1, s[0:1] offset:8
+; GCN-NEXT: s_endpgm
bb:
%i = load i32, ptr addrspace(1) %arg, align 4
br i1 %cond, label %if.then, label %if.else
@@ -143,12 +188,6 @@ if.end:
ret void
}
-; GCN-LABEL: {{^}}memory_phi_clobber2:
-; GCN-DAG: s_load_dword s
-; GCN-DAG: global_store_dword
-; GCN: s_barrier
-; GCN: global_load_dword
-; GCN: global_store_dword
define amdgpu_kernel void @memory_phi_clobber2(ptr addrspace(1) %arg, i1 %cond) {
; CHECK-LABEL: @memory_phi_clobber2(
; CHECK-NEXT: bb:
@@ -169,6 +208,33 @@ define amdgpu_kernel void @memory_phi_clobber2(ptr addrspace(1) %arg, i1 %cond)
; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4
; CHECK-NEXT: ret void
;
+; GCN-LABEL: memory_phi_clobber2:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_bitcmp0_b32 s2, 0
+; GCN-NEXT: s_mov_b64 s[2:3], -1
+; GCN-NEXT: s_cbranch_scc0 .LBB3_2
+; GCN-NEXT: ; %bb.1: ; %if.else
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: v_mov_b32_e32 v1, 1
+; GCN-NEXT: global_store_dword v0, v1, s[0:1] offset:12
+; GCN-NEXT: s_mov_b64 s[2:3], 0
+; GCN-NEXT: .LBB3_2: ; %Flow
+; GCN-NEXT: s_load_dword s4, s[0:1], 0x0
+; GCN-NEXT: s_andn2_b64 vcc, exec, s[2:3]
+; GCN-NEXT: s_cbranch_vccnz .LBB3_4
+; GCN-NEXT: ; %bb.3: ; %if.then
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_barrier
+; GCN-NEXT: .LBB3_4: ; %if.end
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: global_load_dword v1, v0, s[0:1] offset:4
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_add_u32_e32 v1, s4, v1
+; GCN-NEXT: global_store_dword v0, v1, s[0:1] offset:8
+; GCN-NEXT: s_endpgm
bb:
%i = load i32, ptr addrspace(1) %arg, align 4
br i1 %cond, label %if.then, label %if.else
@@ -191,11 +257,6 @@ if.end:
ret void
}
-; GCN-LABEL: {{^}}no_clobbering_loop1:
-; GCN: s_load_dword s
-; GCN: s_load_dword s
-; GCN-NOT: global_load_dword
-; GCN: global_store_dword
define amdgpu_kernel void @no_clobbering_loop1(ptr addrspace(1) %arg, i1 %cc) {
; CHECK-LABEL: @no_clobbering_loop1(
; CHECK-NEXT: bb:
@@ -212,6 +273,30 @@ define amdgpu_kernel void @no_clobbering_loop1(ptr addrspace(1) %arg, i1 %cc) {
; CHECK: end:
; CHECK-NEXT: ret void
;
+; GCN-LABEL: no_clobbering_loop1:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dword s0, s[4:5], 0x2c
+; GCN-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_bitcmp1_b32 s0, 0
+; GCN-NEXT: s_load_dword s4, s[2:3], 0x0
+; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0
+; GCN-NEXT: s_xor_b64 s[0:1], s[0:1], -1
+; GCN-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
+; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v1
+; GCN-NEXT: .LBB4_1: ; %while.cond
+; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT: s_load_dword s5, s[2:3], 0x4
+; GCN-NEXT: s_and_b64 vcc, exec, s[0:1]
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_add_i32 s5, s5, s4
+; GCN-NEXT: v_mov_b32_e32 v1, s5
+; GCN-NEXT: global_store_dword v0, v1, s[2:3] offset:8
+; GCN-NEXT: ; wave barrier
+; GCN-NEXT: s_cbranch_vccnz .LBB4_1
+; GCN-NEXT: ; %bb.2: ; %end
+; GCN-NEXT: s_endpgm
bb:
%i = load i32, ptr addrspace(1) %arg, align 4
br label %while.cond
@@ -229,11 +314,6 @@ end:
ret void
}
-; GCN-LABEL: {{^}}no_clobbering_loop2:
-; GCN: s_load_dword s
-; GCN: s_load_dword s
-; GCN-NOT: global_load_dword
-; GCN: global_store_dword
define amdgpu_kernel void @no_clobbering_loop2(ptr addrspace(1) noalias %arg, ptr addrspace(1) noalias %out, i32 %n) {
; CHECK-LABEL: @no_clobbering_loop2(
; CHECK-NEXT: bb:
@@ -253,6 +333,28 @@ define amdgpu_kernel void @no_clobbering_loop2(ptr addrspace(1) noalias %arg, pt
; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
+; GCN-LABEL: no_clobbering_loop2:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_load_dword s4, s[0:1], 0x0
+; GCN-NEXT: .LBB5_1: ; %while.cond
+; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT: s_load_dword s5, s[0:1], 0x0
+; GCN-NEXT: ; wave barrier
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_add_i32 s4, s5, s4
+; GCN-NEXT: s_add_u32 s0, s0, 4
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_add_i32 s6, s6, -1
+; GCN-NEXT: s_cmp_eq_u32 s6, 0
+; GCN-NEXT: s_cbranch_scc1 .LBB5_1
+; GCN-NEXT: ; %bb.2: ; %end
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: v_mov_b32_e32 v1, s4
+; GCN-NEXT: global_store_dword v0, v1, s[2:3]
+; GCN-NEXT: s_endpgm
bb:
%i = load i32, ptr addrspace(1) %arg, align 4
br label %while.cond
@@ -273,10 +375,6 @@ end:
ret void
}
-; GCN-LABEL: {{^}}clobbering_loop:
-; GCN: s_load_dword s
-; GCN: global_load_dword
-; GCN: global_store_dword
define amdgpu_kernel void @clobbering_loop(ptr addrspace(1) %arg, ptr addrspace(1) %out, i1 %cc) {
; CHECK-LABEL: @clobbering_loop(
; CHECK-NEXT: bb:
@@ -293,6 +391,29 @@ define amdgpu_kernel void @clobbering_loop(ptr addrspace(1) %arg, ptr addrspace(
; CHECK: end:
; CHECK-NEXT: ret void
;
+; GCN-LABEL: clobbering_loop:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dword s0, s[4:5], 0x34
+; GCN-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_bitcmp1_b32 s0, 0
+; GCN-NEXT: s_load_dword s2, s[8:9], 0x0
+; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0
+; GCN-NEXT: s_xor_b64 s[0:1], s[0:1], -1
+; GCN-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
+; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v1
+; GCN-NEXT: .LBB6_1: ; %while.cond
+; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT: global_load_dword v1, v0, s[8:9] offset:4
+; GCN-NEXT: s_and_b64 vcc, exec, s[0:1]
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_add_u32_e32 v1, s2, v1
+; GCN-NEXT: global_store_dword v0, v1, s[10:11] offset:4
+; GCN-NEXT: ; wave barrier
+; GCN-NEXT: s_cbranch_vccnz .LBB6_1
+; GCN-NEXT: ; %bb.2: ; %end
+; GCN-NEXT: s_endpgm
bb:
%i = load i32, ptr addrspace(1) %arg, align 4
br label %while.cond
@@ -310,11 +431,6 @@ end:
ret void
}
-; GCN-LABEL: {{^}}clobber_by_atomic_load:
-; GCN: s_load_dword s
-; GCN: global_load_dword {{.*}} glc
-; GCN: global_load_dword
-; GCN: global_store_dword
define amdgpu_kernel void @clobber_by_atomic_load(ptr addrspace(1) %arg) {
; CHECK-LABEL: @clobber_by_atomic_load(
; CHECK-NEXT: bb:
@@ -328,6 +444,21 @@ define amdgpu_kernel void @clobber_by_atomic_load(ptr addrspace(1) %arg) {
; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4
; CHECK-NEXT: ret void
;
+; GCN-LABEL: clobber_by_atomic_load:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_load_dword s2, s[0:1], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: global_load_dword v1, v0, s[0:1] offset:8 glc
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_wbinvl1_vol
+; GCN-NEXT: global_load_dword v1, v0, s[0:1] offset:12
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_add_u32_e32 v1, s2, v1
+; GCN-NEXT: global_store_dword v0, v1, s[0:1] offset:16
+; GCN-NEXT: s_endpgm
bb:
%i = load i32, ptr addrspace(1) %arg, align 4
%gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 2
@@ -340,12 +471,6 @@ bb:
ret void
}
-; GCN-LABEL: {{^}}no_alias_store:
-; GCN: ds_write_b32
-; GCN: s_barrier
-; GCN: s_load_dword s
-; GCN-NOT: global_load_dword
-; GCN: global_store_dword
define protected amdgpu_kernel void @no_alias_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; CHECK-LABEL: @no_alias_store(
; CHECK-NEXT: entry:
@@ -357,6 +482,18 @@ define protected amdgpu_kernel void @no_alias_store(ptr addrspace(1) %in, ptr ad
; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
+; GCN-LABEL: no_alias_store:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: ds_write_b32 v0, v0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_barrier
+; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v1, s0
+; GCN-NEXT: global_store_dword v0, v1, s[2:3]
+; GCN-NEXT: s_endpgm
entry:
store i32 0, ptr addrspace(3) @LDS, align 4
fence syncscope("workgroup") release
@@ -367,11 +504,6 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}may_alias_store:
-; GCN: global_store_dword
-; GCN: s_barrier
-; GCN: global_load_dword
-; GCN: global_store_dword
define protected amdgpu_kernel void @may_alias_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; CHECK-LABEL: @may_alias_store(
; CHECK-NEXT: entry:
@@ -383,6 +515,18 @@ define protected amdgpu_kernel void @may_alias_store(ptr addrspace(1) %in, ptr a
; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT: ret void
;
+; GCN-LABEL: may_alias_store:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: global_store_dword v0, v0, s[2:3]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_barrier
+; GCN-NEXT: global_load_dword v1, v0, s[0:1]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: global_store_dword v0, v1, s[2:3]
+; GCN-NEXT: s_endpgm
entry:
store i32 0, ptr addrspace(1) %out, align 4
fence syncscope("workgroup") release
@@ -393,12 +537,6 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}no_alias_volatile_store:
-; GCN: ds_write_b32
-; GCN: s_barrier
-; GCN: s_load_dword s
-; GCN-NOT: global_load_dword
-; GCN: global_store_dword
define protected amdgpu_kernel void @no_alias_volatile_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; CHECK-LABEL: @no_alias_volatile_store(
; CHECK-NEXT: entry:
@@ -410,6 +548,18 @@ define protected amdgpu_kernel void @no_alias_volatile_store(ptr addrspace(1) %i
; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
+; GCN-LABEL: no_alias_volatile_store:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: ds_write_b32 v0, v0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_barrier
+; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v1, s0
+; GCN-NEXT: global_store_dword v0, v1, s[2:3]
+; GCN-NEXT: s_endpgm
entry:
store volatile i32 0, ptr addrspace(3) @LDS, align 4
fence syncscope("workgroup") release
@@ -420,11 +570,6 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}no_alias_atomic_rmw_relaxed:
-; GCN: ds_add_u32
-; GCN: s_load_dword s
-; GCN-NOT: global_load_dword
-; GCN: global_store_dword
define protected amdgpu_kernel void @no_alias_atomic_rmw_relaxed(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; CHECK-LABEL: @no_alias_atomic_rmw_relaxed(
; CHECK-NEXT: entry:
@@ -433,6 +578,18 @@ define protected amdgpu_kernel void @no_alias_atomic_rmw_relaxed(ptr addrspace(1
; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
+; GCN-LABEL: no_alias_atomic_rmw_relaxed:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT: v_mov_b32_e32 v0, 5
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: ds_add_u32 v1, v0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: global_store_dword v1, v0, s[2:3]
+; GCN-NEXT: s_endpgm
entry:
%unused = atomicrmw add ptr addrspace(3) @LDS, i32 5 monotonic
%ld = load i32, ptr addrspace(1) %in, align 4
@@ -440,11 +597,6 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}no_alias_atomic_cmpxchg:
-; GCN: ds_cmpst_b32
-; GCN: s_load_dword s
-; GCN-NOT: global_load_dword
-; GCN: global_store_dword
define protected amdgpu_kernel void @no_alias_atomic_cmpxchg(ptr addrspace(1) %in, ptr addrspace(1) %out, i32 %swap) {
; CHECK-LABEL: @no_alias_atomic_cmpxchg(
; CHECK-NEXT: entry:
@@ -456,6 +608,22 @@ define protected amdgpu_kernel void @no_alias_atomic_cmpxchg(ptr addrspace(1) %i
; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
+; GCN-LABEL: no_alias_atomic_cmpxchg:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dword s6, s[4:5], 0x34
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT: v_mov_b32_e32 v0, 7
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v2, s6
+; GCN-NEXT: ds_cmpst_b32 v1, v0, v2
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_barrier
+; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: global_store_dword v1, v0, s[2:3]
+; GCN-NEXT: s_endpgm
entry:
%unused = cmpxchg ptr addrspace(3) @LDS, i32 7, i32 %swap seq_cst monotonic
fence syncscope("workgroup") release
@@ -466,11 +634,6 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}no_alias_atomic_rmw:
-; GCN: ds_add_u32
-; GCN: s_load_dword s
-; GCN-NOT: global_load_dword
-; GCN: global_store_dword
define protected amdgpu_kernel void @no_alias_atomic_rmw(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; CHECK-LABEL: @no_alias_atomic_rmw(
; CHECK-NEXT: entry:
@@ -482,6 +645,20 @@ define protected amdgpu_kernel void @no_alias_atomic_rmw(ptr addrspace(1) %in, p
; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
+; GCN-LABEL: no_alias_atomic_rmw:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT: v_mov_b32_e32 v0, 5
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: ds_add_u32 v1, v0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_barrier
+; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: global_store_dword v1, v0, s[2:3]
+; GCN-NEXT: s_endpgm
entry:
%unused = atomicrmw add ptr addrspace(3) @LDS, i32 5 seq_cst
fence syncscope("workgroup") release
@@ -492,10 +669,6 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}may_alias_atomic_cmpxchg:
-; GCN: global_atomic_cmpswap
-; GCN: global_load_dword
-; GCN: global_store_dword
define protected amdgpu_kernel void @may_alias_atomic_cmpxchg(ptr addrspace(1) %in, ptr addrspace(1) %out, i32 %swap) {
; CHECK-LABEL: @may_alias_atomic_cmpxchg(
; CHECK-NEXT: entry:
@@ -507,6 +680,22 @@ define protected amdgpu_kernel v...
[truncated]
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn -mcpu=gfx900 -amdgpu-aa -amdgpu-aa-wrapper -amdgpu-annotate-uniform -S < %s | FileCheck %s |
Should switch the opt run line to use the new PM too, afterwards.
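For illustration only, a new-PM version of that run line might look like the sketch below; the `-passes=amdgpu-annotate-uniform` pass name and the handling of the AMDGPU AA wrappers under the new pass manager are assumptions, not verified against the current tree:

```llvm
; Hypothetical new-PM RUN line (pass name assumed; check the tree before use):
; RUN: opt -mtriple=amdgcn -mcpu=gfx900 -passes=amdgpu-annotate-uniform -S < %s | FileCheck %s
```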