
[AMDGPU] upgrade test to use auto-generated llc checks #145832


Merged: 1 commit into main on Jun 26, 2025

Conversation

ssahasra (Collaborator)

This makes it easier to fix the failure in #145720

@llvmbot (Member) commented Jun 26, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Sameer Sahasrabuddhe (ssahasra)

Changes

This makes it easier to fix the failure in #145720


Patch is 24.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/145832.diff

1 file affected:

  • (modified) llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll (+324-102)
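
For reference, checks of this form are regenerated with the script named in the new NOTE line. A minimal sketch of the invocation, assuming an in-tree build with llc available (the build path here is illustrative, not taken from the PR):

    llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc \
        llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll

The script runs the test's llc RUN line, captures the emitted assembly, and rewrites the GCN-prefixed assertions in place; that is why the hand-written GCN-LABEL patterns below are replaced by exhaustive GCN-NEXT sequences.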
diff --git a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
index a8fab161e1ffb..34260c49ff92c 100644
--- a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
@@ -1,4 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -mtriple=amdgcn -mcpu=gfx900 -amdgpu-aa -amdgpu-aa-wrapper -amdgpu-annotate-uniform -S < %s | FileCheck %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefix=GCN %s
 
@@ -7,16 +8,6 @@
 
 @LDS = linkonce_odr hidden local_unnamed_addr addrspace(3) global i32 poison
 
-; GCN-LABEL: {{^}}simple_barrier:
-; GCN: s_load_dword s
-; GCN: s_waitcnt lgkmcnt(0)
-; GCN: s_barrier
-; GCN: s_waitcnt lgkmcnt(0)
-; GCN: ; wave barrier
-; GCN-NOT: global_load_dword
-; GCN: s_load_dword s
-; GCN-NOT: global_load_dword
-; GCN: global_store_dword
 define amdgpu_kernel void @simple_barrier(ptr addrspace(1) %arg) {
 ; CHECK-LABEL: @simple_barrier(
 ; CHECK-NEXT:  bb:
@@ -32,6 +23,22 @@ define amdgpu_kernel void @simple_barrier(ptr addrspace(1) %arg) {
 ; CHECK-NEXT:    store i32 [[I3]], ptr addrspace(1) [[I4]], align 4
 ; CHECK-NEXT:    ret void
 ;
+; GCN-LABEL: simple_barrier:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_barrier
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    ; wave barrier
+; GCN-NEXT:    s_load_dword s3, s[0:1], 0x4
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_add_i32 s2, s3, s2
+; GCN-NEXT:    v_mov_b32_e32 v1, s2
+; GCN-NEXT:    global_store_dword v0, v1, s[0:1] offset:8
+; GCN-NEXT:    s_endpgm
 bb:
   %i = load i32, ptr addrspace(1) %arg, align 4
   fence syncscope("workgroup") release
@@ -46,15 +53,6 @@ bb:
   ret void
 }
 
-; GCN-LABEL: {{^}}memory_phi_no_clobber:
-; GCN: s_load_dword s
-; GCN: s_waitcnt lgkmcnt(0)
-; GCN: s_waitcnt lgkmcnt(0)
-; GCN: s_barrier
-; GCN-NOT: global_load_dword
-; GCN: s_load_dword s
-; GCN-NOT: global_load_dword
-; GCN: global_store_dword
 define amdgpu_kernel void @memory_phi_no_clobber(ptr addrspace(1) %arg, i1 %cond) {
 ; CHECK-LABEL: @memory_phi_no_clobber(
 ; CHECK-NEXT:  bb:
@@ -74,6 +72,32 @@ define amdgpu_kernel void @memory_phi_no_clobber(ptr addrspace(1) %arg, i1 %cond
 ; CHECK-NEXT:    store i32 [[I3]], ptr addrspace(1) [[I4]], align 4
 ; CHECK-NEXT:    ret void
 ;
+; GCN-LABEL: memory_phi_no_clobber:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GCN-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_bitcmp0_b32 s2, 0
+; GCN-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GCN-NEXT:    s_mov_b64 s[2:3], -1
+; GCN-NEXT:    s_cbranch_scc0 .LBB1_2
+; GCN-NEXT:  ; %bb.1: ; %if.else
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b64 s[2:3], 0
+; GCN-NEXT:  .LBB1_2: ; %Flow
+; GCN-NEXT:    s_andn2_b64 vcc, exec, s[2:3]
+; GCN-NEXT:    s_cbranch_vccnz .LBB1_4
+; GCN-NEXT:  ; %bb.3: ; %if.then
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_barrier
+; GCN-NEXT:  .LBB1_4: ; %if.end
+; GCN-NEXT:    s_load_dword s2, s[0:1], 0x4
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_add_i32 s2, s2, s4
+; GCN-NEXT:    v_mov_b32_e32 v1, s2
+; GCN-NEXT:    global_store_dword v0, v1, s[0:1] offset:8
+; GCN-NEXT:    s_endpgm
 bb:
   %i = load i32, ptr addrspace(1) %arg, align 4
   br i1 %cond, label %if.then, label %if.else
@@ -95,12 +119,6 @@ if.end:
   ret void
 }
 
-; GCN-LABEL: {{^}}memory_phi_clobber1:
-; GCN: s_load_dword s
-; GCN: s_barrier
-; GCN: global_store_dword
-; GCN: global_load_dword
-; GCN: global_store_dword
 define amdgpu_kernel void @memory_phi_clobber1(ptr addrspace(1) %arg, i1 %cond) {
 ; CHECK-LABEL: @memory_phi_clobber1(
 ; CHECK-NEXT:  bb:
@@ -121,6 +139,33 @@ define amdgpu_kernel void @memory_phi_clobber1(ptr addrspace(1) %arg, i1 %cond)
 ; CHECK-NEXT:    store i32 [[I3]], ptr addrspace(1) [[I4]], align 4
 ; CHECK-NEXT:    ret void
 ;
+; GCN-LABEL: memory_phi_clobber1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GCN-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_bitcmp0_b32 s2, 0
+; GCN-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GCN-NEXT:    s_mov_b64 s[2:3], -1
+; GCN-NEXT:    s_cbranch_scc0 .LBB2_2
+; GCN-NEXT:  ; %bb.1: ; %if.else
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_barrier
+; GCN-NEXT:    s_mov_b64 s[2:3], 0
+; GCN-NEXT:  .LBB2_2: ; %Flow
+; GCN-NEXT:    s_andn2_b64 vcc, exec, s[2:3]
+; GCN-NEXT:    s_cbranch_vccnz .LBB2_4
+; GCN-NEXT:  ; %bb.3: ; %if.then
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    v_mov_b32_e32 v1, 1
+; GCN-NEXT:    global_store_dword v0, v1, s[0:1] offset:12
+; GCN-NEXT:  .LBB2_4: ; %if.end
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    global_load_dword v1, v0, s[0:1] offset:4
+; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_add_u32_e32 v1, s4, v1
+; GCN-NEXT:    global_store_dword v0, v1, s[0:1] offset:8
+; GCN-NEXT:    s_endpgm
 bb:
   %i = load i32, ptr addrspace(1) %arg, align 4
   br i1 %cond, label %if.then, label %if.else
@@ -143,12 +188,6 @@ if.end:
   ret void
 }
 
-; GCN-LABEL: {{^}}memory_phi_clobber2:
-; GCN-DAG: s_load_dword s
-; GCN-DAG: global_store_dword
-; GCN: s_barrier
-; GCN: global_load_dword
-; GCN: global_store_dword
 define amdgpu_kernel void @memory_phi_clobber2(ptr addrspace(1) %arg, i1 %cond) {
 ; CHECK-LABEL: @memory_phi_clobber2(
 ; CHECK-NEXT:  bb:
@@ -169,6 +208,33 @@ define amdgpu_kernel void @memory_phi_clobber2(ptr addrspace(1) %arg, i1 %cond)
 ; CHECK-NEXT:    store i32 [[I3]], ptr addrspace(1) [[I4]], align 4
 ; CHECK-NEXT:    ret void
 ;
+; GCN-LABEL: memory_phi_clobber2:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_bitcmp0_b32 s2, 0
+; GCN-NEXT:    s_mov_b64 s[2:3], -1
+; GCN-NEXT:    s_cbranch_scc0 .LBB3_2
+; GCN-NEXT:  ; %bb.1: ; %if.else
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    v_mov_b32_e32 v1, 1
+; GCN-NEXT:    global_store_dword v0, v1, s[0:1] offset:12
+; GCN-NEXT:    s_mov_b64 s[2:3], 0
+; GCN-NEXT:  .LBB3_2: ; %Flow
+; GCN-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GCN-NEXT:    s_andn2_b64 vcc, exec, s[2:3]
+; GCN-NEXT:    s_cbranch_vccnz .LBB3_4
+; GCN-NEXT:  ; %bb.3: ; %if.then
+; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_barrier
+; GCN-NEXT:  .LBB3_4: ; %if.end
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    global_load_dword v1, v0, s[0:1] offset:4
+; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_add_u32_e32 v1, s4, v1
+; GCN-NEXT:    global_store_dword v0, v1, s[0:1] offset:8
+; GCN-NEXT:    s_endpgm
 bb:
   %i = load i32, ptr addrspace(1) %arg, align 4
   br i1 %cond, label %if.then, label %if.else
@@ -191,11 +257,6 @@ if.end:
   ret void
 }
 
-; GCN-LABEL: {{^}}no_clobbering_loop1:
-; GCN: s_load_dword s
-; GCN: s_load_dword s
-; GCN-NOT: global_load_dword
-; GCN: global_store_dword
 define amdgpu_kernel void @no_clobbering_loop1(ptr addrspace(1) %arg, i1 %cc) {
 ; CHECK-LABEL: @no_clobbering_loop1(
 ; CHECK-NEXT:  bb:
@@ -212,6 +273,30 @@ define amdgpu_kernel void @no_clobbering_loop1(ptr addrspace(1) %arg, i1 %cc) {
 ; CHECK:       end:
 ; CHECK-NEXT:    ret void
 ;
+; GCN-LABEL: no_clobbering_loop1:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_load_dword s0, s[4:5], 0x2c
+; GCN-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_bitcmp1_b32 s0, 0
+; GCN-NEXT:    s_load_dword s4, s[2:3], 0x0
+; GCN-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; GCN-NEXT:    s_xor_b64 s[0:1], s[0:1], -1
+; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
+; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 1, v1
+; GCN-NEXT:  .LBB4_1: ; %while.cond
+; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT:    s_load_dword s5, s[2:3], 0x4
+; GCN-NEXT:    s_and_b64 vcc, exec, s[0:1]
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_add_i32 s5, s5, s4
+; GCN-NEXT:    v_mov_b32_e32 v1, s5
+; GCN-NEXT:    global_store_dword v0, v1, s[2:3] offset:8
+; GCN-NEXT:    ; wave barrier
+; GCN-NEXT:    s_cbranch_vccnz .LBB4_1
+; GCN-NEXT:  ; %bb.2: ; %end
+; GCN-NEXT:    s_endpgm
 bb:
   %i = load i32, ptr addrspace(1) %arg, align 4
   br label %while.cond
@@ -229,11 +314,6 @@ end:
   ret void
 }
 
-; GCN-LABEL: {{^}}no_clobbering_loop2:
-; GCN: s_load_dword s
-; GCN: s_load_dword s
-; GCN-NOT: global_load_dword
-; GCN: global_store_dword
 define amdgpu_kernel void @no_clobbering_loop2(ptr addrspace(1) noalias %arg, ptr addrspace(1) noalias %out, i32 %n) {
 ; CHECK-LABEL: @no_clobbering_loop2(
 ; CHECK-NEXT:  bb:
@@ -253,6 +333,28 @@ define amdgpu_kernel void @no_clobbering_loop2(ptr addrspace(1) noalias %arg, pt
 ; CHECK-NEXT:    store i32 [[I3]], ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
+; GCN-LABEL: no_clobbering_loop2:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT:    s_load_dword s6, s[4:5], 0x34
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GCN-NEXT:  .LBB5_1: ; %while.cond
+; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT:    s_load_dword s5, s[0:1], 0x0
+; GCN-NEXT:    ; wave barrier
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_add_i32 s4, s5, s4
+; GCN-NEXT:    s_add_u32 s0, s0, 4
+; GCN-NEXT:    s_addc_u32 s1, s1, 0
+; GCN-NEXT:    s_add_i32 s6, s6, -1
+; GCN-NEXT:    s_cmp_eq_u32 s6, 0
+; GCN-NEXT:    s_cbranch_scc1 .LBB5_1
+; GCN-NEXT:  ; %bb.2: ; %end
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    v_mov_b32_e32 v1, s4
+; GCN-NEXT:    global_store_dword v0, v1, s[2:3]
+; GCN-NEXT:    s_endpgm
 bb:
   %i = load i32, ptr addrspace(1) %arg, align 4
   br label %while.cond
@@ -273,10 +375,6 @@ end:
   ret void
 }
 
-; GCN-LABEL: {{^}}clobbering_loop:
-; GCN: s_load_dword s
-; GCN: global_load_dword
-; GCN: global_store_dword
 define amdgpu_kernel void @clobbering_loop(ptr addrspace(1) %arg, ptr addrspace(1) %out, i1 %cc) {
 ; CHECK-LABEL: @clobbering_loop(
 ; CHECK-NEXT:  bb:
@@ -293,6 +391,29 @@ define amdgpu_kernel void @clobbering_loop(ptr addrspace(1) %arg, ptr addrspace(
 ; CHECK:       end:
 ; CHECK-NEXT:    ret void
 ;
+; GCN-LABEL: clobbering_loop:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_load_dword s0, s[4:5], 0x34
+; GCN-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_bitcmp1_b32 s0, 0
+; GCN-NEXT:    s_load_dword s2, s[8:9], 0x0
+; GCN-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; GCN-NEXT:    s_xor_b64 s[0:1], s[0:1], -1
+; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
+; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 1, v1
+; GCN-NEXT:  .LBB6_1: ; %while.cond
+; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT:    global_load_dword v1, v0, s[8:9] offset:4
+; GCN-NEXT:    s_and_b64 vcc, exec, s[0:1]
+; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_add_u32_e32 v1, s2, v1
+; GCN-NEXT:    global_store_dword v0, v1, s[10:11] offset:4
+; GCN-NEXT:    ; wave barrier
+; GCN-NEXT:    s_cbranch_vccnz .LBB6_1
+; GCN-NEXT:  ; %bb.2: ; %end
+; GCN-NEXT:    s_endpgm
 bb:
   %i = load i32, ptr addrspace(1) %arg, align 4
   br label %while.cond
@@ -310,11 +431,6 @@ end:
   ret void
 }
 
-; GCN-LABEL: {{^}}clobber_by_atomic_load:
-; GCN: s_load_dword s
-; GCN: global_load_dword {{.*}} glc
-; GCN: global_load_dword
-; GCN: global_store_dword
 define amdgpu_kernel void @clobber_by_atomic_load(ptr addrspace(1) %arg) {
 ; CHECK-LABEL: @clobber_by_atomic_load(
 ; CHECK-NEXT:  bb:
@@ -328,6 +444,21 @@ define amdgpu_kernel void @clobber_by_atomic_load(ptr addrspace(1) %arg) {
 ; CHECK-NEXT:    store i32 [[I3]], ptr addrspace(1) [[I4]], align 4
 ; CHECK-NEXT:    ret void
 ;
+; GCN-LABEL: clobber_by_atomic_load:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    global_load_dword v1, v0, s[0:1] offset:8 glc
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    buffer_wbinvl1_vol
+; GCN-NEXT:    global_load_dword v1, v0, s[0:1] offset:12
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_add_u32_e32 v1, s2, v1
+; GCN-NEXT:    global_store_dword v0, v1, s[0:1] offset:16
+; GCN-NEXT:    s_endpgm
 bb:
   %i = load i32, ptr addrspace(1) %arg, align 4
   %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 2
@@ -340,12 +471,6 @@ bb:
   ret void
 }
 
-; GCN-LABEL: {{^}}no_alias_store:
-; GCN: ds_write_b32
-; GCN: s_barrier
-; GCN: s_load_dword s
-; GCN-NOT: global_load_dword
-; GCN: global_store_dword
 define protected amdgpu_kernel void @no_alias_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
 ; CHECK-LABEL: @no_alias_store(
 ; CHECK-NEXT:  entry:
@@ -357,6 +482,18 @@ define protected amdgpu_kernel void @no_alias_store(ptr addrspace(1) %in, ptr ad
 ; CHECK-NEXT:    store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
+; GCN-LABEL: no_alias_store:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    ds_write_b32 v0, v0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_barrier
+; GCN-NEXT:    s_load_dword s0, s[0:1], 0x0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v1, s0
+; GCN-NEXT:    global_store_dword v0, v1, s[2:3]
+; GCN-NEXT:    s_endpgm
 entry:
   store i32 0, ptr addrspace(3) @LDS, align 4
   fence syncscope("workgroup") release
@@ -367,11 +504,6 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}may_alias_store:
-; GCN: global_store_dword
-; GCN: s_barrier
-; GCN: global_load_dword
-; GCN: global_store_dword
 define protected amdgpu_kernel void @may_alias_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
 ; CHECK-LABEL: @may_alias_store(
 ; CHECK-NEXT:  entry:
@@ -383,6 +515,18 @@ define protected amdgpu_kernel void @may_alias_store(ptr addrspace(1) %in, ptr a
 ; CHECK-NEXT:    store i32 [[LD]], ptr addrspace(1) [[OUT]], align 4
 ; CHECK-NEXT:    ret void
 ;
+; GCN-LABEL: may_alias_store:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    global_store_dword v0, v0, s[2:3]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_barrier
+; GCN-NEXT:    global_load_dword v1, v0, s[0:1]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    global_store_dword v0, v1, s[2:3]
+; GCN-NEXT:    s_endpgm
 entry:
   store i32 0, ptr addrspace(1) %out, align 4
   fence syncscope("workgroup") release
@@ -393,12 +537,6 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}no_alias_volatile_store:
-; GCN: ds_write_b32
-; GCN: s_barrier
-; GCN: s_load_dword s
-; GCN-NOT: global_load_dword
-; GCN: global_store_dword
 define protected amdgpu_kernel void @no_alias_volatile_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
 ; CHECK-LABEL: @no_alias_volatile_store(
 ; CHECK-NEXT:  entry:
@@ -410,6 +548,18 @@ define protected amdgpu_kernel void @no_alias_volatile_store(ptr addrspace(1) %i
 ; CHECK-NEXT:    store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
+; GCN-LABEL: no_alias_volatile_store:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    ds_write_b32 v0, v0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_barrier
+; GCN-NEXT:    s_load_dword s0, s[0:1], 0x0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v1, s0
+; GCN-NEXT:    global_store_dword v0, v1, s[2:3]
+; GCN-NEXT:    s_endpgm
 entry:
   store volatile i32 0, ptr addrspace(3) @LDS, align 4
   fence syncscope("workgroup") release
@@ -420,11 +570,6 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}no_alias_atomic_rmw_relaxed:
-; GCN: ds_add_u32
-; GCN: s_load_dword s
-; GCN-NOT: global_load_dword
-; GCN: global_store_dword
 define protected amdgpu_kernel void @no_alias_atomic_rmw_relaxed(ptr addrspace(1) %in, ptr addrspace(1) %out) {
 ; CHECK-LABEL: @no_alias_atomic_rmw_relaxed(
 ; CHECK-NEXT:  entry:
@@ -433,6 +578,18 @@ define protected amdgpu_kernel void @no_alias_atomic_rmw_relaxed(ptr addrspace(1
 ; CHECK-NEXT:    store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
+; GCN-LABEL: no_alias_atomic_rmw_relaxed:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v0, 5
+; GCN-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-NEXT:    ds_add_u32 v1, v0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_load_dword s0, s[0:1], 0x0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-NEXT:    global_store_dword v1, v0, s[2:3]
+; GCN-NEXT:    s_endpgm
 entry:
   %unused = atomicrmw add ptr addrspace(3) @LDS, i32 5 monotonic
   %ld = load i32, ptr addrspace(1) %in, align 4
@@ -440,11 +597,6 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}no_alias_atomic_cmpxchg:
-; GCN: ds_cmpst_b32
-; GCN: s_load_dword s
-; GCN-NOT: global_load_dword
-; GCN: global_store_dword
 define protected amdgpu_kernel void @no_alias_atomic_cmpxchg(ptr addrspace(1) %in, ptr addrspace(1) %out, i32 %swap) {
 ; CHECK-LABEL: @no_alias_atomic_cmpxchg(
 ; CHECK-NEXT:  entry:
@@ -456,6 +608,22 @@ define protected amdgpu_kernel void @no_alias_atomic_cmpxchg(ptr addrspace(1) %i
 ; CHECK-NEXT:    store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
+; GCN-LABEL: no_alias_atomic_cmpxchg:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dword s6, s[4:5], 0x34
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v0, 7
+; GCN-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v2, s6
+; GCN-NEXT:    ds_cmpst_b32 v1, v0, v2
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_barrier
+; GCN-NEXT:    s_load_dword s0, s[0:1], 0x0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-NEXT:    global_store_dword v1, v0, s[2:3]
+; GCN-NEXT:    s_endpgm
 entry:
   %unused = cmpxchg ptr addrspace(3) @LDS, i32 7, i32 %swap seq_cst monotonic
   fence syncscope("workgroup") release
@@ -466,11 +634,6 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}no_alias_atomic_rmw:
-; GCN: ds_add_u32
-; GCN: s_load_dword s
-; GCN-NOT: global_load_dword
-; GCN: global_store_dword
 define protected amdgpu_kernel void @no_alias_atomic_rmw(ptr addrspace(1) %in, ptr addrspace(1) %out) {
 ; CHECK-LABEL: @no_alias_atomic_rmw(
 ; CHECK-NEXT:  entry:
@@ -482,6 +645,20 @@ define protected amdgpu_kernel void @no_alias_atomic_rmw(ptr addrspace(1) %in, p
 ; CHECK-NEXT:    store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
+; GCN-LABEL: no_alias_atomic_rmw:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v0, 5
+; GCN-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    ds_add_u32 v1, v0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_barrier
+; GCN-NEXT:    s_load_dword s0, s[0:1], 0x0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-NEXT:    global_store_dword v1, v0, s[2:3]
+; GCN-NEXT:    s_endpgm
 entry:
   %unused = atomicrmw add ptr addrspace(3) @LDS, i32 5 seq_cst
   fence syncscope("workgroup") release
@@ -492,10 +669,6 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}may_alias_atomic_cmpxchg:
-; GCN: global_atomic_cmpswap
-; GCN: global_load_dword
-; GCN: global_store_dword
 define protected amdgpu_kernel void @may_alias_atomic_cmpxchg(ptr addrspace(1) %in, ptr addrspace(1) %out, i32 %swap) {
 ; CHECK-LABEL: @may_alias_atomic_cmpxchg(
 ; CHECK-NEXT:  entry:
@@ -507,6 +680,22 @@ define protected amdgpu_kernel v...
[truncated]

@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn -mcpu=gfx900 -amdgpu-aa -amdgpu-aa-wrapper -amdgpu-annotate-uniform -S < %s | FileCheck %s
Contributor


Should switch the opt run line to use the new PM too afterwards.
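
As a non-authoritative sketch of what that could look like: the legacy -amdgpu-annotate-uniform flag would move into a -passes= pipeline, and the AA wrapper flags would fold into -aa-pipeline= (both registry names are assumptions, not verified against the AMDGPU pass registry):

    ; RUN: opt -mtriple=amdgcn -mcpu=gfx900 -aa-pipeline=amdgpu-aa -passes=amdgpu-annotate-uniform -S < %s | FileCheck %s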

ssahasra merged commit 741e1ca into main on Jun 26, 2025
9 checks passed
ssahasra deleted the users/ssahasra/lit-auto-checks branch on June 26, 2025 at 09:00
anthonyhatran pushed a commit to anthonyhatran/llvm-project that referenced this pull request Jun 26, 2025
rlavaee pushed a commit to rlavaee/llvm-project that referenced this pull request Jul 1, 2025