From 27c0e136777a2db49dbb0caa888d561819230493 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Mon, 6 May 2024 00:28:49 +0200 Subject: [PATCH] Minor codegen improvements (#225) --- ptx/src/emit.rs | 88 +++++--- ptx/src/test/spirv_run/abs.ll | 14 +- ptx/src/test/spirv_run/activemask.ll | 6 +- ptx/src/test/spirv_run/add.ll | 6 +- ptx/src/test/spirv_run/add_global.ll | 6 +- ptx/src/test/spirv_run/add_non_coherent.ll | 6 +- ptx/src/test/spirv_run/add_param_ptr.ll | 26 ++- ptx/src/test/spirv_run/add_tuning.ll | 6 +- ptx/src/test/spirv_run/addc_cc.ll | 54 ++--- ptx/src/test/spirv_run/addc_cc2.ll | 56 ++--- ptx/src/test/spirv_run/alloca_call.ll | 26 ++- ptx/src/test/spirv_run/amdgpu_unnamed.ll | 32 +-- ptx/src/test/spirv_run/and.ll | 10 +- ptx/src/test/spirv_run/assertfail.ll | 32 +-- ptx/src/test/spirv_run/atom_add.ll | 14 +- ptx/src/test/spirv_run/atom_add_f16.ll | 14 +- ptx/src/test/spirv_run/atom_add_float.ll | 14 +- ptx/src/test/spirv_run/atom_cas.ll | 20 +- ptx/src/test/spirv_run/atom_inc.ll | 14 +- ptx/src/test/spirv_run/atom_ld_st.ll | 6 +- ptx/src/test/spirv_run/atom_ld_st_vec.ll | 18 +- ptx/src/test/spirv_run/atom_max_u32.ll | 10 +- ptx/src/test/spirv_run/b64tof64.ll | 12 +- ptx/src/test/spirv_run/barrier.ll | 4 +- ptx/src/test/spirv_run/bfe.ll | 14 +- ptx/src/test/spirv_run/bfi.ll | 18 +- ptx/src/test/spirv_run/bfind.ll | 46 ++-- ptx/src/test/spirv_run/bfind_shiftamt.ll | 40 ++-- ptx/src/test/spirv_run/block.ll | 6 +- ptx/src/test/spirv_run/bra.ll | 12 +- ptx/src/test/spirv_run/brev.ll | 6 +- ptx/src/test/spirv_run/call.ll | 12 +- ptx/src/test/spirv_run/call_bug.ll | 20 +- ptx/src/test/spirv_run/call_multi_return.ll | 32 +-- ptx/src/test/spirv_run/callprototype.ll | 16 +- ptx/src/test/spirv_run/carry_set_all.ll | 210 +++++++++--------- ptx/src/test/spirv_run/clz.ll | 10 +- ptx/src/test/spirv_run/const.ll | 18 +- ptx/src/test/spirv_run/constant_f32.ll | 6 +- ptx/src/test/spirv_run/constant_negative.ll | 6 +- ptx/src/test/spirv_run/cos.ll | 6 +- 
ptx/src/test/spirv_run/cvt_clamp.ll | 30 +-- ptx/src/test/spirv_run/cvt_f32_f16.ll | 6 +- ptx/src/test/spirv_run/cvt_f32_s32.ll | 30 +-- ptx/src/test/spirv_run/cvt_f64_f32.ll | 6 +- ptx/src/test/spirv_run/cvt_rni.ll | 14 +- ptx/src/test/spirv_run/cvt_rzi.ll | 14 +- ptx/src/test/spirv_run/cvt_s16_s8.ll | 10 +- ptx/src/test/spirv_run/cvt_s32_f32.ll | 14 +- ptx/src/test/spirv_run/cvt_s64_s32.ll | 6 +- ptx/src/test/spirv_run/cvt_sat_s_u.ll | 20 +- ptx/src/test/spirv_run/cvt_u32_s16.ll | 6 +- ptx/src/test/spirv_run/cvta.ll | 18 +- ptx/src/test/spirv_run/div_approx.ll | 10 +- ptx/src/test/spirv_run/dp4a.ll | 14 +- ptx/src/test/spirv_run/ex2.ll | 30 +-- ptx/src/test/spirv_run/extern_shared.ll | 6 +- ptx/src/test/spirv_run/extern_shared_call.ll | 18 +- ptx/src/test/spirv_run/fma.ll | 14 +- ptx/src/test/spirv_run/func_ptr.ll | 12 +- ptx/src/test/spirv_run/generic.ll | 12 +- ptx/src/test/spirv_run/global_array.ll | 12 +- ptx/src/test/spirv_run/lanemask_lt.ll | 12 +- ptx/src/test/spirv_run/ld_st.ll | 6 +- ptx/src/test/spirv_run/ld_st_implicit.ll | 20 +- ptx/src/test/spirv_run/ld_st_offset.ll | 14 +- ptx/src/test/spirv_run/lg2.ll | 6 +- ptx/src/test/spirv_run/local_align.ll | 6 +- ptx/src/test/spirv_run/mad_hi_cc.ll | 58 ++--- ptx/src/test/spirv_run/mad_s32.ll | 50 +++-- ptx/src/test/spirv_run/madc_cc.ll | 42 ++-- ptx/src/test/spirv_run/max.ll | 10 +- ptx/src/test/spirv_run/membar.ll | 6 +- ptx/src/test/spirv_run/min.ll | 10 +- ptx/src/test/spirv_run/mov.ll | 12 +- ptx/src/test/spirv_run/mov_address.ll | 12 +- ptx/src/test/spirv_run/mov_vector_cast.ll | 22 +- ptx/src/test/spirv_run/mul_ftz.ll | 10 +- ptx/src/test/spirv_run/mul_hi.ll | 6 +- ptx/src/test/spirv_run/mul_lo.ll | 6 +- ptx/src/test/spirv_run/mul_non_ftz.ll | 10 +- ptx/src/test/spirv_run/mul_wide.ll | 16 +- ptx/src/test/spirv_run/multireg.ll | 6 +- ptx/src/test/spirv_run/neg.ll | 6 +- .../test/spirv_run/non_scalar_ptr_offset.ll | 10 +- ptx/src/test/spirv_run/not.ll | 6 +- ptx/src/test/spirv_run/ntid.ll | 12 +- 
ptx/src/test/spirv_run/or.ll | 10 +- ptx/src/test/spirv_run/param_ptr.ll | 12 +- ptx/src/test/spirv_run/popc.ll | 6 +- ptx/src/test/spirv_run/pred_not.ll | 26 ++- ptx/src/test/spirv_run/prmt.ll | 42 ++-- ptx/src/test/spirv_run/prmt_non_immediate.ll | 24 +- ptx/src/test/spirv_run/rcp.ll | 6 +- ptx/src/test/spirv_run/reg_local.ll | 14 +- ptx/src/test/spirv_run/rem.ll | 10 +- ptx/src/test/spirv_run/rsqrt.ll | 10 +- ptx/src/test/spirv_run/s64_min.ll | 12 +- ptx/src/test/spirv_run/sad.ll | 38 ++-- ptx/src/test/spirv_run/selp.ll | 10 +- ptx/src/test/spirv_run/selp_true.ll | 10 +- ptx/src/test/spirv_run/set_f16x2.ll | 32 +-- ptx/src/test/spirv_run/setp.ll | 26 ++- ptx/src/test/spirv_run/setp_bool.ll | 44 ++-- ptx/src/test/spirv_run/setp_gt.ll | 26 ++- ptx/src/test/spirv_run/setp_leu.ll | 26 ++- ptx/src/test/spirv_run/setp_nan.ll | 98 ++++---- ptx/src/test/spirv_run/setp_num.ll | 98 ++++---- ptx/src/test/spirv_run/setp_pred2.ll | 26 ++- ptx/src/test/spirv_run/shared_ptr_32.ll | 16 +- .../test/spirv_run/shared_ptr_take_address.ll | 12 +- ptx/src/test/spirv_run/shared_unify_decl.ll | 34 +-- ptx/src/test/spirv_run/shared_unify_extern.ll | 34 +-- ptx/src/test/spirv_run/shared_unify_local.ll | 36 +-- ptx/src/test/spirv_run/shared_variable.ll | 6 +- ptx/src/test/spirv_run/shf.ll | 10 +- ptx/src/test/spirv_run/shl.ll | 10 +- ptx/src/test/spirv_run/shl_link_hack.ll | 10 +- ptx/src/test/spirv_run/shl_overflow.ll | 44 ++-- ptx/src/test/spirv_run/shr_s32.ll | 16 +- ptx/src/test/spirv_run/shr_u32.ll | 30 +-- ptx/src/test/spirv_run/sign_extend.ll | 6 +- ptx/src/test/spirv_run/sin.ll | 6 +- ptx/src/test/spirv_run/sqrt.ll | 6 +- ptx/src/test/spirv_run/sub.ll | 6 +- ptx/src/test/spirv_run/subc_cc.ll | 54 ++--- ptx/src/test/spirv_run/vector.ll | 74 +++--- ptx/src/test/spirv_run/vector4.ll | 12 +- ptx/src/test/spirv_run/vector_extract.ll | 48 ++-- ptx/src/test/spirv_run/vote_ballot.ll | 22 +- ptx/src/test/spirv_run/vshr.ll | 22 +- ptx/src/test/spirv_run/xor.ll | 10 +- ptx/src/translate.rs | 
106 --------- 133 files changed, 1543 insertions(+), 1341 deletions(-) diff --git a/ptx/src/emit.rs b/ptx/src/emit.rs index 9e62d5b7..7388203b 100644 --- a/ptx/src/emit.rs +++ b/ptx/src/emit.rs @@ -7,7 +7,7 @@ use std::ffi::CStr; use std::fmt::Display; use std::io::Write; use std::ptr::null_mut; -use std::{convert, iter, mem, ptr}; +use std::{iter, mem, ptr}; use zluda_llvm::core::*; use zluda_llvm::prelude::*; use zluda_llvm::zluda::*; @@ -157,7 +157,7 @@ impl NamedIdGenerator { if let Some(id) = id { self.register_result(id, func) } else { - func(b"\0".as_ptr() as _) + func(LLVM_UNNAMED) } } @@ -505,10 +505,12 @@ fn emit_function_variable( ) -> Result<(), TranslateError> { let builder = ctx.builder.get(); let llvm_type = get_llvm_type(ctx, &variable.type_)?; - let addr_space = get_llvm_address_space(&ctx.constants, variable.state_space)?; - let value = ctx.names.register_result(variable.name, |name| unsafe { - LLVMZludaBuildAlloca(builder, llvm_type, addr_space, name) - }); + let value = emit_alloca( + ctx, + llvm_type, + get_llvm_address_space(&ctx.constants, variable.state_space)?, + Some(variable.name), + ); match variable.initializer { None => {} Some(init) => { @@ -531,12 +533,27 @@ fn emit_method<'a, 'input>( let llvm_method = emit_method_declaration(ctx, &method)?; emit_linkage_for_method(&method, is_kernel, llvm_method); emit_tuning(ctx, llvm_method, &method.tuning); - for statement in method.body.iter().flat_map(convert::identity) { + let statements = match method.body { + Some(statements) => statements, + None => return Ok(()), + }; + // Initial BB that holds all the variable declarations + let bb_with_variables = + unsafe { LLVMAppendBasicBlockInContext(ctx.context.get(), llvm_method, LLVM_UNNAMED) }; + // Rest of the code + let starting_bb = + unsafe { LLVMAppendBasicBlockInContext(ctx.context.get(), llvm_method, LLVM_UNNAMED) }; + unsafe { LLVMPositionBuilderAtEnd(ctx.builder.get(), starting_bb) }; + for statement in statements.iter() { 
register_basic_blocks(ctx, llvm_method, statement); } - for statement in method.body.into_iter().flatten() { + for statement in statements.into_iter() { emit_statement(ctx, is_kernel, statement)?; } + // happens if there is a post-ret trailing label + terminate_current_block_if_needed(ctx, None); + unsafe { LLVMPositionBuilderAtEnd(ctx.builder.get(), bb_with_variables) }; + unsafe { LLVMBuildBr(ctx.builder.get(), starting_bb) }; Ok(()) } @@ -604,7 +621,6 @@ fn emit_statement( is_kernel: bool, statement: crate::translate::ExpandedStatement, ) -> Result<(), TranslateError> { - start_synthetic_basic_block_if_needed(ctx, &statement); Ok(match statement { crate::translate::Statement::Label(label) => emit_label(ctx, label)?, crate::translate::Statement::Variable(var) => emit_function_variable(ctx, var)?, @@ -749,27 +765,6 @@ fn emit_ret_value( Ok(()) } -fn start_synthetic_basic_block_if_needed( - ctx: &mut EmitContext, - statement: &crate::translate::ExpandedStatement, -) { - let current_block = unsafe { LLVMGetInsertBlock(ctx.builder.get()) }; - if current_block == ptr::null_mut() { - return; - } - let terminator = unsafe { LLVMGetBasicBlockTerminator(current_block) }; - if terminator == ptr::null_mut() { - return; - } - if let crate::translate::Statement::Label(..) 
= statement { - return; - } - let new_block = - unsafe { LLVMCreateBasicBlockInContext(ctx.context.get(), b"\0".as_ptr() as _) }; - unsafe { LLVMInsertExistingBasicBlockAfterInsertBlock(ctx.builder.get(), new_block) }; - unsafe { LLVMPositionBuilderAtEnd(ctx.builder.get(), new_block) }; -} - fn emit_ptr_access( ctx: &mut EmitContext, ptr_access: &crate::translate::PtrAccess, @@ -1073,7 +1068,7 @@ fn emit_value_copy( ) -> Result<(), TranslateError> { let builder = ctx.builder.get(); let type_ = get_llvm_type(ctx, type_)?; - let temp_value = unsafe { LLVMBuildAlloca(builder, type_, LLVM_UNNAMED) }; + let temp_value = emit_alloca(ctx, type_, ctx.constants.private_space, None); unsafe { LLVMBuildStore(builder, src, temp_value) }; ctx.names.register_result(dst, |dst| unsafe { LLVMBuildLoad2(builder, type_, temp_value, dst) @@ -1081,6 +1076,28 @@ fn emit_value_copy( Ok(()) } +// From "Performance Tips for Frontend Authors" (https://llvm.org/docs/Frontend/PerformanceTips.html): +// "The SROA (Scalar Replacement Of Aggregates) and Mem2Reg passes only attempt to eliminate alloca +// instructions that are in the entry basic block. Given SSA is the canonical form expected by much +// of the optimizer; if allocas can not be eliminated by Mem2Reg or SROA, the optimizer is likely to +// be less effective than it could be." 
+fn emit_alloca( + ctx: &mut EmitContext, + type_: LLVMTypeRef, + addr_space: u32, + name: Option<Id>, +) -> LLVMValueRef { + let builder = ctx.builder.get(); + let current_bb = unsafe { LLVMGetInsertBlock(builder) }; + let variables_bb = unsafe { LLVMGetFirstBasicBlock(LLVMGetBasicBlockParent(current_bb)) }; + unsafe { LLVMPositionBuilderAtEnd(builder, variables_bb) }; + let result = ctx.names.register_result_option(name, |name| unsafe { + LLVMZludaBuildAlloca(builder, type_, addr_space, name) + }); + unsafe { LLVMPositionBuilderAtEnd(builder, current_bb) }; + result +} + fn emit_instruction( ctx: &mut EmitContext, is_kernel: bool, @@ -3494,12 +3511,12 @@ fn emit_store_var( fn emit_label(ctx: &mut EmitContext, label: Id) -> Result<(), TranslateError> { let new_block = unsafe { LLVMValueAsBasicBlock(ctx.names.value(label)?) }; - terminate_current_block_if_needed(ctx, new_block); + terminate_current_block_if_needed(ctx, Some(new_block)); unsafe { LLVMPositionBuilderAtEnd(ctx.builder.get(), new_block) }; Ok(()) } -fn terminate_current_block_if_needed(ctx: &mut EmitContext, new_block: LLVMBasicBlockRef) { +fn terminate_current_block_if_needed(ctx: &mut EmitContext, new_block: Option<LLVMBasicBlockRef>) { let current_block = unsafe { LLVMGetInsertBlock(ctx.builder.get()) }; if current_block == ptr::null_mut() { return; @@ -3508,7 +3525,10 @@ fn terminate_current_block_if_needed(ctx: &mut EmitContext, new_block: LLVMBasic if terminator != ptr::null_mut() { return; } - unsafe { LLVMBuildBr(ctx.builder.get(), new_block) }; + match new_block { + Some(new_block) => unsafe { LLVMBuildBr(ctx.builder.get(), new_block) }, + None => unsafe { LLVMBuildUnreachable(ctx.builder.get()) }, + }; } fn emit_method_declaration<'input>( diff --git a/ptx/src/test/spirv_run/abs.ll b/ptx/src/test/spirv_run/abs.ll index e086edac..43007903 100644 --- a/ptx/src/test/spirv_run/abs.ll +++ b/ptx/src/test/spirv_run/abs.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 
target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { -"37": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"27", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"28", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"27", ptr store i32 %"29", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"31" = inttoptr i64 %"14" to ptr - %"39" = getelementptr inbounds i8, ptr %"31", i64 4 - %"32" = load i32, ptr %"39", align 4 + %"38" = getelementptr inbounds i8, ptr %"31", i64 4 + %"32" = load i32, ptr %"38", align 4 store i32 %"32", ptr addrspace(5) %"7", align 4 %"16" = load i32, ptr addrspace(5) %"6", align 4 %"15" = call i32 @llvm.abs.i32(i32 %"16", i1 false) @@ -35,8 +37,8 @@ define protected amdgpu_kernel void @abs(ptr addrspace(4) byref(i64) %"27", ptr %"21" = load i64, ptr addrspace(5) %"5", align 8 %"22" = load i32, ptr addrspace(5) %"7", align 4 %"35" = inttoptr i64 %"21" to ptr - %"41" = getelementptr inbounds i8, ptr %"35", i64 4 - store i32 %"22", ptr %"41", align 4 + %"40" = getelementptr inbounds i8, ptr %"35", i64 4 + store i32 %"22", ptr %"40", align 4 ret void } diff --git a/ptx/src/test/spirv_run/activemask.ll b/ptx/src/test/spirv_run/activemask.ll index 5ca886c8..684f89a2 100644 --- a/ptx/src/test/spirv_run/activemask.ll +++ b/ptx/src/test/spirv_run/activemask.ll @@ -4,11 +4,13 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__activemask() #0 define protected amdgpu_kernel void @activemask(ptr 
addrspace(4) byref(i64) %"11", ptr addrspace(4) byref(i64) %"12") #1 { -"15": %"6" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"6", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"6", align 1 %"7" = load i64, ptr addrspace(4) %"12", align 8 store i64 %"7", ptr addrspace(5) %"4", align 8 %"8" = call i32 @__zluda_ptx_impl__activemask() diff --git a/ptx/src/test/spirv_run/add.ll b/ptx/src/test/spirv_run/add.ll index 6a8ed120..babe5bb3 100644 --- a/ptx/src/test/spirv_run/add.ll +++ b/ptx/src/test/spirv_run/add.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 diff --git a/ptx/src/test/spirv_run/add_global.ll b/ptx/src/test/spirv_run/add_global.ll index 754623c0..70348574 100644 --- a/ptx/src/test/spirv_run/add_global.ll +++ b/ptx/src/test/spirv_run/add_global.ll @@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa" @PI = protected addrspace(1) externally_initialized global float 0x400921FB60000000, align 4 define protected amdgpu_kernel void @add_global(ptr addrspace(4) byref(i64) %"20", ptr addrspace(4) byref(i64) %"21") #0 { -"24": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"5" = alloca i64, align 8, 
addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"20", align 8 store i64 %"10", ptr addrspace(5) %"5", align 8 %"11" = load i64, ptr addrspace(4) %"21", align 8 diff --git a/ptx/src/test/spirv_run/add_non_coherent.ll b/ptx/src/test/spirv_run/add_non_coherent.ll index ab8d0bcd..4d97dad1 100644 --- a/ptx/src/test/spirv_run/add_non_coherent.ll +++ b/ptx/src/test/spirv_run/add_non_coherent.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 diff --git a/ptx/src/test/spirv_run/add_param_ptr.ll b/ptx/src/test/spirv_run/add_param_ptr.ll index 810e9c89..9553fa5c 100644 --- a/ptx/src/test/spirv_run/add_param_ptr.ll +++ b/ptx/src/test/spirv_run/add_param_ptr.ll @@ -2,32 +2,34 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @add_param_ptr(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 { -"38": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, 
addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + %2 = alloca i64, align 8, addrspace(5) + br label %3 + +3: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"31" = ptrtoint ptr addrspace(4) %"26" to i64 - %0 = alloca i64, align 8, addrspace(5) - store i64 %"31", ptr addrspace(5) %0, align 8 - %"30" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"31", ptr addrspace(5) %1, align 8 + %"30" = load i64, ptr addrspace(5) %1, align 8 store i64 %"30", ptr addrspace(5) %"4", align 8 %"33" = ptrtoint ptr addrspace(4) %"27" to i64 - %1 = alloca i64, align 8, addrspace(5) - store i64 %"33", ptr addrspace(5) %1, align 8 - %"32" = load i64, ptr addrspace(5) %1, align 8 + store i64 %"33", ptr addrspace(5) %2, align 8 + %"32" = load i64, ptr addrspace(5) %2, align 8 store i64 %"32", ptr addrspace(5) %"5", align 8 %"12" = load i64, ptr addrspace(5) %"4", align 8 %"34" = inttoptr i64 %"12" to ptr addrspace(4) - %"40" = getelementptr inbounds i8, ptr addrspace(4) %"34", i64 0 - %"11" = load i64, ptr addrspace(4) %"40", align 8 + %"39" = getelementptr inbounds i8, ptr addrspace(4) %"34", i64 0 + %"11" = load i64, ptr addrspace(4) %"39", align 8 store i64 %"11", ptr addrspace(5) %"4", align 8 %"14" = load i64, ptr addrspace(5) %"5", align 8 %"35" = inttoptr i64 %"14" to ptr addrspace(4) - %"42" = getelementptr inbounds i8, ptr addrspace(4) %"35", i64 0 - %"13" = load i64, ptr addrspace(4) %"42", align 8 + %"41" = getelementptr inbounds i8, ptr addrspace(4) %"35", i64 0 + %"13" = load i64, ptr addrspace(4) %"41", align 8 store i64 %"13", ptr addrspace(5) %"5", align 8 %"16" = load i64, ptr addrspace(5) %"4", align 8 %"36" = inttoptr i64 %"16" to ptr diff --git a/ptx/src/test/spirv_run/add_tuning.ll b/ptx/src/test/spirv_run/add_tuning.ll index 9ec6795d..ac2972c5 100644 --- a/ptx/src/test/spirv_run/add_tuning.ll +++ b/ptx/src/test/spirv_run/add_tuning.ll @@ -2,13 +2,15 @@ target 
datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 diff --git a/ptx/src/test/spirv_run/addc_cc.ll b/ptx/src/test/spirv_run/addc_cc.ll index 32999828..d781744a 100644 --- a/ptx/src/test/spirv_run/addc_cc.ll +++ b/ptx/src/test/spirv_run/addc_cc.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53", ptr addrspace(4) byref(i64) %"54") #0 { -"68": %"13" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"13", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -14,6 +12,10 @@ define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53", %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) %"12" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"13", align 1 %"14" = load i64, ptr addrspace(4) %"53", align 8 store i64 %"14", ptr addrspace(5) %"4", align 8 %"15" = load i64, ptr addrspace(4) %"54", align 8 @@ -24,45 +26,45 @@ define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53", store i32 %"55", ptr addrspace(5) %"9", align 4 %"19" 
= load i64, ptr addrspace(5) %"4", align 8 %"57" = inttoptr i64 %"19" to ptr - %"70" = getelementptr inbounds i8, ptr %"57", i64 4 - %"58" = load i32, ptr %"70", align 4 + %"69" = getelementptr inbounds i8, ptr %"57", i64 4 + %"58" = load i32, ptr %"69", align 4 store i32 %"58", ptr addrspace(5) %"10", align 4 %"21" = load i64, ptr addrspace(5) %"4", align 8 %"59" = inttoptr i64 %"21" to ptr - %"72" = getelementptr inbounds i8, ptr %"59", i64 8 - %"20" = load i32, ptr %"72", align 4 + %"71" = getelementptr inbounds i8, ptr %"59", i64 8 + %"20" = load i32, ptr %"71", align 4 store i32 %"20", ptr addrspace(5) %"11", align 4 %"23" = load i64, ptr addrspace(5) %"4", align 8 %"60" = inttoptr i64 %"23" to ptr - %"74" = getelementptr inbounds i8, ptr %"60", i64 12 - %"22" = load i32, ptr %"74", align 4 + %"73" = getelementptr inbounds i8, ptr %"60", i64 12 + %"22" = load i32, ptr %"73", align 4 store i32 %"22", ptr addrspace(5) %"12", align 4 %"26" = load i32, ptr addrspace(5) %"9", align 4 %"27" = load i32, ptr addrspace(5) %"10", align 4 - %0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"26", i32 %"27") - %"24" = extractvalue { i32, i1 } %0, 0 - %"25" = extractvalue { i32, i1 } %0, 1 + %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"26", i32 %"27") + %"24" = extractvalue { i32, i1 } %2, 0 + %"25" = extractvalue { i32, i1 } %2, 1 store i32 %"24", ptr addrspace(5) %"6", align 4 store i1 %"25", ptr addrspace(5) %"13", align 1 %"30" = load i1, ptr addrspace(5) %"13", align 1 %"31" = load i32, ptr addrspace(5) %"6", align 4 %"32" = load i32, ptr addrspace(5) %"11", align 4 - %1 = zext i1 %"30" to i32 - %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"31", i32 %"32") - %3 = extractvalue { i32, i1 } %2, 0 - %4 = extractvalue { i32, i1 } %2, 1 - %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %3, i32 %1) - %"28" = extractvalue { i32, i1 } %5, 0 - %6 = extractvalue { i32, i1 } %5, 1 - %"29" = xor i1 %4, %6 + %3 = zext i1 %"30" to i32 + %4 = call { 
i32, i1 } @llvm.uadd.with.overflow.i32(i32 %"31", i32 %"32") + %5 = extractvalue { i32, i1 } %4, 0 + %6 = extractvalue { i32, i1 } %4, 1 + %7 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %5, i32 %3) + %"28" = extractvalue { i32, i1 } %7, 0 + %8 = extractvalue { i32, i1 } %7, 1 + %"29" = xor i1 %6, %8 store i32 %"28", ptr addrspace(5) %"7", align 4 store i1 %"29", ptr addrspace(5) %"13", align 1 %"34" = load i1, ptr addrspace(5) %"13", align 1 %"35" = load i32, ptr addrspace(5) %"7", align 4 %"36" = load i32, ptr addrspace(5) %"12", align 4 - %7 = zext i1 %"34" to i32 - %8 = add i32 %"35", %"36" - %"33" = add i32 %8, %7 + %9 = zext i1 %"34" to i32 + %10 = add i32 %"35", %"36" + %"33" = add i32 %10, %9 store i32 %"33", ptr addrspace(5) %"8", align 4 %"37" = load i64, ptr addrspace(5) %"5", align 8 %"38" = load i32, ptr addrspace(5) %"6", align 4 @@ -71,13 +73,13 @@ define protected amdgpu_kernel void @addc_cc(ptr addrspace(4) byref(i64) %"53", %"39" = load i64, ptr addrspace(5) %"5", align 8 %"40" = load i32, ptr addrspace(5) %"7", align 4 %"66" = inttoptr i64 %"39" to ptr - %"76" = getelementptr inbounds i8, ptr %"66", i64 4 - store i32 %"40", ptr %"76", align 4 + %"75" = getelementptr inbounds i8, ptr %"66", i64 4 + store i32 %"40", ptr %"75", align 4 %"41" = load i64, ptr addrspace(5) %"5", align 8 %"42" = load i32, ptr addrspace(5) %"8", align 4 %"67" = inttoptr i64 %"41" to ptr - %"78" = getelementptr inbounds i8, ptr %"67", i64 8 - store i32 %"42", ptr %"78", align 4 + %"77" = getelementptr inbounds i8, ptr %"67", i64 8 + store i32 %"42", ptr %"77", align 4 ret void } diff --git a/ptx/src/test/spirv_run/addc_cc2.ll b/ptx/src/test/spirv_run/addc_cc2.ll index 836d8d5b..cd06ea27 100644 --- a/ptx/src/test/spirv_run/addc_cc2.ll +++ b/ptx/src/test/spirv_run/addc_cc2.ll @@ -2,50 +2,52 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @addc_cc2(ptr 
addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 { -"50": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"40", align 8 store i64 %"10", ptr addrspace(5) %"5", align 8 - %0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1) - %"41" = extractvalue { i32, i1 } %0, 0 - %"12" = extractvalue { i32, i1 } %0, 1 + %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1) + %"41" = extractvalue { i32, i1 } %2, 0 + %"12" = extractvalue { i32, i1 } %2, 1 store i32 %"41", ptr addrspace(5) %"6", align 4 store i1 %"12", ptr addrspace(5) %"9", align 1 %"15" = load i1, ptr addrspace(5) %"9", align 1 - %1 = zext i1 %"15" to i32 - %2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -4, i32 -4) - %3 = extractvalue { i32, i1 } %2, 0 - %4 = extractvalue { i32, i1 } %2, 1 - %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %3, i32 %1) - %"42" = extractvalue { i32, i1 } %5, 0 - %6 = extractvalue { i32, i1 } %5, 1 - %"14" = xor i1 %4, %6 + %3 = zext i1 %"15" to i32 + %4 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -4, i32 -4) + %5 = extractvalue { i32, i1 } %4, 0 + %6 = extractvalue { i32, i1 } %4, 1 + %7 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %5, i32 %3) + %"42" = extractvalue { i32, i1 } %7, 0 + %8 = extractvalue { i32, i1 } %7, 1 + %"14" = xor i1 %6, %8 store i32 %"42", ptr addrspace(5) %"6", align 4 store i1 %"14", ptr addrspace(5) %"9", align 1 %"17" = load i1, ptr addrspace(5) %"9", align 1 - %7 = zext i1 %"17" to i32 - %"43" = add i32 0, %7 + %9 = zext i1 %"17" to i32 + %"43" = add i32 0, %9 store i32 %"43", ptr addrspace(5) %"7", align 4 %"20" = load i1, 
ptr addrspace(5) %"9", align 1 - %8 = zext i1 %"20" to i32 - %9 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 -1) - %10 = extractvalue { i32, i1 } %9, 0 - %11 = extractvalue { i32, i1 } %9, 1 - %12 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %10, i32 %8) - %"44" = extractvalue { i32, i1 } %12, 0 - %13 = extractvalue { i32, i1 } %12, 1 - %"19" = xor i1 %11, %13 + %10 = zext i1 %"20" to i32 + %11 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 -1) + %12 = extractvalue { i32, i1 } %11, 0 + %13 = extractvalue { i32, i1 } %11, 1 + %14 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %12, i32 %10) + %"44" = extractvalue { i32, i1 } %14, 0 + %15 = extractvalue { i32, i1 } %14, 1 + %"19" = xor i1 %13, %15 store i32 %"44", ptr addrspace(5) %"6", align 4 store i1 %"19", ptr addrspace(5) %"9", align 1 %"22" = load i1, ptr addrspace(5) %"9", align 1 - %14 = zext i1 %"22" to i32 - %"45" = add i32 0, %14 + %16 = zext i1 %"22" to i32 + %"45" = add i32 0, %16 store i32 %"45", ptr addrspace(5) %"8", align 4 %"23" = load i64, ptr addrspace(5) %"5", align 8 %"24" = load i32, ptr addrspace(5) %"7", align 4 @@ -54,8 +56,8 @@ define protected amdgpu_kernel void @addc_cc2(ptr addrspace(4) byref(i64) %"39", %"25" = load i64, ptr addrspace(5) %"5", align 8 %"26" = load i32, ptr addrspace(5) %"8", align 4 %"48" = inttoptr i64 %"25" to ptr - %"52" = getelementptr inbounds i8, ptr %"48", i64 4 - store i32 %"26", ptr %"52", align 4 + %"51" = getelementptr inbounds i8, ptr %"48", i64 4 + store i32 %"26", ptr %"51", align 4 ret void } diff --git a/ptx/src/test/spirv_run/alloca_call.ll b/ptx/src/test/spirv_run/alloca_call.ll index e6a9d6fa..aae7a916 100644 --- a/ptx/src/test/spirv_run/alloca_call.ll +++ b/ptx/src/test/spirv_run/alloca_call.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr 
addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 { -"58": %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 %"7" = alloca i1, align 1, addrspace(5) %"8" = alloca double, align 8, addrspace(5) %"9" = alloca double, align 8, addrspace(5) @@ -14,6 +12,10 @@ define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr %"13" = alloca i64, align 8, addrspace(5) %"46" = alloca i64, align 8, addrspace(5) %"48" = alloca [4 x i32], align 16, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"22", align 1 %"50" = load i64, ptr addrspace(4) %"42", align 8 store i64 %"50", ptr addrspace(5) %"10", align 8 %"51" = load i64, ptr addrspace(4) %"43", align 8 @@ -29,30 +31,30 @@ define protected amdgpu_kernel void @_Z13callback_onlyIdEvPvS0_10callback_tx(ptr %"30" = load i1, ptr addrspace(5) %"7", align 1 br i1 %"30", label %"6", label %"18" -"18": ; preds = %"58" +"18": ; preds = %1 %"31" = load i64, ptr addrspace(5) %"11", align 8 - %"60" = getelementptr inbounds i8, ptr addrspace(5) %"46", i64 0 - store i64 %"31", ptr addrspace(5) %"60", align 8 + %"59" = getelementptr inbounds i8, ptr addrspace(5) %"46", i64 0 + store i64 %"31", ptr addrspace(5) %"59", align 8 %"32" = load i64, ptr addrspace(5) %"11", align 8 - %0 = inttoptr i64 %"32" to ptr - %"21" = call [4 x i32] %0() + %2 = inttoptr i64 %"32" to ptr + %"21" = call [4 x i32] %2() store [4 x i32] %"21", ptr addrspace(5) %"48", align 4 - %"62" = getelementptr inbounds i8, ptr addrspace(5) %"48", i64 0 - %"19" = load <2 x double>, ptr addrspace(5) %"62", align 16 + %"61" = getelementptr inbounds i8, ptr addrspace(5) %"48", i64 0 + %"19" = load <2 x double>, ptr addrspace(5) %"61", align 16 %"33" = extractelement <2 x double> %"19", i32 0 %"34" = extractelement <2 x double> %"19", i32 1 store double %"33", ptr addrspace(5) %"8", align 8 store double 
%"34", ptr addrspace(5) %"9", align 8 %"35" = load double, ptr addrspace(5) %"8", align 8 %"36" = load double, ptr addrspace(5) %"9", align 8 - %1 = insertelement <2 x double> undef, double %"35", i32 0 - %"20" = insertelement <2 x double> %1, double %"36", i32 1 + %3 = insertelement <2 x double> undef, double %"35", i32 0 + %"20" = insertelement <2 x double> %3, double %"36", i32 1 %"37" = load i64, ptr addrspace(5) %"10", align 8 %"57" = inttoptr i64 %"37" to ptr addrspace(1) store <2 x double> %"20", ptr addrspace(1) %"57", align 16 br label %"6" -"6": ; preds = %"18", %"58" +"6": ; preds = %"18", %1 ret void } diff --git a/ptx/src/test/spirv_run/amdgpu_unnamed.ll b/ptx/src/test/spirv_run/amdgpu_unnamed.ll index 61e3de46..1a1ce58e 100644 --- a/ptx/src/test/spirv_run/amdgpu_unnamed.ll +++ b/ptx/src/test/spirv_run/amdgpu_unnamed.ll @@ -8,9 +8,7 @@ target triple = "amdgcn-amd-amdhsa" declare void @__zluda_ptx_impl____assertfail(i64, i64, i32, i64, i64) #0 define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64) %"57", ptr addrspace(4) byref(i64) %"58") #1 { -"73": %"33" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"33", align 1 %"14" = alloca i64, align 8, addrspace(5) %"15" = alloca i64, align 8, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) @@ -19,10 +17,17 @@ define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64) %"19" = alloca i64, align 8, addrspace(5) %"20" = alloca i32, align 4, addrspace(5) %"59" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) %"60" = alloca i64, align 8, addrspace(5) + %2 = alloca i64, align 8, addrspace(5) %"61" = alloca i32, align 4, addrspace(5) %"62" = alloca i64, align 8, addrspace(5) + %3 = alloca i64, align 8, addrspace(5) %"63" = alloca i64, align 8, addrspace(5) + br label %4 + +4: ; preds = %0 + store i1 false, ptr addrspace(5) %"33", align 1 %"34" = load i64, ptr addrspace(4) %"57", align 8 store i64 %"34", ptr 
addrspace(5) %"14", align 8 %"35" = load i64, ptr addrspace(4) %"58", align 8 @@ -37,28 +42,25 @@ define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64) %"40" = load i1, ptr addrspace(5) %"18", align 1 br i1 %"40", label %"13", label %"27" -"27": ; preds = %"73" - %0 = alloca i64, align 8, addrspace(5) - store i64 ptrtoint (ptr addrspace(1) @0 to i64), ptr addrspace(5) %0, align 8 - %"66" = load i64, ptr addrspace(5) %0, align 8 +"27": ; preds = %4 + store i64 ptrtoint (ptr addrspace(1) @0 to i64), ptr addrspace(5) %1, align 8 + %"66" = load i64, ptr addrspace(5) %1, align 8 store i64 %"66", ptr addrspace(5) %"19", align 8 %"42" = load i64, ptr addrspace(5) %"19", align 8 store i64 %"42", ptr addrspace(5) %"59", align 8 - %1 = alloca i64, align 8, addrspace(5) - store i64 ptrtoint (ptr addrspace(1) @1 to i64), ptr addrspace(5) %1, align 8 - %"68" = load i64, ptr addrspace(5) %1, align 8 + store i64 ptrtoint (ptr addrspace(1) @1 to i64), ptr addrspace(5) %2, align 8 + %"68" = load i64, ptr addrspace(5) %2, align 8 store i64 %"68", ptr addrspace(5) %"19", align 8 %"44" = load i64, ptr addrspace(5) %"19", align 8 store i64 %"44", ptr addrspace(5) %"60", align 8 store i32 1, ptr addrspace(5) %"61", align 4 - %2 = alloca i64, align 8, addrspace(5) - store i64 ptrtoint (ptr addrspace(1) @2 to i64), ptr addrspace(5) %2, align 8 - %"70" = load i64, ptr addrspace(5) %2, align 8 + store i64 ptrtoint (ptr addrspace(1) @2 to i64), ptr addrspace(5) %3, align 8 + %"70" = load i64, ptr addrspace(5) %3, align 8 store i64 %"70", ptr addrspace(5) %"19", align 8 %"46" = load i64, ptr addrspace(5) %"19", align 8 store i64 %"46", ptr addrspace(5) %"62", align 8 - %"75" = getelementptr inbounds i8, ptr addrspace(5) %"63", i64 0 - store i64 1, ptr addrspace(5) %"75", align 8 + %"74" = getelementptr inbounds i8, ptr addrspace(5) %"63", i64 0 + store i64 1, ptr addrspace(5) %"74", align 8 %"28" = load i64, ptr addrspace(5) %"59", align 8 %"29" = load i64, ptr 
addrspace(5) %"60", align 8 %"30" = load i32, ptr addrspace(5) %"61", align 4 @@ -67,7 +69,7 @@ define protected amdgpu_kernel void @amdgpu_unnamed(ptr addrspace(4) byref(i64) call void @__zluda_ptx_impl____assertfail(i64 %"28", i64 %"29", i32 %"30", i64 %"31", i64 %"32") br label %"13" -"13": ; preds = %"27", %"73" +"13": ; preds = %"27", %4 %"48" = load i64, ptr addrspace(5) %"16", align 8 %"47" = add i64 %"48", 1 store i64 %"47", ptr addrspace(5) %"17", align 8 diff --git a/ptx/src/test/spirv_run/and.ll b/ptx/src/test/spirv_run/and.ll index c90f390d..7bb262de 100644 --- a/ptx/src/test/spirv_run/and.ll +++ b/ptx/src/test/spirv_run/and.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"30": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"22", ptr store i32 %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"32" = getelementptr inbounds i8, ptr %"25", i64 4 - %"13" = load i32, ptr %"32", align 4 + %"31" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"31", align 4 store i32 %"13", ptr addrspace(5) %"7", align 4 %"16" = load i32, ptr addrspace(5) %"6", align 4 %"17" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/assertfail.ll 
b/ptx/src/test/spirv_run/assertfail.ll index 001dbfe6..93348590 100644 --- a/ptx/src/test/spirv_run/assertfail.ll +++ b/ptx/src/test/spirv_run/assertfail.ll @@ -4,42 +4,44 @@ target triple = "amdgcn-amd-amdhsa" declare void @__zluda_ptx_impl____assertfail(i64, i64, i32, i64, i64) #0 define protected amdgpu_kernel void @assertfail(ptr addrspace(4) byref(i64) %"62", ptr addrspace(4) byref(i64) %"63") #1 { -"81": %"35" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"35", align 1 %"15" = alloca i64, align 8, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) %"17" = alloca i64, align 8, addrspace(5) %"18" = alloca i64, align 8, addrspace(5) %"19" = alloca i32, align 4, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) %"64" = alloca i64, align 8, addrspace(5) %"66" = alloca i64, align 8, addrspace(5) %"68" = alloca i32, align 4, addrspace(5) %"70" = alloca i64, align 8, addrspace(5) %"72" = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"35", align 1 %"36" = load i64, ptr addrspace(4) %"62", align 8 store i64 %"36", ptr addrspace(5) %"15", align 8 %"37" = load i64, ptr addrspace(4) %"63", align 8 store i64 %"37", ptr addrspace(5) %"16", align 8 - %0 = alloca i32, align 4, addrspace(5) - store i32 0, ptr addrspace(5) %0, align 4 - %"74" = load i32, ptr addrspace(5) %0, align 4 + store i32 0, ptr addrspace(5) %1, align 4 + %"74" = load i32, ptr addrspace(5) %1, align 4 store i32 %"74", ptr addrspace(5) %"19", align 4 %"39" = load i64, ptr addrspace(5) %"15", align 8 - %"83" = getelementptr inbounds i8, ptr addrspace(5) %"64", i64 0 - store i64 %"39", ptr addrspace(5) %"83", align 8 + %"82" = getelementptr inbounds i8, ptr addrspace(5) %"64", i64 0 + store i64 %"39", ptr addrspace(5) %"82", align 8 %"40" = load i64, ptr addrspace(5) %"15", align 8 - %"85" = getelementptr inbounds i8, ptr addrspace(5) %"66", i64 0 - store i64 %"40", ptr addrspace(5) %"85", align 8 + %"84" = 
getelementptr inbounds i8, ptr addrspace(5) %"66", i64 0 + store i64 %"40", ptr addrspace(5) %"84", align 8 %"41" = load i32, ptr addrspace(5) %"19", align 4 - %"87" = getelementptr inbounds i8, ptr addrspace(5) %"68", i64 0 - store i32 %"41", ptr addrspace(5) %"87", align 4 + %"86" = getelementptr inbounds i8, ptr addrspace(5) %"68", i64 0 + store i32 %"41", ptr addrspace(5) %"86", align 4 %"42" = load i64, ptr addrspace(5) %"15", align 8 - %"89" = getelementptr inbounds i8, ptr addrspace(5) %"70", i64 0 - store i64 %"42", ptr addrspace(5) %"89", align 8 + %"88" = getelementptr inbounds i8, ptr addrspace(5) %"70", i64 0 + store i64 %"42", ptr addrspace(5) %"88", align 8 %"43" = load i64, ptr addrspace(5) %"15", align 8 - %"91" = getelementptr inbounds i8, ptr addrspace(5) %"72", i64 0 - store i64 %"43", ptr addrspace(5) %"91", align 8 + %"90" = getelementptr inbounds i8, ptr addrspace(5) %"72", i64 0 + store i64 %"43", ptr addrspace(5) %"90", align 8 %"30" = load i64, ptr addrspace(5) %"64", align 8 %"31" = load i64, ptr addrspace(5) %"66", align 8 %"32" = load i32, ptr addrspace(5) %"68", align 4 diff --git a/ptx/src/test/spirv_run/atom_add.ll b/ptx/src/test/spirv_run/atom_add.ll index dff9e0ee..6dd159f3 100644 --- a/ptx/src/test/spirv_run/atom_add.ll +++ b/ptx/src/test/spirv_run/atom_add.ll @@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [1024 x i8] undef, align 4 define protected amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 { -"37": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"28", align 8 store i64 %"10", ptr addrspace(5) %"5", align 8 %"11" = 
load i64, ptr addrspace(4) %"29", align 8 @@ -21,8 +23,8 @@ define protected amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"28", store i32 %"12", ptr addrspace(5) %"7", align 4 %"15" = load i64, ptr addrspace(5) %"5", align 8 %"31" = inttoptr i64 %"15" to ptr - %"39" = getelementptr inbounds i8, ptr %"31", i64 4 - %"14" = load i32, ptr %"39", align 4 + %"38" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load i32, ptr %"38", align 4 store i32 %"14", ptr addrspace(5) %"8", align 4 %"16" = load i32, ptr addrspace(5) %"7", align 4 store i32 %"16", ptr addrspace(3) @"4", align 4 @@ -38,8 +40,8 @@ define protected amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"28", %"22" = load i64, ptr addrspace(5) %"6", align 8 %"23" = load i32, ptr addrspace(5) %"8", align 4 %"36" = inttoptr i64 %"22" to ptr - %"41" = getelementptr inbounds i8, ptr %"36", i64 4 - store i32 %"23", ptr %"41", align 4 + %"40" = getelementptr inbounds i8, ptr %"36", i64 4 + store i32 %"23", ptr %"40", align 4 ret void } diff --git a/ptx/src/test/spirv_run/atom_add_f16.ll b/ptx/src/test/spirv_run/atom_add_f16.ll index e63de903..a8fa430d 100644 --- a/ptx/src/test/spirv_run/atom_add_f16.ll +++ b/ptx/src/test/spirv_run/atom_add_f16.ll @@ -4,20 +4,22 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [1024 x i8] undef, align 4 define protected amdgpu_kernel void @atom_add_f16(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 { -"37": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca half, align 2, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"26", align 8 store i64 %"9", ptr addrspace(5) %"5", align 8 %"10" = load i64, ptr addrspace(4) %"27", align 8 store i64 %"10", ptr addrspace(5) %"6", align 8 %"12" = load i64, 
ptr addrspace(5) %"5", align 8 %"28" = inttoptr i64 %"12" to ptr - %"39" = getelementptr inbounds i8, ptr %"28", i64 2 - %"29" = load i16, ptr %"39", align 2 + %"38" = getelementptr inbounds i8, ptr %"28", i64 2 + %"29" = load i16, ptr %"38", align 2 %"11" = bitcast i16 %"29" to half store half %"11", ptr addrspace(5) %"7", align 2 %"14" = load i64, ptr addrspace(5) %"5", align 8 @@ -38,9 +40,9 @@ define protected amdgpu_kernel void @atom_add_f16(ptr addrspace(4) byref(i64) %" %"20" = load i64, ptr addrspace(5) %"6", align 8 %"21" = load half, ptr addrspace(5) %"7", align 2 %"35" = inttoptr i64 %"20" to ptr - %"41" = getelementptr inbounds i8, ptr %"35", i64 2 + %"40" = getelementptr inbounds i8, ptr %"35", i64 2 %"36" = bitcast half %"21" to i16 - store i16 %"36", ptr %"41", align 2 + store i16 %"36", ptr %"40", align 2 ret void } diff --git a/ptx/src/test/spirv_run/atom_add_float.ll b/ptx/src/test/spirv_run/atom_add_float.ll index 329d198f..d0e3c142 100644 --- a/ptx/src/test/spirv_run/atom_add_float.ll +++ b/ptx/src/test/spirv_run/atom_add_float.ll @@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [1024 x i8] undef, align 4 define protected amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 { -"37": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"28", align 8 store i64 %"10", ptr addrspace(5) %"5", align 8 %"11" = load i64, ptr addrspace(4) %"29", align 8 @@ -21,8 +23,8 @@ define protected amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) store float %"12", ptr addrspace(5) %"7", align 4 %"15" = load i64, ptr addrspace(5) %"5", align 
8 %"31" = inttoptr i64 %"15" to ptr - %"39" = getelementptr inbounds i8, ptr %"31", i64 4 - %"14" = load float, ptr %"39", align 4 + %"38" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load float, ptr %"38", align 4 store float %"14", ptr addrspace(5) %"8", align 4 %"16" = load float, ptr addrspace(5) %"7", align 4 store float %"16", ptr addrspace(3) @"4", align 4 @@ -38,8 +40,8 @@ define protected amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"22" = load i64, ptr addrspace(5) %"6", align 8 %"23" = load float, ptr addrspace(5) %"8", align 4 %"36" = inttoptr i64 %"22" to ptr - %"41" = getelementptr inbounds i8, ptr %"36", i64 4 - store float %"23", ptr %"41", align 4 + %"40" = getelementptr inbounds i8, ptr %"36", i64 4 + store float %"23", ptr %"40", align 4 ret void } diff --git a/ptx/src/test/spirv_run/atom_cas.ll b/ptx/src/test/spirv_run/atom_cas.ll index 2e0475a9..a9af2c4e 100644 --- a/ptx/src/test/spirv_run/atom_cas.ll +++ b/ptx/src/test/spirv_run/atom_cas.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 { -"38": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"29", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"30", align 8 @@ -20,14 +22,14 @@ define protected amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"29", %"14" = load i64, ptr addrspace(5) %"4", align 8 %"15" = load i32, ptr addrspace(5) %"6", align 4 %"32" = inttoptr i64 %"14" to ptr - %"40" = 
getelementptr inbounds i8, ptr %"32", i64 4 - %0 = cmpxchg ptr %"40", i32 %"15", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4 - %"33" = extractvalue { i32, i1 } %0, 0 + %"39" = getelementptr inbounds i8, ptr %"32", i64 4 + %2 = cmpxchg ptr %"39", i32 %"15", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4 + %"33" = extractvalue { i32, i1 } %2, 0 store i32 %"33", ptr addrspace(5) %"6", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"35" = inttoptr i64 %"17" to ptr - %"42" = getelementptr inbounds i8, ptr %"35", i64 4 - %"16" = load i32, ptr %"42", align 4 + %"41" = getelementptr inbounds i8, ptr %"35", i64 4 + %"16" = load i32, ptr %"41", align 4 store i32 %"16", ptr addrspace(5) %"7", align 4 %"18" = load i64, ptr addrspace(5) %"5", align 8 %"19" = load i32, ptr addrspace(5) %"6", align 4 @@ -36,8 +38,8 @@ define protected amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"29", %"20" = load i64, ptr addrspace(5) %"5", align 8 %"21" = load i32, ptr addrspace(5) %"7", align 4 %"37" = inttoptr i64 %"20" to ptr - %"44" = getelementptr inbounds i8, ptr %"37", i64 4 - store i32 %"21", ptr %"44", align 4 + %"43" = getelementptr inbounds i8, ptr %"37", i64 4 + store i32 %"21", ptr %"43", align 4 ret void } diff --git a/ptx/src/test/spirv_run/atom_inc.ll b/ptx/src/test/spirv_run/atom_inc.ll index 6fdc3c78..212c5927 100644 --- a/ptx/src/test/spirv_run/atom_inc.ll +++ b/ptx/src/test/spirv_run/atom_inc.ll @@ -6,14 +6,16 @@ declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr, i32) #0 declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_global_inc(ptr addrspace(1), i32) #0 define protected amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #1 { -"38": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca 
i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"30", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"31", align 8 @@ -37,13 +39,13 @@ define protected amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"30", %"20" = load i64, ptr addrspace(5) %"5", align 8 %"21" = load i32, ptr addrspace(5) %"7", align 4 %"36" = inttoptr i64 %"20" to ptr - %"48" = getelementptr inbounds i8, ptr %"36", i64 4 - store i32 %"21", ptr %"48", align 4 + %"47" = getelementptr inbounds i8, ptr %"36", i64 4 + store i32 %"21", ptr %"47", align 4 %"22" = load i64, ptr addrspace(5) %"5", align 8 %"23" = load i32, ptr addrspace(5) %"8", align 4 %"37" = inttoptr i64 %"22" to ptr - %"50" = getelementptr inbounds i8, ptr %"37", i64 8 - store i32 %"23", ptr %"50", align 4 + %"49" = getelementptr inbounds i8, ptr %"37", i64 8 + store i32 %"23", ptr %"49", align 4 ret void } diff --git a/ptx/src/test/spirv_run/atom_ld_st.ll b/ptx/src/test/spirv_run/atom_ld_st.ll index 3b6488ca..eb59d314 100644 --- a/ptx/src/test/spirv_run/atom_ld_st.ll +++ b/ptx/src/test/spirv_run/atom_ld_st.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @atom_ld_st(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 { -"18": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"14", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"15", align 8 diff --git 
a/ptx/src/test/spirv_run/atom_ld_st_vec.ll b/ptx/src/test/spirv_run/atom_ld_st_vec.ll index 7ea0fc50..5fa2409b 100644 --- a/ptx/src/test/spirv_run/atom_ld_st_vec.ll +++ b/ptx/src/test/spirv_run/atom_ld_st_vec.ll @@ -2,33 +2,35 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @atom_ld_st_vec(ptr addrspace(4) byref(i64) %"19", ptr addrspace(4) byref(i64) %"20") #0 { -"23": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"11" = load i64, ptr addrspace(4) %"19", align 8 store i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"20", align 8 store i64 %"12", ptr addrspace(5) %"5", align 8 %"13" = load i64, ptr addrspace(5) %"4", align 8 %"21" = inttoptr i64 %"13" to ptr - %0 = load atomic i128, ptr %"21" syncscope("agent-one-as") acquire, align 16 - %"8" = bitcast i128 %0 to <2 x i64> + %2 = load atomic i128, ptr %"21" syncscope("agent-one-as") acquire, align 16 + %"8" = bitcast i128 %2 to <2 x i64> %"14" = extractelement <2 x i64> %"8", i32 0 %"15" = extractelement <2 x i64> %"8", i32 1 store i64 %"14", ptr addrspace(5) %"6", align 8 store i64 %"15", ptr addrspace(5) %"7", align 8 %"16" = load i64, ptr addrspace(5) %"6", align 8 %"17" = load i64, ptr addrspace(5) %"7", align 8 - %1 = insertelement <2 x i64> undef, i64 %"16", i32 0 - %"9" = insertelement <2 x i64> %1, i64 %"17", i32 1 + %3 = insertelement <2 x i64> undef, i64 %"16", i32 0 + %"9" = insertelement <2 x i64> %3, i64 %"17", i32 1 %"18" = load i64, ptr addrspace(5) %"5", align 8 %"22" = inttoptr i64 %"18" to ptr - %2 = bitcast <2 x i64> %"9" to i128 - store atomic i128 %2, ptr %"22" 
syncscope("agent-one-as") release, align 16 + %4 = bitcast <2 x i64> %"9" to i128 + store atomic i128 %4, ptr %"22" syncscope("agent-one-as") release, align 16 ret void } diff --git a/ptx/src/test/spirv_run/atom_max_u32.ll b/ptx/src/test/spirv_run/atom_max_u32.ll index 64cb4304..8135e3df 100644 --- a/ptx/src/test/spirv_run/atom_max_u32.ll +++ b/ptx/src/test/spirv_run/atom_max_u32.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @atom_max_u32(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"30": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -23,8 +25,8 @@ define protected amdgpu_kernel void @atom_max_u32(ptr addrspace(4) byref(i64) %" store i32 %"14", ptr %"25", align 4 %"16" = load i64, ptr addrspace(5) %"4", align 8 %"26" = inttoptr i64 %"16" to ptr - %"32" = getelementptr inbounds i8, ptr %"26", i64 4 - %"15" = load i32, ptr %"32", align 4 + %"31" = getelementptr inbounds i8, ptr %"26", i64 4 + %"15" = load i32, ptr %"31", align 4 store i32 %"15", ptr addrspace(5) %"7", align 4 %"18" = load i64, ptr addrspace(5) %"5", align 8 %"19" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/b64tof64.ll b/ptx/src/test/spirv_run/b64tof64.ll index 5cd7a2c7..4a8d9b35 100644 --- a/ptx/src/test/spirv_run/b64tof64.ll +++ b/ptx/src/test/spirv_run/b64tof64.ll @@ -2,22 +2,24 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = 
"amdgcn-amd-amdhsa" define protected amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"23": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca double, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load double, ptr addrspace(4) %"17", align 8 store double %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"10", ptr addrspace(5) %"6", align 8 %"12" = load double, ptr addrspace(5) %"4", align 8 %"20" = bitcast double %"12" to i64 - %0 = alloca i64, align 8, addrspace(5) - store i64 %"20", ptr addrspace(5) %0, align 8 - %"11" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"20", ptr addrspace(5) %1, align 8 + %"11" = load i64, ptr addrspace(5) %1, align 8 store i64 %"11", ptr addrspace(5) %"5", align 8 %"14" = load i64, ptr addrspace(5) %"5", align 8 %"21" = inttoptr i64 %"14" to ptr diff --git a/ptx/src/test/spirv_run/barrier.ll b/ptx/src/test/spirv_run/barrier.ll index e2e65f2c..55d0c93e 100644 --- a/ptx/src/test/spirv_run/barrier.ll +++ b/ptx/src/test/spirv_run/barrier.ll @@ -4,8 +4,10 @@ target triple = "amdgcn-amd-amdhsa" declare void @__zluda_ptx_impl__barrier_sync(i32) #0 define protected amdgpu_kernel void @barrier() #1 { -"4": %"2" = alloca i1, align 1, addrspace(5) + br label %1 + +1: ; preds = %0 store i1 false, ptr addrspace(5) %"2", align 1 call void @__zluda_ptx_impl__barrier_sync(i32 0) ret void diff --git a/ptx/src/test/spirv_run/bfe.ll b/ptx/src/test/spirv_run/bfe.ll index 99fd766f..6644c204 100644 --- a/ptx/src/test/spirv_run/bfe.ll +++ b/ptx/src/test/spirv_run/bfe.ll @@ -4,14 +4,16 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__bfe_u32(i32, i32, 
i32) #0 define protected amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 { -"34": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"28", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"29", align 8 @@ -22,13 +24,13 @@ define protected amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"28", ptr store i32 %"12", ptr addrspace(5) %"6", align 4 %"15" = load i64, ptr addrspace(5) %"4", align 8 %"31" = inttoptr i64 %"15" to ptr - %"41" = getelementptr inbounds i8, ptr %"31", i64 4 - %"14" = load i32, ptr %"41", align 4 + %"40" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load i32, ptr %"40", align 4 store i32 %"14", ptr addrspace(5) %"7", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"32" = inttoptr i64 %"17" to ptr - %"43" = getelementptr inbounds i8, ptr %"32", i64 8 - %"16" = load i32, ptr %"43", align 4 + %"42" = getelementptr inbounds i8, ptr %"32", i64 8 + %"16" = load i32, ptr %"42", align 4 store i32 %"16", ptr addrspace(5) %"8", align 4 %"19" = load i32, ptr addrspace(5) %"6", align 4 %"20" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/bfi.ll b/ptx/src/test/spirv_run/bfi.ll index bea4ac5f..3c6a3777 100644 --- a/ptx/src/test/spirv_run/bfi.ll +++ b/ptx/src/test/spirv_run/bfi.ll @@ -4,15 +4,17 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__bfi_b32(i32, i32, i32, i32) #0 define protected amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #1 { -"44": %"10" = alloca i1, align 1, addrspace(5) - 
store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"11" = load i64, ptr addrspace(4) %"34", align 8 store i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"35", align 8 @@ -23,18 +25,18 @@ define protected amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"34", ptr store i32 %"13", ptr addrspace(5) %"6", align 4 %"16" = load i64, ptr addrspace(5) %"4", align 8 %"37" = inttoptr i64 %"16" to ptr - %"52" = getelementptr inbounds i8, ptr %"37", i64 4 - %"15" = load i32, ptr %"52", align 4 + %"51" = getelementptr inbounds i8, ptr %"37", i64 4 + %"15" = load i32, ptr %"51", align 4 store i32 %"15", ptr addrspace(5) %"7", align 4 %"18" = load i64, ptr addrspace(5) %"4", align 8 %"38" = inttoptr i64 %"18" to ptr - %"54" = getelementptr inbounds i8, ptr %"38", i64 8 - %"17" = load i32, ptr %"54", align 4 + %"53" = getelementptr inbounds i8, ptr %"38", i64 8 + %"17" = load i32, ptr %"53", align 4 store i32 %"17", ptr addrspace(5) %"8", align 4 %"20" = load i64, ptr addrspace(5) %"4", align 8 %"39" = inttoptr i64 %"20" to ptr - %"56" = getelementptr inbounds i8, ptr %"39", i64 12 - %"19" = load i32, ptr %"56", align 4 + %"55" = getelementptr inbounds i8, ptr %"39", i64 12 + %"19" = load i32, ptr %"55", align 4 store i32 %"19", ptr addrspace(5) %"9", align 4 %"22" = load i32, ptr addrspace(5) %"6", align 4 %"23" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/bfind.ll b/ptx/src/test/spirv_run/bfind.ll index ebd9fea0..a427332d 100644 --- a/ptx/src/test/spirv_run/bfind.ll +++ b/ptx/src/test/spirv_run/bfind.ll @@ -2,9 +2,7 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 { -"52": %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -13,6 +11,10 @@ define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", pt %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"12", align 1 %"13" = load i64, ptr addrspace(4) %"41", align 8 store i64 %"13", ptr addrspace(5) %"4", align 8 %"14" = load i64, ptr addrspace(4) %"42", align 8 @@ -23,31 +25,31 @@ define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", pt store i32 %"15", ptr addrspace(5) %"6", align 4 %"18" = load i64, ptr addrspace(5) %"4", align 8 %"44" = inttoptr i64 %"18" to ptr - %"54" = getelementptr inbounds i8, ptr %"44", i64 4 - %"17" = load i32, ptr %"54", align 4 + %"53" = getelementptr inbounds i8, ptr %"44", i64 4 + %"17" = load i32, ptr %"53", align 4 store i32 %"17", ptr addrspace(5) %"7", align 4 %"20" = load i64, ptr addrspace(5) %"4", align 8 %"45" = inttoptr i64 %"20" to ptr - %"56" = getelementptr inbounds i8, ptr %"45", i64 8 - %"19" = load i32, ptr %"56", align 4 + %"55" = getelementptr inbounds i8, ptr %"45", i64 8 + %"19" = load i32, ptr %"55", align 4 store i32 %"19", ptr addrspace(5) %"8", align 4 %"22" = load i32, ptr addrspace(5) %"6", align 4 - %0 = icmp eq i32 %"22", 0 - %1 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true) - %2 = sub i32 31, %1 - %"46" = select i1 %0, i32 -1, i32 %2 + %2 = icmp eq i32 %"22", 0 + %3 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true) + %4 = sub i32 31, %3 + %"46" = select i1 %2, i32 -1, 
i32 %4 store i32 %"46", ptr addrspace(5) %"9", align 4 %"24" = load i32, ptr addrspace(5) %"7", align 4 - %3 = icmp eq i32 %"24", 0 - %4 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true) - %5 = sub i32 31, %4 - %"47" = select i1 %3, i32 -1, i32 %5 + %5 = icmp eq i32 %"24", 0 + %6 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true) + %7 = sub i32 31, %6 + %"47" = select i1 %5, i32 -1, i32 %7 store i32 %"47", ptr addrspace(5) %"10", align 4 %"26" = load i32, ptr addrspace(5) %"8", align 4 - %6 = icmp eq i32 %"26", 0 - %7 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true) - %8 = sub i32 31, %7 - %"48" = select i1 %6, i32 -1, i32 %8 + %8 = icmp eq i32 %"26", 0 + %9 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true) + %10 = sub i32 31, %9 + %"48" = select i1 %8, i32 -1, i32 %10 store i32 %"48", ptr addrspace(5) %"11", align 4 %"27" = load i64, ptr addrspace(5) %"5", align 8 %"28" = load i32, ptr addrspace(5) %"9", align 4 @@ -56,13 +58,13 @@ define protected amdgpu_kernel void @bfind(ptr addrspace(4) byref(i64) %"41", pt %"29" = load i64, ptr addrspace(5) %"5", align 8 %"30" = load i32, ptr addrspace(5) %"10", align 4 %"50" = inttoptr i64 %"29" to ptr - %"58" = getelementptr inbounds i8, ptr %"50", i64 4 - store i32 %"30", ptr %"58", align 4 + %"57" = getelementptr inbounds i8, ptr %"50", i64 4 + store i32 %"30", ptr %"57", align 4 %"31" = load i64, ptr addrspace(5) %"5", align 8 %"32" = load i32, ptr addrspace(5) %"11", align 4 %"51" = inttoptr i64 %"31" to ptr - %"60" = getelementptr inbounds i8, ptr %"51", i64 8 - store i32 %"32", ptr %"60", align 4 + %"59" = getelementptr inbounds i8, ptr %"51", i64 8 + store i32 %"32", ptr %"59", align 4 ret void } diff --git a/ptx/src/test/spirv_run/bfind_shiftamt.ll b/ptx/src/test/spirv_run/bfind_shiftamt.ll index fd215142..9968d85d 100644 --- a/ptx/src/test/spirv_run/bfind_shiftamt.ll +++ b/ptx/src/test/spirv_run/bfind_shiftamt.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = 
"amdgcn-amd-amdhsa" define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 { -"52": %"12" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"12", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -13,6 +11,10 @@ define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"12", align 1 %"13" = load i64, ptr addrspace(4) %"41", align 8 store i64 %"13", ptr addrspace(5) %"4", align 8 %"14" = load i64, ptr addrspace(4) %"42", align 8 @@ -23,28 +25,28 @@ define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64) store i32 %"15", ptr addrspace(5) %"6", align 4 %"18" = load i64, ptr addrspace(5) %"4", align 8 %"44" = inttoptr i64 %"18" to ptr - %"54" = getelementptr inbounds i8, ptr %"44", i64 4 - %"17" = load i32, ptr %"54", align 4 + %"53" = getelementptr inbounds i8, ptr %"44", i64 4 + %"17" = load i32, ptr %"53", align 4 store i32 %"17", ptr addrspace(5) %"7", align 4 %"20" = load i64, ptr addrspace(5) %"4", align 8 %"45" = inttoptr i64 %"20" to ptr - %"56" = getelementptr inbounds i8, ptr %"45", i64 8 - %"19" = load i32, ptr %"56", align 4 + %"55" = getelementptr inbounds i8, ptr %"45", i64 8 + %"19" = load i32, ptr %"55", align 4 store i32 %"19", ptr addrspace(5) %"8", align 4 %"22" = load i32, ptr addrspace(5) %"6", align 4 - %0 = icmp eq i32 %"22", 0 - %1 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true) - %"46" = select i1 %0, i32 -1, i32 %1 + %2 = icmp eq i32 %"22", 0 + %3 = call i32 @llvm.ctlz.i32(i32 %"22", i1 true) + %"46" = select i1 %2, i32 -1, i32 %3 store i32 %"46", ptr addrspace(5) %"9", align 4 %"24" = load i32, ptr addrspace(5) %"7", align 4 - %2 = icmp 
eq i32 %"24", 0 - %3 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true) - %"47" = select i1 %2, i32 -1, i32 %3 + %4 = icmp eq i32 %"24", 0 + %5 = call i32 @llvm.ctlz.i32(i32 %"24", i1 true) + %"47" = select i1 %4, i32 -1, i32 %5 store i32 %"47", ptr addrspace(5) %"10", align 4 %"26" = load i32, ptr addrspace(5) %"8", align 4 - %4 = icmp eq i32 %"26", 0 - %5 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true) - %"48" = select i1 %4, i32 -1, i32 %5 + %6 = icmp eq i32 %"26", 0 + %7 = call i32 @llvm.ctlz.i32(i32 %"26", i1 true) + %"48" = select i1 %6, i32 -1, i32 %7 store i32 %"48", ptr addrspace(5) %"11", align 4 %"27" = load i64, ptr addrspace(5) %"5", align 8 %"28" = load i32, ptr addrspace(5) %"9", align 4 @@ -53,13 +55,13 @@ define protected amdgpu_kernel void @bfind_shiftamt(ptr addrspace(4) byref(i64) %"29" = load i64, ptr addrspace(5) %"5", align 8 %"30" = load i32, ptr addrspace(5) %"10", align 4 %"50" = inttoptr i64 %"29" to ptr - %"58" = getelementptr inbounds i8, ptr %"50", i64 4 - store i32 %"30", ptr %"58", align 4 + %"57" = getelementptr inbounds i8, ptr %"50", i64 4 + store i32 %"30", ptr %"57", align 4 %"31" = load i64, ptr addrspace(5) %"5", align 8 %"32" = load i32, ptr addrspace(5) %"11", align 4 %"51" = inttoptr i64 %"31" to ptr - %"60" = getelementptr inbounds i8, ptr %"51", i64 8 - store i32 %"32", ptr %"60", align 4 + %"59" = getelementptr inbounds i8, ptr %"51", i64 8 + store i32 %"32", ptr %"59", align 4 ret void } diff --git a/ptx/src/test/spirv_run/block.ll b/ptx/src/test/spirv_run/block.ll index 87dd2276..b482fe28 100644 --- a/ptx/src/test/spirv_run/block.ll +++ b/ptx/src/test/spirv_run/block.ll @@ -2,14 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"26": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca 
i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"23", align 8 diff --git a/ptx/src/test/spirv_run/bra.ll b/ptx/src/test/spirv_run/bra.ll index 6d62ccaf..41733929 100644 --- a/ptx/src/test/spirv_run/bra.ll +++ b/ptx/src/test/spirv_run/bra.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { -"28": %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"11", align 1 %"12" = load i64, ptr addrspace(4) %"24", align 8 store i64 %"12", ptr addrspace(5) %"7", align 8 %"13" = load i64, ptr addrspace(4) %"25", align 8 @@ -19,19 +21,19 @@ define protected amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"24", ptr store i64 %"14", ptr addrspace(5) %"9", align 8 br label %"4" -"4": ; preds = %"28" +"4": ; preds = %1 %"17" = load i64, ptr addrspace(5) %"9", align 8 %"16" = add i64 %"17", 1 store i64 %"16", ptr addrspace(5) %"10", align 8 br label %"6" -0: ; No predecessors! +"5": ; No predecessors! 
%"19" = load i64, ptr addrspace(5) %"9", align 8 %"18" = add i64 %"19", 2 store i64 %"18", ptr addrspace(5) %"10", align 8 br label %"6" -"6": ; preds = %0, %"4" +"6": ; preds = %"5", %"4" %"20" = load i64, ptr addrspace(5) %"8", align 8 %"21" = load i64, ptr addrspace(5) %"10", align 8 %"27" = inttoptr i64 %"20" to ptr diff --git a/ptx/src/test/spirv_run/brev.ll b/ptx/src/test/spirv_run/brev.ll index a519c2bb..d838750b 100644 --- a/ptx/src/test/spirv_run/brev.ll +++ b/ptx/src/test/spirv_run/brev.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 diff --git a/ptx/src/test/spirv_run/call.ll b/ptx/src/test/spirv_run/call.ll index d89322ef..684bb0c4 100644 --- a/ptx/src/test/spirv_run/call.ll +++ b/ptx/src/test/spirv_run/call.ll @@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define private i64 @incr(i64 %"29") #0 { -"49": %"18" = alloca i64, align 8, addrspace(5) %"17" = alloca i64, align 8, addrspace(5) %"20" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"20", align 1 %"42" = alloca i64, align 8, addrspace(5) %"43" = alloca i64, align 8, addrspace(5) %"14" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 store i64 %"29", ptr addrspace(5) %"18", align 8 + store i1 false, ptr addrspace(5) %"20", align 
1 %"30" = load i64, ptr addrspace(5) %"18", align 8 store i64 %"30", ptr addrspace(5) %"43", align 8 %"31" = load i64, ptr addrspace(5) %"43", align 8 @@ -27,14 +29,16 @@ define private i64 @incr(i64 %"29") #0 { } define protected amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 { -"48": %"19" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"19", align 1 %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"19", align 1 %"21" = load i64, ptr addrspace(4) %"38", align 8 store i64 %"21", ptr addrspace(5) %"7", align 8 %"22" = load i64, ptr addrspace(4) %"39", align 8 diff --git a/ptx/src/test/spirv_run/call_bug.ll b/ptx/src/test/spirv_run/call_bug.ll index 3ad91461..12c8e2cd 100644 --- a/ptx/src/test/spirv_run/call_bug.ll +++ b/ptx/src/test/spirv_run/call_bug.ll @@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define private [2 x i32] @incr(i64 %"21") #0 { -"56": %"16" = alloca i64, align 8, addrspace(5) %"15" = alloca [2 x i32], align 4, addrspace(5) %"19" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"19", align 1 %"42" = alloca [2 x i32], align 4, addrspace(5) %"43" = alloca i64, align 8, addrspace(5) %"4" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 store i64 %"21", ptr addrspace(5) %"16", align 8 + store i1 false, ptr addrspace(5) %"19", align 1 %"22" = load i64, ptr addrspace(5) %"16", align 8 store i64 %"22", ptr addrspace(5) %"43", align 8 %"23" = load i64, ptr addrspace(5) %"43", align 8 @@ -27,15 +29,17 @@ define private [2 x i32] @incr(i64 %"21") #0 { } define protected amdgpu_kernel void @call_bug(ptr addrspace(4) byref(i64) %"44", 
ptr addrspace(4) byref(i64) %"45") #0 { -"57": %"20" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"20", align 1 %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) %"11" = alloca i64, align 8, addrspace(5) %"46" = alloca i64, align 8, addrspace(5) %"47" = alloca [2 x i32], align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"20", align 1 %"29" = load i64, ptr addrspace(4) %"44", align 8 store i64 %"29", ptr addrspace(5) %"8", align 8 %"30" = load i64, ptr addrspace(4) %"45", align 8 @@ -49,11 +53,11 @@ define protected amdgpu_kernel void @call_bug(ptr addrspace(4) byref(i64) %"44", store i64 ptrtoint (ptr @incr to i64), ptr addrspace(5) %"11", align 8 %"17" = load i64, ptr addrspace(5) %"46", align 8 %"35" = load i64, ptr addrspace(5) %"11", align 8 - %0 = inttoptr i64 %"35" to ptr - %"18" = call [2 x i32] %0(i64 %"17") + %2 = inttoptr i64 %"35" to ptr + %"18" = call [2 x i32] %2(i64 %"17") store [2 x i32] %"18", ptr addrspace(5) %"47", align 4 - %"59" = getelementptr inbounds i8, ptr addrspace(5) %"47", i64 0 - %"36" = load i64, ptr addrspace(5) %"59", align 8 + %"57" = getelementptr inbounds i8, ptr addrspace(5) %"47", i64 0 + %"36" = load i64, ptr addrspace(5) %"57", align 8 store i64 %"36", ptr addrspace(5) %"10", align 8 %"37" = load i64, ptr addrspace(5) %"9", align 8 %"38" = load i64, ptr addrspace(5) %"10", align 8 diff --git a/ptx/src/test/spirv_run/call_multi_return.ll b/ptx/src/test/spirv_run/call_multi_return.ll index 35cc5e08..5cf701b9 100644 --- a/ptx/src/test/spirv_run/call_multi_return.ll +++ b/ptx/src/test/spirv_run/call_multi_return.ll @@ -4,16 +4,18 @@ target triple = "amdgcn-amd-amdhsa" %struct.i64i32 = type { i64, i32 } define private %struct.i64i32 @"1"(i32 %"39", i32 %"40") #0 { -"62": %"18" = alloca i32, align 4, addrspace(5) %"19" = alloca i32, align 4, addrspace(5) %"16" = alloca i64, align 8, 
addrspace(5) %"17" = alloca i32, align 4, addrspace(5) %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 %"20" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 store i32 %"39", ptr addrspace(5) %"18", align 4 store i32 %"40", ptr addrspace(5) %"19", align 4 + store i1 false, ptr addrspace(5) %"22", align 1 %"42" = load i32, ptr addrspace(5) %"18", align 4 %"43" = load i32, ptr addrspace(5) %"19", align 4 %"41" = add i32 %"42", %"43" @@ -27,15 +29,13 @@ define private %struct.i64i32 @"1"(i32 %"39", i32 %"40") #0 { store i32 %"46", ptr addrspace(5) %"17", align 4 %"49" = load i64, ptr addrspace(5) %"16", align 8 %"50" = load i32, ptr addrspace(5) %"17", align 4 - %0 = insertvalue %struct.i64i32 undef, i64 %"49", 0 - %1 = insertvalue %struct.i64i32 %0, i32 %"50", 1 - ret %struct.i64i32 %1 + %2 = insertvalue %struct.i64i32 undef, i64 %"49", 0 + %3 = insertvalue %struct.i64i32 %2, i32 %"50", 1 + ret %struct.i64i32 %3 } define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i64) %"55", ptr addrspace(4) byref(i64) %"56") #0 { -"61": %"21" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"21", align 1 %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) @@ -43,6 +43,10 @@ define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i6 %"13" = alloca i64, align 8, addrspace(5) %"14" = alloca i64, align 8, addrspace(5) %"15" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"21", align 1 %"23" = load i64, ptr addrspace(4) %"55", align 8 store i64 %"23", ptr addrspace(5) %"9", align 8 %"24" = load i64, ptr addrspace(4) %"56", align 8 @@ -53,14 +57,14 @@ define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i6 store i32 %"25", ptr addrspace(5) %"11", align 4 %"28" = load i64, ptr addrspace(5) %"9", align 8 
%"58" = inttoptr i64 %"28" to ptr addrspace(1) - %"64" = getelementptr inbounds i8, ptr addrspace(1) %"58", i64 4 - %"27" = load i32, ptr addrspace(1) %"64", align 4 + %"62" = getelementptr inbounds i8, ptr addrspace(1) %"58", i64 4 + %"27" = load i32, ptr addrspace(1) %"62", align 4 store i32 %"27", ptr addrspace(5) %"12", align 4 %"31" = load i32, ptr addrspace(5) %"11", align 4 %"32" = load i32, ptr addrspace(5) %"12", align 4 - %0 = call %struct.i64i32 @"1"(i32 %"31", i32 %"32") - %"29" = extractvalue %struct.i64i32 %0, 0 - %"30" = extractvalue %struct.i64i32 %0, 1 + %2 = call %struct.i64i32 @"1"(i32 %"31", i32 %"32") + %"29" = extractvalue %struct.i64i32 %2, 0 + %"30" = extractvalue %struct.i64i32 %2, 1 store i64 %"29", ptr addrspace(5) %"13", align 8 store i32 %"30", ptr addrspace(5) %"15", align 4 %"34" = load i32, ptr addrspace(5) %"15", align 4 @@ -73,8 +77,8 @@ define protected amdgpu_kernel void @call_multi_return(ptr addrspace(4) byref(i6 %"37" = load i64, ptr addrspace(5) %"10", align 8 %"38" = load i64, ptr addrspace(5) %"14", align 8 %"60" = inttoptr i64 %"37" to ptr addrspace(1) - %"66" = getelementptr inbounds i8, ptr addrspace(1) %"60", i64 8 - store i64 %"38", ptr addrspace(1) %"66", align 8 + %"64" = getelementptr inbounds i8, ptr addrspace(1) %"60", i64 8 + store i64 %"38", ptr addrspace(1) %"64", align 8 ret void } diff --git a/ptx/src/test/spirv_run/callprototype.ll b/ptx/src/test/spirv_run/callprototype.ll index be431eac..9cba37cb 100644 --- a/ptx/src/test/spirv_run/callprototype.ll +++ b/ptx/src/test/spirv_run/callprototype.ll @@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define private i64 @incr(i64 %"33") #0 { -"54": %"20" = alloca i64, align 8, addrspace(5) %"19" = alloca i64, align 8, addrspace(5) %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 %"46" = alloca i64, align 8, addrspace(5) %"47" = alloca i64, 
align 8, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 store i64 %"33", ptr addrspace(5) %"20", align 8 + store i1 false, ptr addrspace(5) %"22", align 1 %"34" = load i64, ptr addrspace(5) %"20", align 8 store i64 %"34", ptr addrspace(5) %"47", align 8 %"35" = load i64, ptr addrspace(5) %"47", align 8 @@ -27,15 +29,17 @@ define private i64 @incr(i64 %"33") #0 { } define protected amdgpu_kernel void @callprototype(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 { -"53": %"21" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"21", align 1 %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) %"44" = alloca i64, align 8, addrspace(5) %"45" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"21", align 1 %"23" = load i64, ptr addrspace(4) %"42", align 8 store i64 %"23", ptr addrspace(5) %"7", align 8 %"24" = load i64, ptr addrspace(4) %"43", align 8 @@ -49,8 +53,8 @@ define protected amdgpu_kernel void @callprototype(ptr addrspace(4) byref(i64) % store i64 ptrtoint (ptr @incr to i64), ptr addrspace(5) %"10", align 8 %"17" = load i64, ptr addrspace(5) %"44", align 8 %"29" = load i64, ptr addrspace(5) %"10", align 8 - %0 = inttoptr i64 %"29" to ptr - %"18" = call i64 %0(i64 %"17") + %2 = inttoptr i64 %"29" to ptr + %"18" = call i64 %2(i64 %"17") store i64 %"18", ptr addrspace(5) %"45", align 8 %"30" = load i64, ptr addrspace(5) %"45", align 8 store i64 %"30", ptr addrspace(5) %"9", align 8 diff --git a/ptx/src/test/spirv_run/carry_set_all.ll b/ptx/src/test/spirv_run/carry_set_all.ll index 8b412c10..8983b702 100644 --- a/ptx/src/test/spirv_run/carry_set_all.ll +++ b/ptx/src/test/spirv_run/carry_set_all.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = 
"amdgcn-amd-amdhsa" define protected amdgpu_kernel void @carry_set_all(ptr addrspace(4) byref(i64) %"208", ptr addrspace(4) byref(i64) %"209") #0 { -"268": %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -23,147 +21,151 @@ define protected amdgpu_kernel void @carry_set_all(ptr addrspace(4) byref(i64) % %"19" = alloca i32, align 4, addrspace(5) %"20" = alloca i32, align 4, addrspace(5) %"21" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"22", align 1 %"37" = load i64, ptr addrspace(4) %"209", align 8 store i64 %"37", ptr addrspace(5) %"5", align 8 - %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0) - %"210" = extractvalue { i32, i1 } %0, 0 - %"23" = extractvalue { i32, i1 } %0, 1 + %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0) + %"210" = extractvalue { i32, i1 } %2, 0 + %"23" = extractvalue { i32, i1 } %2, 1 store i32 %"210", ptr addrspace(5) %"6", align 4 %"39" = xor i1 %"23", true store i1 %"39", ptr addrspace(5) %"22", align 1 %"41" = load i1, ptr addrspace(5) %"22", align 1 - %1 = zext i1 %"41" to i32 - %"211" = add i32 0, %1 + %3 = zext i1 %"41" to i32 + %"211" = add i32 0, %3 store i32 %"211", ptr addrspace(5) %"6", align 4 %"42" = load i1, ptr addrspace(5) %"22", align 1 %"24" = xor i1 %"42", true - %2 = zext i1 %"24" to i32 - %"212" = sub i32 0, %2 + %4 = zext i1 %"24" to i32 + %"212" = sub i32 0, %4 store i32 %"212", ptr addrspace(5) %"7", align 4 - %3 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) - %"213" = extractvalue { i32, i1 } %3, 0 - %"25" = extractvalue { i32, i1 } %3, 1 + %5 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) + %"213" = extractvalue { i32, i1 } %5, 0 + %"25" = extractvalue { i32, i1 } %5, 1 store i32 %"213", ptr addrspace(5) %"8", align 4 %"45" = 
xor i1 %"25", true store i1 %"45", ptr addrspace(5) %"22", align 1 %"47" = load i1, ptr addrspace(5) %"22", align 1 - %4 = zext i1 %"47" to i32 - %"214" = add i32 0, %4 + %6 = zext i1 %"47" to i32 + %"214" = add i32 0, %6 store i32 %"214", ptr addrspace(5) %"8", align 4 %"48" = load i1, ptr addrspace(5) %"22", align 1 %"26" = xor i1 %"48", true - %5 = zext i1 %"26" to i32 - %"215" = sub i32 0, %5 + %7 = zext i1 %"26" to i32 + %"215" = sub i32 0, %7 store i32 %"215", ptr addrspace(5) %"9", align 4 - %6 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) - %"216" = extractvalue { i32, i1 } %6, 0 - %"51" = extractvalue { i32, i1 } %6, 1 + %8 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) + %"216" = extractvalue { i32, i1 } %8, 0 + %"51" = extractvalue { i32, i1 } %8, 1 store i32 %"216", ptr addrspace(5) %"10", align 4 store i1 %"51", ptr addrspace(5) %"22", align 1 %"53" = load i1, ptr addrspace(5) %"22", align 1 - %7 = zext i1 %"53" to i32 - %"217" = add i32 0, %7 + %9 = zext i1 %"53" to i32 + %"217" = add i32 0, %9 store i32 %"217", ptr addrspace(5) %"10", align 4 %"54" = load i1, ptr addrspace(5) %"22", align 1 %"27" = xor i1 %"54", true - %8 = zext i1 %"27" to i32 - %"218" = sub i32 0, %8 + %10 = zext i1 %"27" to i32 + %"218" = sub i32 0, %10 store i32 %"218", ptr addrspace(5) %"11", align 4 - %9 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1) - %"219" = extractvalue { i32, i1 } %9, 0 - %"57" = extractvalue { i32, i1 } %9, 1 + %11 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1) + %"219" = extractvalue { i32, i1 } %11, 0 + %"57" = extractvalue { i32, i1 } %11, 1 store i32 %"219", ptr addrspace(5) %"12", align 4 store i1 %"57", ptr addrspace(5) %"22", align 1 %"59" = load i1, ptr addrspace(5) %"22", align 1 - %10 = zext i1 %"59" to i32 - %"220" = add i32 0, %10 + %12 = zext i1 %"59" to i32 + %"220" = add i32 0, %12 store i32 %"220", ptr addrspace(5) %"12", align 4 %"60" = load i1, ptr addrspace(5) %"22", align 
1 %"28" = xor i1 %"60", true - %11 = zext i1 %"28" to i32 - %"221" = sub i32 0, %11 + %13 = zext i1 %"28" to i32 + %"221" = sub i32 0, %13 store i32 %"221", ptr addrspace(5) %"13", align 4 - %12 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) - %"222" = extractvalue { i32, i1 } %12, 0 - %"63" = extractvalue { i32, i1 } %12, 1 + %14 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) + %"222" = extractvalue { i32, i1 } %14, 0 + %"63" = extractvalue { i32, i1 } %14, 1 store i32 %"222", ptr addrspace(5) %"14", align 4 store i1 %"63", ptr addrspace(5) %"22", align 1 %"65" = load i1, ptr addrspace(5) %"22", align 1 - %13 = zext i1 %"65" to i32 - %"223" = add i32 0, %13 + %15 = zext i1 %"65" to i32 + %"223" = add i32 0, %15 store i32 %"223", ptr addrspace(5) %"14", align 4 %"66" = load i1, ptr addrspace(5) %"22", align 1 %"29" = xor i1 %"66", true - %14 = zext i1 %"29" to i32 - %"224" = sub i32 0, %14 + %16 = zext i1 %"29" to i32 + %"224" = sub i32 0, %16 store i32 %"224", ptr addrspace(5) %"15", align 4 - %15 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1) - %"225" = extractvalue { i32, i1 } %15, 0 - %"69" = extractvalue { i32, i1 } %15, 1 + %17 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 -1, i32 -1) + %"225" = extractvalue { i32, i1 } %17, 0 + %"69" = extractvalue { i32, i1 } %17, 1 store i32 %"225", ptr addrspace(5) %"16", align 4 store i1 %"69", ptr addrspace(5) %"22", align 1 %"71" = load i1, ptr addrspace(5) %"22", align 1 - %16 = zext i1 %"71" to i32 - %"226" = add i32 0, %16 + %18 = zext i1 %"71" to i32 + %"226" = add i32 0, %18 store i32 %"226", ptr addrspace(5) %"16", align 4 %"72" = load i1, ptr addrspace(5) %"22", align 1 %"30" = xor i1 %"72", true - %17 = zext i1 %"30" to i32 - %"227" = sub i32 0, %17 + %19 = zext i1 %"30" to i32 + %"227" = sub i32 0, %19 store i32 %"227", ptr addrspace(5) %"17", align 4 - %18 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) - %"228" = extractvalue { i32, i1 } %18, 
0 - %"75" = extractvalue { i32, i1 } %18, 1 + %20 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) + %"228" = extractvalue { i32, i1 } %20, 0 + %"75" = extractvalue { i32, i1 } %20, 1 store i32 %"228", ptr addrspace(5) %"18", align 4 store i1 %"75", ptr addrspace(5) %"22", align 1 %"76" = load i1, ptr addrspace(5) %"22", align 1 %"31" = xor i1 %"76", true - %19 = zext i1 %"31" to i32 - %20 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0) - %21 = extractvalue { i32, i1 } %20, 0 - %22 = extractvalue { i32, i1 } %20, 1 - %23 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %21, i32 %19) - %"229" = extractvalue { i32, i1 } %23, 0 - %24 = extractvalue { i32, i1 } %23, 1 - %"32" = xor i1 %22, %24 + %21 = zext i1 %"31" to i32 + %22 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 0) + %23 = extractvalue { i32, i1 } %22, 0 + %24 = extractvalue { i32, i1 } %22, 1 + %25 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %23, i32 %21) + %"229" = extractvalue { i32, i1 } %25, 0 + %26 = extractvalue { i32, i1 } %25, 1 + %"32" = xor i1 %24, %26 store i32 %"229", ptr addrspace(5) %"18", align 4 %"78" = xor i1 %"32", true store i1 %"78", ptr addrspace(5) %"22", align 1 %"80" = load i1, ptr addrspace(5) %"22", align 1 - %25 = zext i1 %"80" to i32 - %"230" = add i32 0, %25 + %27 = zext i1 %"80" to i32 + %"230" = add i32 0, %27 store i32 %"230", ptr addrspace(5) %"18", align 4 %"81" = load i1, ptr addrspace(5) %"22", align 1 %"33" = xor i1 %"81", true - %26 = zext i1 %"33" to i32 - %"231" = sub i32 0, %26 + %28 = zext i1 %"33" to i32 + %"231" = sub i32 0, %28 store i32 %"231", ptr addrspace(5) %"19", align 4 - %27 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) - %"232" = extractvalue { i32, i1 } %27, 0 - %"84" = extractvalue { i32, i1 } %27, 1 + %29 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0) + %"232" = extractvalue { i32, i1 } %29, 0 + %"84" = extractvalue { i32, i1 } %29, 1 store i32 %"232", ptr addrspace(5) 
%"20", align 4 store i1 %"84", ptr addrspace(5) %"22", align 1 %"85" = load i1, ptr addrspace(5) %"22", align 1 %"34" = xor i1 %"85", true - %28 = zext i1 %"34" to i32 - %29 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) - %30 = extractvalue { i32, i1 } %29, 0 - %31 = extractvalue { i32, i1 } %29, 1 - %32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %30, i32 %28) - %"233" = extractvalue { i32, i1 } %32, 0 - %33 = extractvalue { i32, i1 } %32, 1 - %"35" = xor i1 %31, %33 + %30 = zext i1 %"34" to i32 + %31 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 1) + %32 = extractvalue { i32, i1 } %31, 0 + %33 = extractvalue { i32, i1 } %31, 1 + %34 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %32, i32 %30) + %"233" = extractvalue { i32, i1 } %34, 0 + %35 = extractvalue { i32, i1 } %34, 1 + %"35" = xor i1 %33, %35 store i32 %"233", ptr addrspace(5) %"20", align 4 %"87" = xor i1 %"35", true store i1 %"87", ptr addrspace(5) %"22", align 1 %"89" = load i1, ptr addrspace(5) %"22", align 1 - %34 = zext i1 %"89" to i32 - %"234" = add i32 0, %34 + %36 = zext i1 %"89" to i32 + %"234" = add i32 0, %36 store i32 %"234", ptr addrspace(5) %"20", align 4 %"90" = load i1, ptr addrspace(5) %"22", align 1 %"36" = xor i1 %"90", true - %35 = zext i1 %"36" to i32 - %"235" = sub i32 0, %35 + %37 = zext i1 %"36" to i32 + %"235" = sub i32 0, %37 store i32 %"235", ptr addrspace(5) %"21", align 4 %"92" = load i64, ptr addrspace(5) %"5", align 8 %"93" = load i32, ptr addrspace(5) %"6", align 4 @@ -172,78 +174,78 @@ define protected amdgpu_kernel void @carry_set_all(ptr addrspace(4) byref(i64) % %"94" = load i64, ptr addrspace(5) %"5", align 8 %"95" = load i32, ptr addrspace(5) %"8", align 4 %"238" = inttoptr i64 %"94" to ptr - %"270" = getelementptr inbounds i8, ptr %"238", i64 4 - store i32 %"95", ptr %"270", align 4 + %"269" = getelementptr inbounds i8, ptr %"238", i64 4 + store i32 %"95", ptr %"269", align 4 %"96" = load i64, ptr addrspace(5) %"5", align 8 
%"97" = load i32, ptr addrspace(5) %"10", align 4 %"240" = inttoptr i64 %"96" to ptr - %"272" = getelementptr inbounds i8, ptr %"240", i64 8 - store i32 %"97", ptr %"272", align 4 + %"271" = getelementptr inbounds i8, ptr %"240", i64 8 + store i32 %"97", ptr %"271", align 4 %"98" = load i64, ptr addrspace(5) %"5", align 8 %"99" = load i32, ptr addrspace(5) %"12", align 4 %"242" = inttoptr i64 %"98" to ptr - %"274" = getelementptr inbounds i8, ptr %"242", i64 12 - store i32 %"99", ptr %"274", align 4 + %"273" = getelementptr inbounds i8, ptr %"242", i64 12 + store i32 %"99", ptr %"273", align 4 %"100" = load i64, ptr addrspace(5) %"5", align 8 %"101" = load i32, ptr addrspace(5) %"14", align 4 %"244" = inttoptr i64 %"100" to ptr - %"276" = getelementptr inbounds i8, ptr %"244", i64 16 - store i32 %"101", ptr %"276", align 4 + %"275" = getelementptr inbounds i8, ptr %"244", i64 16 + store i32 %"101", ptr %"275", align 4 %"102" = load i64, ptr addrspace(5) %"5", align 8 %"103" = load i32, ptr addrspace(5) %"16", align 4 %"246" = inttoptr i64 %"102" to ptr - %"278" = getelementptr inbounds i8, ptr %"246", i64 20 - store i32 %"103", ptr %"278", align 4 + %"277" = getelementptr inbounds i8, ptr %"246", i64 20 + store i32 %"103", ptr %"277", align 4 %"104" = load i64, ptr addrspace(5) %"5", align 8 %"105" = load i32, ptr addrspace(5) %"18", align 4 %"248" = inttoptr i64 %"104" to ptr - %"280" = getelementptr inbounds i8, ptr %"248", i64 24 - store i32 %"105", ptr %"280", align 4 + %"279" = getelementptr inbounds i8, ptr %"248", i64 24 + store i32 %"105", ptr %"279", align 4 %"106" = load i64, ptr addrspace(5) %"5", align 8 %"107" = load i32, ptr addrspace(5) %"20", align 4 %"250" = inttoptr i64 %"106" to ptr - %"282" = getelementptr inbounds i8, ptr %"250", i64 28 - store i32 %"107", ptr %"282", align 4 + %"281" = getelementptr inbounds i8, ptr %"250", i64 28 + store i32 %"107", ptr %"281", align 4 %"108" = load i64, ptr addrspace(5) %"5", align 8 %"109" = load i32, ptr 
addrspace(5) %"7", align 4 %"252" = inttoptr i64 %"108" to ptr - %"284" = getelementptr inbounds i8, ptr %"252", i64 32 - store i32 %"109", ptr %"284", align 4 + %"283" = getelementptr inbounds i8, ptr %"252", i64 32 + store i32 %"109", ptr %"283", align 4 %"110" = load i64, ptr addrspace(5) %"5", align 8 %"111" = load i32, ptr addrspace(5) %"9", align 4 %"254" = inttoptr i64 %"110" to ptr - %"286" = getelementptr inbounds i8, ptr %"254", i64 36 - store i32 %"111", ptr %"286", align 4 + %"285" = getelementptr inbounds i8, ptr %"254", i64 36 + store i32 %"111", ptr %"285", align 4 %"112" = load i64, ptr addrspace(5) %"5", align 8 %"113" = load i32, ptr addrspace(5) %"11", align 4 %"256" = inttoptr i64 %"112" to ptr - %"288" = getelementptr inbounds i8, ptr %"256", i64 40 - store i32 %"113", ptr %"288", align 4 + %"287" = getelementptr inbounds i8, ptr %"256", i64 40 + store i32 %"113", ptr %"287", align 4 %"114" = load i64, ptr addrspace(5) %"5", align 8 %"115" = load i32, ptr addrspace(5) %"13", align 4 %"258" = inttoptr i64 %"114" to ptr - %"290" = getelementptr inbounds i8, ptr %"258", i64 44 - store i32 %"115", ptr %"290", align 4 + %"289" = getelementptr inbounds i8, ptr %"258", i64 44 + store i32 %"115", ptr %"289", align 4 %"116" = load i64, ptr addrspace(5) %"5", align 8 %"117" = load i32, ptr addrspace(5) %"15", align 4 %"260" = inttoptr i64 %"116" to ptr - %"292" = getelementptr inbounds i8, ptr %"260", i64 48 - store i32 %"117", ptr %"292", align 4 + %"291" = getelementptr inbounds i8, ptr %"260", i64 48 + store i32 %"117", ptr %"291", align 4 %"118" = load i64, ptr addrspace(5) %"5", align 8 %"119" = load i32, ptr addrspace(5) %"17", align 4 %"262" = inttoptr i64 %"118" to ptr - %"294" = getelementptr inbounds i8, ptr %"262", i64 52 - store i32 %"119", ptr %"294", align 4 + %"293" = getelementptr inbounds i8, ptr %"262", i64 52 + store i32 %"119", ptr %"293", align 4 %"120" = load i64, ptr addrspace(5) %"5", align 8 %"121" = load i32, ptr addrspace(5) 
%"19", align 4 %"264" = inttoptr i64 %"120" to ptr - %"296" = getelementptr inbounds i8, ptr %"264", i64 56 - store i32 %"121", ptr %"296", align 4 + %"295" = getelementptr inbounds i8, ptr %"264", i64 56 + store i32 %"121", ptr %"295", align 4 %"122" = load i64, ptr addrspace(5) %"5", align 8 %"123" = load i32, ptr addrspace(5) %"21", align 4 %"266" = inttoptr i64 %"122" to ptr - %"298" = getelementptr inbounds i8, ptr %"266", i64 60 - store i32 %"123", ptr %"298", align 4 + %"297" = getelementptr inbounds i8, ptr %"266", i64 60 + store i32 %"123", ptr %"297", align 4 ret void } diff --git a/ptx/src/test/spirv_run/clz.ll b/ptx/src/test/spirv_run/clz.ll index 31f408da..5a931456 100644 --- a/ptx/src/test/spirv_run/clz.ll +++ b/ptx/src/test/spirv_run/clz.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 @@ -17,8 +19,8 @@ define protected amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"16", ptr %"10" = load i32, ptr %"18", align 4 store i32 %"10", ptr addrspace(5) %"6", align 4 %"13" = load i32, ptr addrspace(5) %"6", align 4 - %0 = call i32 @llvm.ctlz.i32(i32 %"13", i1 false) - store i32 %0, ptr addrspace(5) %"6", align 4 + %2 = call i32 @llvm.ctlz.i32(i32 %"13", i1 false) + store i32 %2, ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"5", align 8 %"15" = load i32, ptr addrspace(5) %"6", align 4 %"19" = 
inttoptr i64 %"14" to ptr diff --git a/ptx/src/test/spirv_run/const.ll b/ptx/src/test/spirv_run/const.ll index 80fcc072..df0de94d 100644 --- a/ptx/src/test/spirv_run/const.ll +++ b/ptx/src/test/spirv_run/const.ll @@ -4,15 +4,17 @@ target triple = "amdgcn-amd-amdhsa" @constparams = protected addrspace(4) externally_initialized global [4 x i16] [i16 10, i16 20, i16 30, i16 40], align 8 define protected amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 { -"52": %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i16, align 2, addrspace(5) %"8" = alloca i16, align 2, addrspace(5) %"9" = alloca i16, align 2, addrspace(5) %"10" = alloca i16, align 2, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"11", align 1 %"12" = load i64, ptr addrspace(4) %"38", align 8 store i64 %"12", ptr addrspace(5) %"5", align 8 %"13" = load i64, ptr addrspace(4) %"39", align 8 @@ -32,18 +34,18 @@ define protected amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"38", pt %"20" = load i64, ptr addrspace(5) %"6", align 8 %"21" = load i16, ptr addrspace(5) %"8", align 2 %"46" = inttoptr i64 %"20" to ptr - %"60" = getelementptr inbounds i8, ptr %"46", i64 2 - store i16 %"21", ptr %"60", align 2 + %"59" = getelementptr inbounds i8, ptr %"46", i64 2 + store i16 %"21", ptr %"59", align 2 %"22" = load i64, ptr addrspace(5) %"6", align 8 %"23" = load i16, ptr addrspace(5) %"9", align 2 %"48" = inttoptr i64 %"22" to ptr - %"62" = getelementptr inbounds i8, ptr %"48", i64 4 - store i16 %"23", ptr %"62", align 2 + %"61" = getelementptr inbounds i8, ptr %"48", i64 4 + store i16 %"23", ptr %"61", align 2 %"24" = load i64, ptr addrspace(5) %"6", align 8 %"25" = load i16, ptr addrspace(5) %"10", align 2 %"50" = inttoptr i64 %"24" to ptr - %"64" = getelementptr inbounds i8, ptr %"50", i64 
6 - store i16 %"25", ptr %"64", align 2 + %"63" = getelementptr inbounds i8, ptr %"50", i64 6 + store i16 %"25", ptr %"63", align 2 ret void } diff --git a/ptx/src/test/spirv_run/constant_f32.ll b/ptx/src/test/spirv_run/constant_f32.ll index e0309ea2..a6558c9e 100644 --- a/ptx/src/test/spirv_run/constant_f32.ll +++ b/ptx/src/test/spirv_run/constant_f32.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"18", align 8 diff --git a/ptx/src/test/spirv_run/constant_negative.ll b/ptx/src/test/spirv_run/constant_negative.ll index 337689f0..c3e7e869 100644 --- a/ptx/src/test/spirv_run/constant_negative.ll +++ b/ptx/src/test/spirv_run/constant_negative.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) 
%"18", align 8 diff --git a/ptx/src/test/spirv_run/cos.ll b/ptx/src/test/spirv_run/cos.ll index d385e1f3..da48297b 100644 --- a/ptx/src/test/spirv_run/cos.ll +++ b/ptx/src/test/spirv_run/cos.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 diff --git a/ptx/src/test/spirv_run/cvt_clamp.ll b/ptx/src/test/spirv_run/cvt_clamp.ll index f2be4772..b610ca9a 100644 --- a/ptx/src/test/spirv_run/cvt_clamp.ll +++ b/ptx/src/test/spirv_run/cvt_clamp.ll @@ -4,12 +4,14 @@ target triple = "amdgcn-amd-amdhsa" declare float @__zluda_ptx_impl__cvt_sat_f32_f32(float) #0 define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #1 { -"56": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"46", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"47", align 8 @@ -27,8 +29,8 @@ define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46" store float %"15", ptr addrspace(1) %"49", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"50" = inttoptr i64 
%"17" to ptr addrspace(1) - %"61" = getelementptr inbounds i8, ptr addrspace(1) %"50", i64 4 - %"16" = load float, ptr addrspace(1) %"61", align 4 + %"60" = getelementptr inbounds i8, ptr addrspace(1) %"50", i64 4 + %"16" = load float, ptr addrspace(1) %"60", align 4 store float %"16", ptr addrspace(5) %"6", align 4 %"19" = load float, ptr addrspace(5) %"6", align 4 %"18" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"19") @@ -36,12 +38,12 @@ define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46" %"20" = load i64, ptr addrspace(5) %"5", align 8 %"21" = load float, ptr addrspace(5) %"6", align 4 %"51" = inttoptr i64 %"20" to ptr addrspace(1) - %"63" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 4 - store float %"21", ptr addrspace(1) %"63", align 4 + %"62" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 4 + store float %"21", ptr addrspace(1) %"62", align 4 %"23" = load i64, ptr addrspace(5) %"4", align 8 %"52" = inttoptr i64 %"23" to ptr addrspace(1) - %"65" = getelementptr inbounds i8, ptr addrspace(1) %"52", i64 8 - %"22" = load float, ptr addrspace(1) %"65", align 4 + %"64" = getelementptr inbounds i8, ptr addrspace(1) %"52", i64 8 + %"22" = load float, ptr addrspace(1) %"64", align 4 store float %"22", ptr addrspace(5) %"6", align 4 %"25" = load float, ptr addrspace(5) %"6", align 4 %"24" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"25") @@ -49,12 +51,12 @@ define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46" %"26" = load i64, ptr addrspace(5) %"5", align 8 %"27" = load float, ptr addrspace(5) %"6", align 4 %"53" = inttoptr i64 %"26" to ptr addrspace(1) - %"67" = getelementptr inbounds i8, ptr addrspace(1) %"53", i64 8 - store float %"27", ptr addrspace(1) %"67", align 4 + %"66" = getelementptr inbounds i8, ptr addrspace(1) %"53", i64 8 + store float %"27", ptr addrspace(1) %"66", align 4 %"29" = load i64, ptr addrspace(5) %"4", align 8 %"54" = inttoptr i64 %"29" to 
ptr addrspace(1) - %"69" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 12 - %"28" = load float, ptr addrspace(1) %"69", align 4 + %"68" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 12 + %"28" = load float, ptr addrspace(1) %"68", align 4 store float %"28", ptr addrspace(5) %"6", align 4 %"31" = load float, ptr addrspace(5) %"6", align 4 %"30" = call float @__zluda_ptx_impl__cvt_sat_f32_f32(float %"31") @@ -62,8 +64,8 @@ define protected amdgpu_kernel void @cvt_clamp(ptr addrspace(4) byref(i64) %"46" %"32" = load i64, ptr addrspace(5) %"5", align 8 %"33" = load float, ptr addrspace(5) %"6", align 4 %"55" = inttoptr i64 %"32" to ptr addrspace(1) - %"71" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 12 - store float %"33", ptr addrspace(1) %"71", align 4 + %"70" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 12 + store float %"33", ptr addrspace(1) %"70", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_f32_f16.ll b/ptx/src/test/spirv_run/cvt_f32_f16.ll index e3acdb6f..7379876e 100644 --- a/ptx/src/test/spirv_run/cvt_f32_f16.ll +++ b/ptx/src/test/spirv_run/cvt_f32_f16.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cvt_f32_f16(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"22": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca half, align 2, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 diff --git a/ptx/src/test/spirv_run/cvt_f32_s32.ll b/ptx/src/test/spirv_run/cvt_f32_s32.ll index 
65b00cec..90b0e4aa 100644 --- a/ptx/src/test/spirv_run/cvt_f32_s32.ll +++ b/ptx/src/test/spirv_run/cvt_f32_s32.ll @@ -10,15 +10,17 @@ declare float @__zluda_ptx_impl__cvt_rp_f32_s32(i32) #0 declare float @__zluda_ptx_impl__cvt_rz_f32_s32(i32) #0 define protected amdgpu_kernel void @cvt_f32_s32(ptr addrspace(4) byref(i64) %"49", ptr addrspace(4) byref(i64) %"50") #1 { -"75": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"11" = load i64, ptr addrspace(4) %"49", align 8 store i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"50", align 8 @@ -29,18 +31,18 @@ define protected amdgpu_kernel void @cvt_f32_s32(ptr addrspace(4) byref(i64) %"4 store i32 %"51", ptr addrspace(5) %"6", align 4 %"16" = load i64, ptr addrspace(5) %"4", align 8 %"53" = inttoptr i64 %"16" to ptr - %"89" = getelementptr inbounds i8, ptr %"53", i64 4 - %"54" = load i32, ptr %"89", align 4 + %"88" = getelementptr inbounds i8, ptr %"53", i64 4 + %"54" = load i32, ptr %"88", align 4 store i32 %"54", ptr addrspace(5) %"7", align 4 %"18" = load i64, ptr addrspace(5) %"4", align 8 %"55" = inttoptr i64 %"18" to ptr - %"91" = getelementptr inbounds i8, ptr %"55", i64 8 - %"56" = load i32, ptr %"91", align 4 + %"90" = getelementptr inbounds i8, ptr %"55", i64 8 + %"56" = load i32, ptr %"90", align 4 store i32 %"56", ptr addrspace(5) %"8", align 4 %"20" = load i64, ptr addrspace(5) %"4", align 8 %"57" = inttoptr i64 %"20" to ptr - %"93" = getelementptr inbounds i8, ptr %"57", i64 12 - %"58" = load i32, ptr %"93", align 4 + %"92" = getelementptr inbounds i8, ptr %"57", i64 12 + %"58" = load i32, ptr %"92", align 4 
store i32 %"58", ptr addrspace(5) %"9", align 4 %"22" = load i32, ptr addrspace(5) %"6", align 4 %"59" = call float @__zluda_ptx_impl__cvt_rn_f32_s32(i32 %"22") @@ -66,21 +68,21 @@ define protected amdgpu_kernel void @cvt_f32_s32(ptr addrspace(4) byref(i64) %"4 %"31" = load i64, ptr addrspace(5) %"5", align 8 %"32" = load i32, ptr addrspace(5) %"7", align 4 %"69" = inttoptr i64 %"31" to ptr addrspace(1) - %"95" = getelementptr inbounds i8, ptr addrspace(1) %"69", i64 4 + %"94" = getelementptr inbounds i8, ptr addrspace(1) %"69", i64 4 %"70" = bitcast i32 %"32" to float - store float %"70", ptr addrspace(1) %"95", align 4 + store float %"70", ptr addrspace(1) %"94", align 4 %"33" = load i64, ptr addrspace(5) %"5", align 8 %"34" = load i32, ptr addrspace(5) %"8", align 4 %"71" = inttoptr i64 %"33" to ptr addrspace(1) - %"97" = getelementptr inbounds i8, ptr addrspace(1) %"71", i64 8 + %"96" = getelementptr inbounds i8, ptr addrspace(1) %"71", i64 8 %"72" = bitcast i32 %"34" to float - store float %"72", ptr addrspace(1) %"97", align 4 + store float %"72", ptr addrspace(1) %"96", align 4 %"35" = load i64, ptr addrspace(5) %"5", align 8 %"36" = load i32, ptr addrspace(5) %"9", align 4 %"73" = inttoptr i64 %"35" to ptr addrspace(1) - %"99" = getelementptr inbounds i8, ptr addrspace(1) %"73", i64 12 + %"98" = getelementptr inbounds i8, ptr addrspace(1) %"73", i64 12 %"74" = bitcast i32 %"36" to float - store float %"74", ptr addrspace(1) %"99", align 4 + store float %"74", ptr addrspace(1) %"98", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_f64_f32.ll b/ptx/src/test/spirv_run/cvt_f64_f32.ll index 96267f4b..64b4bb8f 100644 --- a/ptx/src/test/spirv_run/cvt_f64_f32.ll +++ b/ptx/src/test/spirv_run/cvt_f64_f32.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") 
#0 { -"21": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca double, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 diff --git a/ptx/src/test/spirv_run/cvt_rni.ll b/ptx/src/test/spirv_run/cvt_rni.ll index 5eb6eaa4..77d2999e 100644 --- a/ptx/src/test/spirv_run/cvt_rni.ll +++ b/ptx/src/test/spirv_run/cvt_rni.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { -"33": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"27", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"28", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"27", store float %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"30" = inttoptr i64 %"14" to ptr - %"35" = getelementptr inbounds i8, ptr %"30", i64 4 - %"13" = load float, ptr %"35", align 4 + %"34" = getelementptr inbounds i8, ptr %"30", i64 4 + %"13" = load float, ptr %"34", align 4 store float %"13", ptr addrspace(5) %"7", align 4 %"16" = load float, ptr addrspace(5) %"6", align 4 %"15" = call float 
@llvm.rint.f32(float %"16") @@ -35,8 +37,8 @@ define protected amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"27", %"21" = load i64, ptr addrspace(5) %"5", align 8 %"22" = load float, ptr addrspace(5) %"7", align 4 %"32" = inttoptr i64 %"21" to ptr - %"37" = getelementptr inbounds i8, ptr %"32", i64 4 - store float %"22", ptr %"37", align 4 + %"36" = getelementptr inbounds i8, ptr %"32", i64 4 + store float %"22", ptr %"36", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_rzi.ll b/ptx/src/test/spirv_run/cvt_rzi.ll index 83783d88..e651db50 100644 --- a/ptx/src/test/spirv_run/cvt_rzi.ll +++ b/ptx/src/test/spirv_run/cvt_rzi.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #0 { -"33": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"27", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"28", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"27", store float %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"30" = inttoptr i64 %"14" to ptr - %"35" = getelementptr inbounds i8, ptr %"30", i64 4 - %"13" = load float, ptr %"35", align 4 + %"34" = getelementptr inbounds i8, ptr %"30", i64 4 + %"13" = load float, ptr %"34", align 4 store float %"13", ptr addrspace(5) %"7", align 4 %"16" = load float, ptr addrspace(5) %"6", align 4 %"15" = call float @llvm.trunc.f32(float %"16") @@ -35,8 +37,8 @@ define 
protected amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"27", %"21" = load i64, ptr addrspace(5) %"5", align 8 %"22" = load float, ptr addrspace(5) %"7", align 4 %"32" = inttoptr i64 %"21" to ptr - %"37" = getelementptr inbounds i8, ptr %"32", i64 4 - store float %"22", ptr %"37", align 4 + %"36" = getelementptr inbounds i8, ptr %"32", i64 4 + store float %"22", ptr %"36", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_s16_s8.ll b/ptx/src/test/spirv_run/cvt_s16_s8.ll index 841178e1..6f49cea5 100644 --- a/ptx/src/test/spirv_run/cvt_s16_s8.ll +++ b/ptx/src/test/spirv_run/cvt_s16_s8.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"23": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 @@ -18,8 +20,8 @@ define protected amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"17 %"11" = load i32, ptr addrspace(1) %"19", align 4 store i32 %"11", ptr addrspace(5) %"7", align 4 %"14" = load i32, ptr addrspace(5) %"7", align 4 - %"25" = trunc i32 %"14" to i8 - %"20" = sext i8 %"25" to i16 + %"24" = trunc i32 %"14" to i8 + %"20" = sext i8 %"24" to i16 %"13" = sext i16 %"20" to i32 store i32 %"13", ptr addrspace(5) %"6", align 4 %"15" = load i64, ptr addrspace(5) %"5", align 8 diff --git a/ptx/src/test/spirv_run/cvt_s32_f32.ll b/ptx/src/test/spirv_run/cvt_s32_f32.ll index bd1b9e35..e8b8bc1e 100644 --- 
a/ptx/src/test/spirv_run/cvt_s32_f32.ll +++ b/ptx/src/test/spirv_run/cvt_s32_f32.ll @@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__cvt_rp_s32_f32(float) #0 define protected amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #1 { -"41": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"27", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"28", align 8 @@ -22,8 +24,8 @@ define protected amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"2 store i32 %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"31" = inttoptr i64 %"14" to ptr - %"46" = getelementptr inbounds i8, ptr %"31", i64 4 - %"32" = load float, ptr %"46", align 4 + %"45" = getelementptr inbounds i8, ptr %"31", i64 4 + %"32" = load float, ptr %"45", align 4 %"13" = bitcast float %"32" to i32 store i32 %"13", ptr addrspace(5) %"7", align 4 %"16" = load i32, ptr addrspace(5) %"6", align 4 @@ -41,8 +43,8 @@ define protected amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"2 %"21" = load i64, ptr addrspace(5) %"5", align 8 %"22" = load i32, ptr addrspace(5) %"7", align 4 %"39" = inttoptr i64 %"21" to ptr addrspace(1) - %"48" = getelementptr inbounds i8, ptr addrspace(1) %"39", i64 4 - store i32 %"22", ptr addrspace(1) %"48", align 4 + %"47" = getelementptr inbounds i8, ptr addrspace(1) %"39", i64 4 + store i32 %"22", ptr addrspace(1) %"47", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_s64_s32.ll b/ptx/src/test/spirv_run/cvt_s64_s32.ll index 49582663..799b90af 100644 --- 
a/ptx/src/test/spirv_run/cvt_s64_s32.ll +++ b/ptx/src/test/spirv_run/cvt_s64_s32.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"23": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 diff --git a/ptx/src/test/spirv_run/cvt_sat_s_u.ll b/ptx/src/test/spirv_run/cvt_sat_s_u.ll index 3af6ef54..5e8d0150 100644 --- a/ptx/src/test/spirv_run/cvt_sat_s_u.ll +++ b/ptx/src/test/spirv_run/cvt_sat_s_u.ll @@ -2,14 +2,19 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"26", ptr addrspace(4) byref(i64) %"27") #0 { -"34": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) + %2 = alloca i32, align 4, addrspace(5) + %3 = alloca i32, align 4, addrspace(5) + br label %4 + +4: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"26", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"27", align 8 @@ -19,18 +24,15 @@ define protected 
amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"2 %"12" = load i32, ptr %"28", align 4 store i32 %"12", ptr addrspace(5) %"6", align 4 %"15" = load i32, ptr addrspace(5) %"6", align 4 - %0 = call i32 @llvm.smax.i32(i32 %"15", i32 0) - %1 = alloca i32, align 4, addrspace(5) - store i32 %0, ptr addrspace(5) %1, align 4 + %5 = call i32 @llvm.smax.i32(i32 %"15", i32 0) + store i32 %5, ptr addrspace(5) %1, align 4 %"14" = load i32, ptr addrspace(5) %1, align 4 store i32 %"14", ptr addrspace(5) %"7", align 4 %"17" = load i32, ptr addrspace(5) %"7", align 4 - %2 = alloca i32, align 4, addrspace(5) store i32 %"17", ptr addrspace(5) %2, align 4 %"29" = load i32, ptr addrspace(5) %2, align 4 store i32 %"29", ptr addrspace(5) %"7", align 4 %"19" = load i32, ptr addrspace(5) %"6", align 4 - %3 = alloca i32, align 4, addrspace(5) store i32 %"19", ptr addrspace(5) %3, align 4 %"30" = load i32, ptr addrspace(5) %3, align 4 store i32 %"30", ptr addrspace(5) %"8", align 4 @@ -41,8 +43,8 @@ define protected amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"2 %"22" = load i64, ptr addrspace(5) %"5", align 8 %"23" = load i32, ptr addrspace(5) %"8", align 4 %"33" = inttoptr i64 %"22" to ptr - %"36" = getelementptr inbounds i8, ptr %"33", i64 4 - store i32 %"23", ptr %"36", align 4 + %"35" = getelementptr inbounds i8, ptr %"33", i64 4 + store i32 %"23", ptr %"35", align 4 ret void } diff --git a/ptx/src/test/spirv_run/cvt_u32_s16.ll b/ptx/src/test/spirv_run/cvt_u32_s16.ll index 141f83f7..1b868a5a 100644 --- a/ptx/src/test/spirv_run/cvt_u32_s16.ll +++ b/ptx/src/test/spirv_run/cvt_u32_s16.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cvt_u32_s16(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"23": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, 
align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i16, align 2, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 diff --git a/ptx/src/test/spirv_run/cvta.ll b/ptx/src/test/spirv_run/cvta.ll index d5c0f730..7b73f8c5 100644 --- a/ptx/src/test/spirv_run/cvta.ll +++ b/ptx/src/test/spirv_run/cvta.ll @@ -2,25 +2,27 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"26": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"19", align 8 store i64 %"9", ptr addrspace(5) %"5", align 8 %"11" = load i64, ptr addrspace(5) %"4", align 8 - %0 = inttoptr i64 %"11" to ptr - %1 = addrspacecast ptr %0 to ptr addrspace(1) - %"20" = ptrtoint ptr addrspace(1) %1 to i64 + %2 = inttoptr i64 %"11" to ptr + %3 = addrspacecast ptr %2 to ptr addrspace(1) + %"20" = ptrtoint ptr addrspace(1) %3 to i64 store i64 %"20", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(5) %"5", align 8 - %2 = inttoptr i64 %"13" to ptr - %3 = addrspacecast ptr %2 to ptr addrspace(1) - %"22" = ptrtoint ptr addrspace(1) %3 to i64 + %4 = inttoptr i64 %"13" to ptr + %5 = addrspacecast ptr %4 to ptr addrspace(1) + %"22" = ptrtoint ptr addrspace(1) %5 to i64 store i64 %"22", ptr addrspace(5) %"5", 
align 8 %"15" = load i64, ptr addrspace(5) %"4", align 8 %"24" = inttoptr i64 %"15" to ptr addrspace(1) diff --git a/ptx/src/test/spirv_run/div_approx.ll b/ptx/src/test/spirv_run/div_approx.ll index 833065e1..d4b889f4 100644 --- a/ptx/src/test/spirv_run/div_approx.ll +++ b/ptx/src/test/spirv_run/div_approx.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"27": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"22 store float %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"29" = getelementptr inbounds i8, ptr %"25", i64 4 - %"13" = load float, ptr %"29", align 4 + %"28" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load float, ptr %"28", align 4 store float %"13", ptr addrspace(5) %"7", align 4 %"16" = load float, ptr addrspace(5) %"6", align 4 %"17" = load float, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/dp4a.ll b/ptx/src/test/spirv_run/dp4a.ll index 2ada6cb8..97f40986 100644 --- a/ptx/src/test/spirv_run/dp4a.ll +++ b/ptx/src/test/spirv_run/dp4a.ll @@ -4,14 +4,16 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__dp4a_s32_s32(i32, i32, i32) #0 define protected amdgpu_kernel void @dp4a(ptr addrspace(4) 
byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 { -"38": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"28", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"29", align 8 @@ -22,13 +24,13 @@ define protected amdgpu_kernel void @dp4a(ptr addrspace(4) byref(i64) %"28", ptr store i32 %"12", ptr addrspace(5) %"6", align 4 %"15" = load i64, ptr addrspace(5) %"4", align 8 %"31" = inttoptr i64 %"15" to ptr - %"45" = getelementptr inbounds i8, ptr %"31", i64 4 - %"14" = load i32, ptr %"45", align 4 + %"44" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load i32, ptr %"44", align 4 store i32 %"14", ptr addrspace(5) %"7", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"32" = inttoptr i64 %"17" to ptr - %"47" = getelementptr inbounds i8, ptr %"32", i64 8 - %"16" = load i32, ptr %"47", align 4 + %"46" = getelementptr inbounds i8, ptr %"32", i64 8 + %"16" = load i32, ptr %"46", align 4 store i32 %"16", ptr addrspace(5) %"8", align 4 %"19" = load i32, ptr addrspace(5) %"6", align 4 %"20" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/ex2.ll b/ptx/src/test/spirv_run/ex2.ll index b5e671e7..aa0c1d57 100644 --- a/ptx/src/test/spirv_run/ex2.ll +++ b/ptx/src/test/spirv_run/ex2.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 { -"56": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 
%"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"46", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"47", align 8 @@ -25,8 +27,8 @@ define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr store float %"15", ptr %"49", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"50" = inttoptr i64 %"17" to ptr - %"58" = getelementptr inbounds i8, ptr %"50", i64 4 - %"16" = load float, ptr %"58", align 4 + %"57" = getelementptr inbounds i8, ptr %"50", i64 4 + %"16" = load float, ptr %"57", align 4 store float %"16", ptr addrspace(5) %"6", align 4 %"19" = load float, ptr addrspace(5) %"6", align 4 %"18" = call afn float @llvm.exp2.f32(float %"19") @@ -34,12 +36,12 @@ define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr %"20" = load i64, ptr addrspace(5) %"5", align 8 %"21" = load float, ptr addrspace(5) %"6", align 4 %"51" = inttoptr i64 %"20" to ptr - %"60" = getelementptr inbounds i8, ptr %"51", i64 4 - store float %"21", ptr %"60", align 4 + %"59" = getelementptr inbounds i8, ptr %"51", i64 4 + store float %"21", ptr %"59", align 4 %"23" = load i64, ptr addrspace(5) %"4", align 8 %"52" = inttoptr i64 %"23" to ptr - %"62" = getelementptr inbounds i8, ptr %"52", i64 8 - %"22" = load float, ptr %"62", align 4 + %"61" = getelementptr inbounds i8, ptr %"52", i64 8 + %"22" = load float, ptr %"61", align 4 store float %"22", ptr addrspace(5) %"6", align 4 %"25" = load float, ptr addrspace(5) %"6", align 4 %"24" = call afn float @llvm.exp2.f32(float %"25") @@ -47,12 +49,12 @@ define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr %"26" = load i64, ptr addrspace(5) %"5", align 8 %"27" = load float, ptr addrspace(5) %"6", align 4 %"53" = inttoptr i64 %"26" to ptr - 
%"64" = getelementptr inbounds i8, ptr %"53", i64 8 - store float %"27", ptr %"64", align 4 + %"63" = getelementptr inbounds i8, ptr %"53", i64 8 + store float %"27", ptr %"63", align 4 %"29" = load i64, ptr addrspace(5) %"4", align 8 %"54" = inttoptr i64 %"29" to ptr - %"66" = getelementptr inbounds i8, ptr %"54", i64 12 - %"28" = load float, ptr %"66", align 4 + %"65" = getelementptr inbounds i8, ptr %"54", i64 12 + %"28" = load float, ptr %"65", align 4 store float %"28", ptr addrspace(5) %"6", align 4 %"31" = load float, ptr addrspace(5) %"6", align 4 %"30" = call afn float @llvm.exp2.f32(float %"31") @@ -60,8 +62,8 @@ define protected amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"46", ptr %"32" = load i64, ptr addrspace(5) %"5", align 8 %"33" = load float, ptr addrspace(5) %"6", align 4 %"55" = inttoptr i64 %"32" to ptr - %"68" = getelementptr inbounds i8, ptr %"55", i64 12 - store float %"33", ptr %"68", align 4 + %"67" = getelementptr inbounds i8, ptr %"55", i64 12 + store float %"33", ptr %"67", align 4 ret void } diff --git a/ptx/src/test/spirv_run/extern_shared.ll b/ptx/src/test/spirv_run/extern_shared.ll index eeb0d508..e7d0a21a 100644 --- a/ptx/src/test/spirv_run/extern_shared.ll +++ b/ptx/src/test/spirv_run/extern_shared.ll @@ -4,12 +4,14 @@ target triple = "amdgcn-amd-amdhsa" @shared_mem = external hidden addrspace(3) global [0 x i32] define protected amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"23": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"5", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 diff --git 
a/ptx/src/test/spirv_run/extern_shared_call.ll b/ptx/src/test/spirv_run/extern_shared_call.ll index cdd37be0..a2b6c101 100644 --- a/ptx/src/test/spirv_run/extern_shared_call.ll +++ b/ptx/src/test/spirv_run/extern_shared_call.ll @@ -3,28 +3,32 @@ target triple = "amdgcn-amd-amdhsa" @shared_mem = external hidden addrspace(3) global [0 x i32], align 4 -define private void @"2"(ptr addrspace(3) %"35") #0 { -"33": +define private void @"2"(ptr addrspace(3) %"33") #0 { %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"3" = alloca i64, align 8, addrspace(5) - %"12" = load i64, ptr addrspace(3) %"35", align 8 + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 + %"12" = load i64, ptr addrspace(3) %"33", align 8 store i64 %"12", ptr addrspace(5) %"3", align 8 %"14" = load i64, ptr addrspace(5) %"3", align 8 %"13" = add i64 %"14", 2 store i64 %"13", ptr addrspace(5) %"3", align 8 %"15" = load i64, ptr addrspace(5) %"3", align 8 - store i64 %"15", ptr addrspace(3) %"35", align 8 + store i64 %"15", ptr addrspace(3) %"33", align 8 ret void } define protected amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 { -"34": %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"11", align 1 %"16" = load i64, ptr addrspace(4) %"25", align 8 store i64 %"16", ptr addrspace(5) %"7", align 8 %"17" = load i64, ptr addrspace(4) %"26", align 8 diff --git a/ptx/src/test/spirv_run/fma.ll b/ptx/src/test/spirv_run/fma.ll index 1dff2b85..61ef7758 100644 --- a/ptx/src/test/spirv_run/fma.ll +++ b/ptx/src/test/spirv_run/fma.ll @@ -2,14 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = 
"amdgcn-amd-amdhsa" define protected amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #0 { -"34": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"28", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"29", align 8 @@ -20,13 +22,13 @@ define protected amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"28", ptr store float %"12", ptr addrspace(5) %"6", align 4 %"15" = load i64, ptr addrspace(5) %"4", align 8 %"31" = inttoptr i64 %"15" to ptr - %"36" = getelementptr inbounds i8, ptr %"31", i64 4 - %"14" = load float, ptr %"36", align 4 + %"35" = getelementptr inbounds i8, ptr %"31", i64 4 + %"14" = load float, ptr %"35", align 4 store float %"14", ptr addrspace(5) %"7", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"32" = inttoptr i64 %"17" to ptr - %"38" = getelementptr inbounds i8, ptr %"32", i64 8 - %"16" = load float, ptr %"38", align 4 + %"37" = getelementptr inbounds i8, ptr %"32", i64 8 + %"16" = load float, ptr %"37", align 4 store float %"16", ptr addrspace(5) %"8", align 4 %"19" = load float, ptr addrspace(5) %"6", align 4 %"20" = load float, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/func_ptr.ll b/ptx/src/test/spirv_run/func_ptr.ll index 1160a766..ad4392b8 100644 --- a/ptx/src/test/spirv_run/func_ptr.ll +++ b/ptx/src/test/spirv_run/func_ptr.ll @@ -2,14 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define private float @"1"(float %"15", float %"16") #0 { -"38": %"3" = alloca float, align 4, 
addrspace(5) %"4" = alloca float, align 4, addrspace(5) %"2" = alloca float, align 4, addrspace(5) %"13" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"13", align 1 + br label %1 + +1: ; preds = %0 store float %"15", ptr addrspace(5) %"3", align 4 store float %"16", ptr addrspace(5) %"4", align 4 + store i1 false, ptr addrspace(5) %"13", align 1 %"18" = load float, ptr addrspace(5) %"3", align 4 %"19" = load float, ptr addrspace(5) %"4", align 4 %"17" = fadd float %"18", %"19" @@ -19,14 +21,16 @@ define private float @"1"(float %"15", float %"16") #0 { } define protected amdgpu_kernel void @func_ptr(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { -"39": %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) %"11" = alloca i64, align 8, addrspace(5) %"12" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"14", align 1 %"21" = load i64, ptr addrspace(4) %"34", align 8 store i64 %"21", ptr addrspace(5) %"8", align 8 %"22" = load i64, ptr addrspace(4) %"35", align 8 diff --git a/ptx/src/test/spirv_run/generic.ll b/ptx/src/test/spirv_run/generic.ll index 312a7cdd..44b4ef9f 100644 --- a/ptx/src/test/spirv_run/generic.ll +++ b/ptx/src/test/spirv_run/generic.ll @@ -5,18 +5,20 @@ target triple = "amdgcn-amd-amdhsa" @bar = protected addrspace(1) externally_initialized global [4 x i64] [i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 4), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 8), i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @foo to ptr) to i64), i64 12)] define protected amdgpu_kernel void @generic(ptr addrspace(4) byref(i64) %"46", ptr 
addrspace(4) byref(i64) %"47") #0 { -"57": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"11" = load i64, ptr addrspace(4) %"47", align 8 store i64 %"11", ptr addrspace(5) %"7", align 8 - %0 = alloca i32, align 4, addrspace(5) - store i32 1, ptr addrspace(5) %0, align 4 - %"12" = load i32, ptr addrspace(5) %0, align 4 + store i32 1, ptr addrspace(5) %1, align 4 + %"12" = load i32, ptr addrspace(5) %1, align 4 store i32 %"12", ptr addrspace(5) %"8", align 4 %"13" = load i64, ptr addrspace(1) @bar, align 8 store i64 %"13", ptr addrspace(5) %"6", align 8 diff --git a/ptx/src/test/spirv_run/global_array.ll b/ptx/src/test/spirv_run/global_array.ll index e2ad2f22..59a66eae 100644 --- a/ptx/src/test/spirv_run/global_array.ll +++ b/ptx/src/test/spirv_run/global_array.ll @@ -5,15 +5,17 @@ target triple = "amdgcn-amd-amdhsa" @foobar = protected addrspace(1) externally_initialized global [4 x [2 x i64]] [[2 x i64] [i64 -1, i64 2], [2 x i64] [i64 3, i64 0], [2 x i64] [i64 ptrtoint (ptr addrspace(1) @asdas to i64), i64 0], [2 x i64] zeroinitializer] define protected amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"21": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) - %0 = alloca i64, align 8, addrspace(5) - store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %0, align 8 - %"10" = load i64, ptr addrspace(5) %0, align 8 + %1 = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, 
ptr addrspace(5) %"9", align 1 + store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %1, align 8 + %"10" = load i64, ptr addrspace(5) %1, align 8 store i64 %"10", ptr addrspace(5) %"6", align 8 %"11" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"11", ptr addrspace(5) %"7", align 8 diff --git a/ptx/src/test/spirv_run/lanemask_lt.ll b/ptx/src/test/spirv_run/lanemask_lt.ll index efa1746f..cc81383b 100644 --- a/ptx/src/test/spirv_run/lanemask_lt.ll +++ b/ptx/src/test/spirv_run/lanemask_lt.ll @@ -4,14 +4,17 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__sreg_lanemask_lt() #0 define protected amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"27", ptr addrspace(4) byref(i64) %"28") #1 { -"39": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"14" = load i64, ptr addrspace(4) %"27", align 8 store i64 %"14", ptr addrspace(5) %"4", align 8 %"15" = load i64, ptr addrspace(4) %"28", align 8 @@ -24,9 +27,8 @@ define protected amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"2 %"31" = add i32 %"19", 1 store i32 %"31", ptr addrspace(5) %"7", align 4 %"11" = call i32 @__zluda_ptx_impl__sreg_lanemask_lt() - %0 = alloca i32, align 4, addrspace(5) - store i32 %"11", ptr addrspace(5) %0, align 4 - %"33" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"11", ptr addrspace(5) %1, align 4 + %"33" = load i32, ptr addrspace(5) %1, align 4 store i32 %"33", ptr addrspace(5) %"8", align 4 %"22" = load i32, ptr addrspace(5) %"7", align 4 %"23" = load i32, ptr addrspace(5) %"8", align 4 diff --git a/ptx/src/test/spirv_run/ld_st.ll 
b/ptx/src/test/spirv_run/ld_st.ll index 0fe06f23..4b231208 100644 --- a/ptx/src/test/spirv_run/ld_st.ll +++ b/ptx/src/test/spirv_run/ld_st.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 { -"18": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"14", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"15", align 8 diff --git a/ptx/src/test/spirv_run/ld_st_implicit.ll b/ptx/src/test/spirv_run/ld_st_implicit.ll index 3ec14749..71baa929 100644 --- a/ptx/src/test/spirv_run/ld_st_implicit.ll +++ b/ptx/src/test/spirv_run/ld_st_implicit.ll @@ -2,31 +2,33 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"22": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"5", align 8 - %0 = alloca i64, align 8, addrspace(5) - store i64 81985529216486895, ptr addrspace(5) %0, align 8 - %"10" = load i64, ptr addrspace(5) 
%0, align 8 + store i64 81985529216486895, ptr addrspace(5) %1, align 8 + %"10" = load i64, ptr addrspace(5) %1, align 8 store i64 %"10", ptr addrspace(5) %"6", align 8 %"12" = load i64, ptr addrspace(5) %"4", align 8 %"19" = inttoptr i64 %"12" to ptr addrspace(1) %"18" = load float, ptr addrspace(1) %"19", align 4 - %"23" = bitcast float %"18" to i32 - %"11" = zext i32 %"23" to i64 + %"22" = bitcast float %"18" to i32 + %"11" = zext i32 %"22" to i64 store i64 %"11", ptr addrspace(5) %"6", align 8 %"13" = load i64, ptr addrspace(5) %"5", align 8 %"14" = load i64, ptr addrspace(5) %"6", align 8 %"20" = inttoptr i64 %"13" to ptr addrspace(1) - %"25" = trunc i64 %"14" to i32 - %"21" = bitcast i32 %"25" to float + %"24" = trunc i64 %"14" to i32 + %"21" = bitcast i32 %"24" to float store float %"21", ptr addrspace(1) %"20", align 4 ret void } diff --git a/ptx/src/test/spirv_run/ld_st_offset.ll b/ptx/src/test/spirv_run/ld_st_offset.ll index ee8bde6c..959aa53e 100644 --- a/ptx/src/test/spirv_run/ld_st_offset.ll +++ b/ptx/src/test/spirv_run/ld_st_offset.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"29": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"23", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"24", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %" store i32 %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) 
%"4", align 8 %"26" = inttoptr i64 %"14" to ptr - %"31" = getelementptr inbounds i8, ptr %"26", i64 4 - %"13" = load i32, ptr %"31", align 4 + %"30" = getelementptr inbounds i8, ptr %"26", i64 4 + %"13" = load i32, ptr %"30", align 4 store i32 %"13", ptr addrspace(5) %"7", align 4 %"15" = load i64, ptr addrspace(5) %"5", align 8 %"16" = load i32, ptr addrspace(5) %"7", align 4 @@ -29,8 +31,8 @@ define protected amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %" %"17" = load i64, ptr addrspace(5) %"5", align 8 %"18" = load i32, ptr addrspace(5) %"6", align 4 %"28" = inttoptr i64 %"17" to ptr - %"33" = getelementptr inbounds i8, ptr %"28", i64 4 - store i32 %"18", ptr %"33", align 4 + %"32" = getelementptr inbounds i8, ptr %"28", i64 4 + store i32 %"18", ptr %"32", align 4 ret void } diff --git a/ptx/src/test/spirv_run/lg2.ll b/ptx/src/test/spirv_run/lg2.ll index 7dd63d64..9e4500ec 100644 --- a/ptx/src/test/spirv_run/lg2.ll +++ b/ptx/src/test/spirv_run/lg2.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 diff --git a/ptx/src/test/spirv_run/local_align.ll b/ptx/src/test/spirv_run/local_align.ll index 13fbe4bb..284a0814 100644 --- a/ptx/src/test/spirv_run/local_align.ll +++ b/ptx/src/test/spirv_run/local_align.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 
target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"15", ptr addrspace(4) byref(i64) %"16") #0 { -"19": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca [8 x i8], align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"15", align 8 store i64 %"9", ptr addrspace(5) %"5", align 8 %"10" = load i64, ptr addrspace(4) %"16", align 8 diff --git a/ptx/src/test/spirv_run/mad_hi_cc.ll b/ptx/src/test/spirv_run/mad_hi_cc.ll index 6c86dbcb..f9a27b41 100644 --- a/ptx/src/test/spirv_run/mad_hi_cc.ll +++ b/ptx/src/test/spirv_run/mad_hi_cc.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60", ptr addrspace(4) byref(i64) %"61") #0 { -"77": %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -15,6 +13,10 @@ define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60" %"11" = alloca i32, align 4, addrspace(5) %"12" = alloca i32, align 4, addrspace(5) %"13" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"14", align 1 %"15" = load i64, ptr addrspace(4) %"60", align 8 store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"61", align 8 @@ -25,44 +27,44 @@ define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60" store i32 %"62", ptr addrspace(5) %"8", align 4 %"20" = load i64, ptr addrspace(5) %"4", align 8 %"64" = 
inttoptr i64 %"20" to ptr - %"79" = getelementptr inbounds i8, ptr %"64", i64 4 - %"65" = load i32, ptr %"79", align 4 + %"78" = getelementptr inbounds i8, ptr %"64", i64 4 + %"65" = load i32, ptr %"78", align 4 store i32 %"65", ptr addrspace(5) %"9", align 4 %"22" = load i64, ptr addrspace(5) %"4", align 8 %"66" = inttoptr i64 %"22" to ptr - %"81" = getelementptr inbounds i8, ptr %"66", i64 8 - %"21" = load i32, ptr %"81", align 4 + %"80" = getelementptr inbounds i8, ptr %"66", i64 8 + %"21" = load i32, ptr %"80", align 4 store i32 %"21", ptr addrspace(5) %"10", align 4 %"25" = load i32, ptr addrspace(5) %"8", align 4 %"26" = load i32, ptr addrspace(5) %"9", align 4 %"27" = load i32, ptr addrspace(5) %"10", align 4 - %0 = sext i32 %"25" to i64 - %1 = sext i32 %"26" to i64 - %2 = mul nsw i64 %0, %1 - %3 = lshr i64 %2, 32 - %4 = trunc i64 %3 to i32 - %5 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %4, i32 %"27") - %"23" = extractvalue { i32, i1 } %5, 0 - %"24" = extractvalue { i32, i1 } %5, 1 + %2 = sext i32 %"25" to i64 + %3 = sext i32 %"26" to i64 + %4 = mul nsw i64 %2, %3 + %5 = lshr i64 %4, 32 + %6 = trunc i64 %5 to i32 + %7 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %6, i32 %"27") + %"23" = extractvalue { i32, i1 } %7, 0 + %"24" = extractvalue { i32, i1 } %7, 1 store i32 %"23", ptr addrspace(5) %"7", align 4 store i1 %"24", ptr addrspace(5) %"14", align 1 - %6 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -2) - %"28" = extractvalue { i32, i1 } %6, 0 - %"29" = extractvalue { i32, i1 } %6, 1 + %8 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -2) + %"28" = extractvalue { i32, i1 } %8, 0 + %"29" = extractvalue { i32, i1 } %8, 1 store i32 %"28", ptr addrspace(5) %"6", align 4 store i1 %"29", ptr addrspace(5) %"14", align 1 %"31" = load i1, ptr addrspace(5) %"14", align 1 - %7 = zext i1 %"31" to i32 - %"70" = add i32 0, %7 + %9 = zext i1 %"31" to i32 + %"70" = add i32 0, %9 store i32 %"70", ptr addrspace(5) %"12", align 4 - 
%8 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -1) - %"32" = extractvalue { i32, i1 } %8, 0 - %"33" = extractvalue { i32, i1 } %8, 1 + %10 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 -1) + %"32" = extractvalue { i32, i1 } %10, 0 + %"33" = extractvalue { i32, i1 } %10, 1 store i32 %"32", ptr addrspace(5) %"6", align 4 store i1 %"33", ptr addrspace(5) %"14", align 1 %"35" = load i1, ptr addrspace(5) %"14", align 1 - %9 = zext i1 %"35" to i32 - %"71" = add i32 0, %9 + %11 = zext i1 %"35" to i32 + %"71" = add i32 0, %11 store i32 %"71", ptr addrspace(5) %"13", align 4 %"36" = load i64, ptr addrspace(5) %"5", align 8 %"37" = load i32, ptr addrspace(5) %"7", align 4 @@ -71,13 +73,13 @@ define protected amdgpu_kernel void @mad_hi_cc(ptr addrspace(4) byref(i64) %"60" %"38" = load i64, ptr addrspace(5) %"5", align 8 %"39" = load i32, ptr addrspace(5) %"12", align 4 %"73" = inttoptr i64 %"38" to ptr - %"83" = getelementptr inbounds i8, ptr %"73", i64 4 - store i32 %"39", ptr %"83", align 4 + %"82" = getelementptr inbounds i8, ptr %"73", i64 4 + store i32 %"39", ptr %"82", align 4 %"40" = load i64, ptr addrspace(5) %"5", align 8 %"41" = load i32, ptr addrspace(5) %"13", align 4 %"75" = inttoptr i64 %"40" to ptr - %"85" = getelementptr inbounds i8, ptr %"75", i64 8 - store i32 %"41", ptr %"85", align 4 + %"84" = getelementptr inbounds i8, ptr %"75", i64 8 + store i32 %"41", ptr %"84", align 4 ret void } diff --git a/ptx/src/test/spirv_run/mad_s32.ll b/ptx/src/test/spirv_run/mad_s32.ll index 5ab86adf..f1c15cfb 100644 --- a/ptx/src/test/spirv_run/mad_s32.ll +++ b/ptx/src/test/spirv_run/mad_s32.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52", ptr addrspace(4) byref(i64) %"53") #0 { -"75": %"13" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"13", align 1 %"4" = 
alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -14,6 +12,10 @@ define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52", %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) %"12" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"13", align 1 %"14" = load i64, ptr addrspace(4) %"52", align 8 store i64 %"14", ptr addrspace(5) %"4", align 8 %"15" = load i64, ptr addrspace(4) %"53", align 8 @@ -24,42 +26,42 @@ define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52", store i32 %"54", ptr addrspace(5) %"9", align 4 %"19" = load i64, ptr addrspace(5) %"4", align 8 %"56" = inttoptr i64 %"19" to ptr - %"77" = getelementptr inbounds i8, ptr %"56", i64 4 - %"57" = load i32, ptr %"77", align 4 + %"76" = getelementptr inbounds i8, ptr %"56", i64 4 + %"57" = load i32, ptr %"76", align 4 store i32 %"57", ptr addrspace(5) %"10", align 4 %"21" = load i64, ptr addrspace(5) %"4", align 8 %"58" = inttoptr i64 %"21" to ptr - %"79" = getelementptr inbounds i8, ptr %"58", i64 8 - %"20" = load i64, ptr %"79", align 8 + %"78" = getelementptr inbounds i8, ptr %"58", i64 8 + %"20" = load i64, ptr %"78", align 8 store i64 %"20", ptr addrspace(5) %"12", align 8 %"23" = load i64, ptr addrspace(5) %"4", align 8 %"59" = inttoptr i64 %"23" to ptr - %"81" = getelementptr inbounds i8, ptr %"59", i64 16 - %"60" = load i32, ptr %"81", align 4 + %"80" = getelementptr inbounds i8, ptr %"59", i64 16 + %"60" = load i32, ptr %"80", align 4 store i32 %"60", ptr addrspace(5) %"11", align 4 %"25" = load i32, ptr addrspace(5) %"9", align 4 %"26" = load i32, ptr addrspace(5) %"10", align 4 %"27" = load i32, ptr addrspace(5) %"11", align 4 - %0 = mul i32 %"25", %"26" - %"24" = add i32 %0, %"27" + %2 = mul i32 %"25", %"26" + %"24" = add i32 %2, %"27" store i32 %"24", ptr addrspace(5) %"6", align 4 %"29" = load 
i32, ptr addrspace(5) %"9", align 4 %"30" = load i32, ptr addrspace(5) %"10", align 4 %"31" = load i32, ptr addrspace(5) %"11", align 4 - %1 = sext i32 %"29" to i64 - %2 = sext i32 %"30" to i64 - %3 = mul nsw i64 %1, %2 - %4 = lshr i64 %3, 32 - %5 = trunc i64 %4 to i32 - %"28" = add i32 %5, %"31" + %3 = sext i32 %"29" to i64 + %4 = sext i32 %"30" to i64 + %5 = mul nsw i64 %3, %4 + %6 = lshr i64 %5, 32 + %7 = trunc i64 %6 to i32 + %"28" = add i32 %7, %"31" store i32 %"28", ptr addrspace(5) %"7", align 4 %"33" = load i32, ptr addrspace(5) %"9", align 4 %"34" = load i32, ptr addrspace(5) %"10", align 4 %"35" = load i64, ptr addrspace(5) %"12", align 8 - %6 = sext i32 %"33" to i64 - %7 = sext i32 %"34" to i64 - %8 = mul nsw i64 %6, %7 - %"67" = add i64 %8, %"35" + %8 = sext i32 %"33" to i64 + %9 = sext i32 %"34" to i64 + %10 = mul nsw i64 %8, %9 + %"67" = add i64 %10, %"35" store i64 %"67", ptr addrspace(5) %"8", align 8 %"36" = load i64, ptr addrspace(5) %"5", align 8 %"37" = load i32, ptr addrspace(5) %"6", align 4 @@ -68,13 +70,13 @@ define protected amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"52", %"38" = load i64, ptr addrspace(5) %"5", align 8 %"39" = load i32, ptr addrspace(5) %"7", align 4 %"72" = inttoptr i64 %"38" to ptr - %"83" = getelementptr inbounds i8, ptr %"72", i64 8 - store i32 %"39", ptr %"83", align 4 + %"82" = getelementptr inbounds i8, ptr %"72", i64 8 + store i32 %"39", ptr %"82", align 4 %"40" = load i64, ptr addrspace(5) %"5", align 8 %"41" = load i64, ptr addrspace(5) %"8", align 8 %"73" = inttoptr i64 %"40" to ptr - %"85" = getelementptr inbounds i8, ptr %"73", i64 16 - store i64 %"41", ptr %"85", align 8 + %"84" = getelementptr inbounds i8, ptr %"73", i64 16 + store i64 %"41", ptr %"84", align 8 ret void } diff --git a/ptx/src/test/spirv_run/madc_cc.ll b/ptx/src/test/spirv_run/madc_cc.ll index 136f320e..0c9df2b2 100644 --- a/ptx/src/test/spirv_run/madc_cc.ll +++ b/ptx/src/test/spirv_run/madc_cc.ll @@ -2,9 +2,7 @@ target 
datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 { -"54": %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -12,6 +10,10 @@ define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40", %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"11", align 1 %"12" = load i64, ptr addrspace(4) %"40", align 8 store i64 %"12", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(4) %"41", align 8 @@ -22,34 +24,34 @@ define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40", store i32 %"42", ptr addrspace(5) %"8", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"44" = inttoptr i64 %"17" to ptr - %"56" = getelementptr inbounds i8, ptr %"44", i64 4 - %"45" = load i32, ptr %"56", align 4 + %"55" = getelementptr inbounds i8, ptr %"44", i64 4 + %"45" = load i32, ptr %"55", align 4 store i32 %"45", ptr addrspace(5) %"9", align 4 %"19" = load i64, ptr addrspace(5) %"4", align 8 %"46" = inttoptr i64 %"19" to ptr - %"58" = getelementptr inbounds i8, ptr %"46", i64 8 - %"18" = load i32, ptr %"58", align 4 + %"57" = getelementptr inbounds i8, ptr %"46", i64 8 + %"18" = load i32, ptr %"57", align 4 store i32 %"18", ptr addrspace(5) %"10", align 4 %"22" = load i32, ptr addrspace(5) %"8", align 4 %"23" = load i32, ptr addrspace(5) %"9", align 4 %"24" = load i32, ptr addrspace(5) %"10", align 4 - %0 = mul i32 %"22", %"23" - %1 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %0, i32 %"24") - %"20" = extractvalue { i32, i1 } %1, 0 - %"21" = extractvalue 
{ i32, i1 } %1, 1 + %2 = mul i32 %"22", %"23" + %3 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %2, i32 %"24") + %"20" = extractvalue { i32, i1 } %3, 0 + %"21" = extractvalue { i32, i1 } %3, 1 store i32 %"20", ptr addrspace(5) %"6", align 4 store i1 %"21", ptr addrspace(5) %"11", align 1 %"26" = load i1, ptr addrspace(5) %"11", align 1 %"27" = load i32, ptr addrspace(5) %"8", align 4 %"28" = load i32, ptr addrspace(5) %"9", align 4 - %2 = sext i32 %"27" to i64 - %3 = sext i32 %"28" to i64 - %4 = mul nsw i64 %2, %3 - %5 = lshr i64 %4, 32 - %6 = trunc i64 %5 to i32 - %7 = zext i1 %"26" to i32 - %8 = add i32 %6, 3 - %"25" = add i32 %8, %7 + %4 = sext i32 %"27" to i64 + %5 = sext i32 %"28" to i64 + %6 = mul nsw i64 %4, %5 + %7 = lshr i64 %6, 32 + %8 = trunc i64 %7 to i32 + %9 = zext i1 %"26" to i32 + %10 = add i32 %8, 3 + %"25" = add i32 %10, %9 store i32 %"25", ptr addrspace(5) %"7", align 4 %"29" = load i64, ptr addrspace(5) %"5", align 8 %"30" = load i32, ptr addrspace(5) %"6", align 4 @@ -58,8 +60,8 @@ define protected amdgpu_kernel void @madc_cc(ptr addrspace(4) byref(i64) %"40", %"31" = load i64, ptr addrspace(5) %"5", align 8 %"32" = load i32, ptr addrspace(5) %"7", align 4 %"53" = inttoptr i64 %"31" to ptr - %"60" = getelementptr inbounds i8, ptr %"53", i64 4 - store i32 %"32", ptr %"60", align 4 + %"59" = getelementptr inbounds i8, ptr %"53", i64 4 + store i32 %"32", ptr %"59", align 4 ret void } diff --git a/ptx/src/test/spirv_run/max.ll b/ptx/src/test/spirv_run/max.ll index 6dcc74d6..ef0b39df 100644 --- a/ptx/src/test/spirv_run/max.ll +++ b/ptx/src/test/spirv_run/max.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"27": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) 
%"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"22", ptr store i32 %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"29" = getelementptr inbounds i8, ptr %"25", i64 4 - %"13" = load i32, ptr %"29", align 4 + %"28" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"28", align 4 store i32 %"13", ptr addrspace(5) %"7", align 4 %"16" = load i32, ptr addrspace(5) %"6", align 4 %"17" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/membar.ll b/ptx/src/test/spirv_run/membar.ll index 78f60c85..f24c0fb5 100644 --- a/ptx/src/test/spirv_run/membar.ll +++ b/ptx/src/test/spirv_run/membar.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 { -"19": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"14", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"15", align 8 diff --git a/ptx/src/test/spirv_run/min.ll b/ptx/src/test/spirv_run/min.ll index 58cb36a9..b40c4dbe 100644 --- a/ptx/src/test/spirv_run/min.ll +++ b/ptx/src/test/spirv_run/min.ll @@ -2,13 +2,15 @@ 
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"27": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"22", ptr store i32 %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"29" = getelementptr inbounds i8, ptr %"25", i64 4 - %"13" = load i32, ptr %"29", align 4 + %"28" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"28", align 4 store i32 %"13", ptr addrspace(5) %"7", align 4 %"16" = load i32, ptr addrspace(5) %"6", align 4 %"17" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/mov.ll b/ptx/src/test/spirv_run/mov.ll index e24446af..d43fe686 100644 --- a/ptx/src/test/spirv_run/mov.ll +++ b/ptx/src/test/spirv_run/mov.ll @@ -2,13 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"21": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, 
addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 @@ -18,9 +21,8 @@ define protected amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"17", ptr %"11" = load i64, ptr %"19", align 8 store i64 %"11", ptr addrspace(5) %"6", align 8 %"14" = load i64, ptr addrspace(5) %"6", align 8 - %0 = alloca i64, align 8, addrspace(5) - store i64 %"14", ptr addrspace(5) %0, align 8 - %"13" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"14", ptr addrspace(5) %1, align 8 + %"13" = load i64, ptr addrspace(5) %1, align 8 store i64 %"13", ptr addrspace(5) %"7", align 8 %"15" = load i64, ptr addrspace(5) %"5", align 8 %"16" = load i64, ptr addrspace(5) %"7", align 8 diff --git a/ptx/src/test/spirv_run/mov_address.ll b/ptx/src/test/spirv_run/mov_address.ll index 656410cb..42d987f2 100644 --- a/ptx/src/test/spirv_run/mov_address.ll +++ b/ptx/src/test/spirv_run/mov_address.ll @@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"8", ptr addrspace(4) byref(i64) %"9") #0 { -"11": %"6" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"6", align 1 %"4" = alloca [8 x i8], align 1, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"6", align 1 %"10" = ptrtoint ptr addrspace(5) %"4" to i64 - %0 = alloca i64, align 8, addrspace(5) - store i64 %"10", ptr addrspace(5) %0, align 8 - %"7" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"10", ptr addrspace(5) %1, align 8 + %"7" = load i64, ptr addrspace(5) %1, align 8 store i64 %"7", ptr addrspace(5) %"5", align 8 ret void } diff --git 
a/ptx/src/test/spirv_run/mov_vector_cast.ll b/ptx/src/test/spirv_run/mov_vector_cast.ll index e65ad947..eb817247 100644 --- a/ptx/src/test/spirv_run/mov_vector_cast.ll +++ b/ptx/src/test/spirv_run/mov_vector_cast.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { -"49": %"15" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"15", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) @@ -14,6 +12,12 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) %"10" = alloca half, align 2, addrspace(5) %"11" = alloca half, align 2, addrspace(5) %"12" = alloca half, align 2, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + %2 = alloca i64, align 8, addrspace(5) + br label %3 + +3: ; preds = %0 + store i1 false, ptr addrspace(5) %"15", align 1 %"16" = load i64, ptr addrspace(4) %"34", align 8 store i64 %"16", ptr addrspace(5) %"4", align 8 %"17" = load i64, ptr addrspace(4) %"35", align 8 @@ -23,9 +27,8 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) %"18" = load i64, ptr %"36", align 8 store i64 %"18", ptr addrspace(5) %"6", align 8 %"20" = load i64, ptr addrspace(5) %"6", align 8 - %0 = alloca i64, align 8, addrspace(5) - store i64 %"20", ptr addrspace(5) %0, align 8 - %"13" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"20", ptr addrspace(5) %1, align 8 + %"13" = load i64, ptr addrspace(5) %1, align 8 %"38" = bitcast i64 %"13" to <2 x i32> %"39" = extractelement <2 x i32> %"38", i32 0 %"40" = extractelement <2 x i32> %"38", i32 1 @@ -34,9 +37,8 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) store float %"21", ptr addrspace(5) %"7", align 4 store float 
%"22", ptr addrspace(5) %"8", align 4 %"23" = load i64, ptr addrspace(5) %"6", align 8 - %1 = alloca i64, align 8, addrspace(5) - store i64 %"23", ptr addrspace(5) %1, align 8 - %"14" = load i64, ptr addrspace(5) %1, align 8 + store i64 %"23", ptr addrspace(5) %2, align 8 + %"14" = load i64, ptr addrspace(5) %2, align 8 %"42" = bitcast i64 %"14" to <4 x i16> %"43" = extractelement <4 x i16> %"42", i32 0 %"44" = extractelement <4 x i16> %"42", i32 1 @@ -57,8 +59,8 @@ define protected amdgpu_kernel void @mov_vector_cast(ptr addrspace(4) byref(i64) %"30" = load i64, ptr addrspace(5) %"5", align 8 %"31" = load float, ptr addrspace(5) %"7", align 4 %"48" = inttoptr i64 %"30" to ptr - %"51" = getelementptr inbounds i8, ptr %"48", i64 4 - store float %"31", ptr %"51", align 4 + %"50" = getelementptr inbounds i8, ptr %"48", i64 4 + store float %"31", ptr %"50", align 4 ret void } diff --git a/ptx/src/test/spirv_run/mul_ftz.ll b/ptx/src/test/spirv_run/mul_ftz.ll index 3c32e73b..38867fef 100644 --- a/ptx/src/test/spirv_run/mul_ftz.ll +++ b/ptx/src/test/spirv_run/mul_ftz.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"27": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"22", store float %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr 
addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"29" = getelementptr inbounds i8, ptr %"25", i64 4 - %"13" = load float, ptr %"29", align 4 + %"28" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load float, ptr %"28", align 4 store float %"13", ptr addrspace(5) %"7", align 4 %"16" = load float, ptr addrspace(5) %"6", align 4 %"17" = load float, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/mul_hi.ll b/ptx/src/test/spirv_run/mul_hi.ll index 7d8ffa90..8043deb0 100644 --- a/ptx/src/test/spirv_run/mul_hi.ll +++ b/ptx/src/test/spirv_run/mul_hi.ll @@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa" declare i64 @__zluda_ptx_impl__mul_hi_u64(i64, i64) #0 define protected amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #1 { -"22": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 diff --git a/ptx/src/test/spirv_run/mul_lo.ll b/ptx/src/test/spirv_run/mul_lo.ll index 57a767d4..9370500f 100644 --- a/ptx/src/test/spirv_run/mul_lo.ll +++ b/ptx/src/test/spirv_run/mul_lo.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 
8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 diff --git a/ptx/src/test/spirv_run/mul_non_ftz.ll b/ptx/src/test/spirv_run/mul_non_ftz.ll index e6a3cc4d..89f5e9f7 100644 --- a/ptx/src/test/spirv_run/mul_non_ftz.ll +++ b/ptx/src/test/spirv_run/mul_non_ftz.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"27": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"2 store float %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"29" = getelementptr inbounds i8, ptr %"25", i64 4 - %"13" = load float, ptr %"29", align 4 + %"28" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load float, ptr %"28", align 4 store float %"13", ptr addrspace(5) %"7", align 4 %"16" = load float, ptr addrspace(5) %"6", align 4 %"17" = load float, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/mul_wide.ll b/ptx/src/test/spirv_run/mul_wide.ll index e25a61d0..a0d84f4b 100644 --- a/ptx/src/test/spirv_run/mul_wide.ll +++ b/ptx/src/test/spirv_run/mul_wide.ll @@ -2,14 +2,16 @@ 
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"29": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"23", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"24", align 8 @@ -20,14 +22,14 @@ define protected amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"23", store i32 %"12", ptr addrspace(5) %"6", align 4 %"15" = load i64, ptr addrspace(5) %"4", align 8 %"26" = inttoptr i64 %"15" to ptr addrspace(1) - %"31" = getelementptr inbounds i8, ptr addrspace(1) %"26", i64 4 - %"14" = load i32, ptr addrspace(1) %"31", align 4 + %"30" = getelementptr inbounds i8, ptr addrspace(1) %"26", i64 4 + %"14" = load i32, ptr addrspace(1) %"30", align 4 store i32 %"14", ptr addrspace(5) %"7", align 4 %"17" = load i32, ptr addrspace(5) %"6", align 4 %"18" = load i32, ptr addrspace(5) %"7", align 4 - %0 = sext i32 %"17" to i64 - %1 = sext i32 %"18" to i64 - %"16" = mul nsw i64 %0, %1 + %2 = sext i32 %"17" to i64 + %3 = sext i32 %"18" to i64 + %"16" = mul nsw i64 %2, %3 store i64 %"16", ptr addrspace(5) %"8", align 8 %"19" = load i64, ptr addrspace(5) %"5", align 8 %"20" = load i64, ptr addrspace(5) %"8", align 8 diff --git a/ptx/src/test/spirv_run/multireg.ll b/ptx/src/test/spirv_run/multireg.ll index 657d61f9..3eb31cb8 100644 --- a/ptx/src/test/spirv_run/multireg.ll +++ b/ptx/src/test/spirv_run/multireg.ll @@ -2,13 +2,15 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @multireg(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 diff --git a/ptx/src/test/spirv_run/neg.ll b/ptx/src/test/spirv_run/neg.ll index 1e94ed14..056b0a19 100644 --- a/ptx/src/test/spirv_run/neg.ll +++ b/ptx/src/test/spirv_run/neg.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @neg(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 diff --git a/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll b/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll index 69ea8d24..d0c71ebd 100644 --- a/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll +++ b/ptx/src/test/spirv_run/non_scalar_ptr_offset.ll @@ -2,21 +2,23 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel 
void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"26": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"23", align 8 store i64 %"11", ptr addrspace(5) %"5", align 8 %"12" = load i64, ptr addrspace(5) %"4", align 8 %"24" = inttoptr i64 %"12" to ptr addrspace(1) - %"28" = getelementptr inbounds i8, ptr addrspace(1) %"24", i64 8 - %"8" = load <2 x i32>, ptr addrspace(1) %"28", align 8 + %"27" = getelementptr inbounds i8, ptr addrspace(1) %"24", i64 8 + %"8" = load <2 x i32>, ptr addrspace(1) %"27", align 8 %"13" = extractelement <2 x i32> %"8", i32 0 %"14" = extractelement <2 x i32> %"8", i32 1 store i32 %"13", ptr addrspace(5) %"6", align 4 diff --git a/ptx/src/test/spirv_run/not.ll b/ptx/src/test/spirv_run/not.ll index 5e865458..7c9a5577 100644 --- a/ptx/src/test/spirv_run/not.ll +++ b/ptx/src/test/spirv_run/not.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"23": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = 
load i64, ptr addrspace(4) %"18", align 8 diff --git a/ptx/src/test/spirv_run/ntid.ll b/ptx/src/test/spirv_run/ntid.ll index 53216ce7..29fccca4 100644 --- a/ptx/src/test/spirv_run/ntid.ll +++ b/ptx/src/test/spirv_run/ntid.ll @@ -4,13 +4,16 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__sreg_ntid(i8) #0 define protected amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #1 { -"29": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"15" = load i64, ptr addrspace(4) %"25", align 8 store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"26", align 8 @@ -20,9 +23,8 @@ define protected amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"25", ptr %"17" = load i32, ptr %"27", align 4 store i32 %"17", ptr addrspace(5) %"6", align 4 %"11" = call i32 @__zluda_ptx_impl__sreg_ntid(i8 0) - %0 = alloca i32, align 4, addrspace(5) - store i32 %"11", ptr addrspace(5) %0, align 4 - %"19" = load i32, ptr addrspace(5) %0, align 4 + store i32 %"11", ptr addrspace(5) %1, align 4 + %"19" = load i32, ptr addrspace(5) %1, align 4 store i32 %"19", ptr addrspace(5) %"7", align 4 %"21" = load i32, ptr addrspace(5) %"6", align 4 %"22" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/or.ll b/ptx/src/test/spirv_run/or.ll index 7b4bd7f1..f9292050 100644 --- a/ptx/src/test/spirv_run/or.ll +++ b/ptx/src/test/spirv_run/or.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) 
byref(i64) %"23") #0 { -"30": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"22", ptr a store i64 %"11", ptr addrspace(5) %"6", align 8 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"32" = getelementptr inbounds i8, ptr %"25", i64 8 - %"13" = load i64, ptr %"32", align 8 + %"31" = getelementptr inbounds i8, ptr %"25", i64 8 + %"13" = load i64, ptr %"31", align 8 store i64 %"13", ptr addrspace(5) %"7", align 8 %"16" = load i64, ptr addrspace(5) %"6", align 8 %"17" = load i64, ptr addrspace(5) %"7", align 8 diff --git a/ptx/src/test/spirv_run/param_ptr.ll b/ptx/src/test/spirv_run/param_ptr.ll index cea098cd..75451de0 100644 --- a/ptx/src/test/spirv_run/param_ptr.ll +++ b/ptx/src/test/spirv_run/param_ptr.ll @@ -2,18 +2,20 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @param_ptr(ptr addrspace(4) byref(i64) %"21", ptr addrspace(4) byref(i64) %"22") #0 { -"28": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"24" = ptrtoint ptr addrspace(4) %"21" to 
i64 - %0 = alloca i64, align 8, addrspace(5) - store i64 %"24", ptr addrspace(5) %0, align 8 - %"23" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"24", ptr addrspace(5) %1, align 8 + %"23" = load i64, ptr addrspace(5) %1, align 8 store i64 %"23", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"12" to ptr addrspace(4) diff --git a/ptx/src/test/spirv_run/popc.ll b/ptx/src/test/spirv_run/popc.ll index be9c6250..15befc48 100644 --- a/ptx/src/test/spirv_run/popc.ll +++ b/ptx/src/test/spirv_run/popc.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 diff --git a/ptx/src/test/spirv_run/pred_not.ll b/ptx/src/test/spirv_run/pred_not.ll index 69f76469..8315512f 100644 --- a/ptx/src/test/spirv_run/pred_not.ll +++ b/ptx/src/test/spirv_run/pred_not.ll @@ -2,15 +2,19 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { -"41": %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, 
align 8, addrspace(5) %"9" = alloca i1, align 1, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + %2 = alloca i64, align 8, addrspace(5) + br label %3 + +3: ; preds = %0 + store i1 false, ptr addrspace(5) %"14", align 1 %"15" = load i64, ptr addrspace(4) %"36", align 8 store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"37", align 8 @@ -21,8 +25,8 @@ define protected amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"36", store i64 %"17", ptr addrspace(5) %"6", align 8 %"20" = load i64, ptr addrspace(5) %"4", align 8 %"39" = inttoptr i64 %"20" to ptr - %"43" = getelementptr inbounds i8, ptr %"39", i64 8 - %"19" = load i64, ptr %"43", align 8 + %"42" = getelementptr inbounds i8, ptr %"39", i64 8 + %"19" = load i64, ptr %"42", align 8 store i64 %"19", ptr addrspace(5) %"7", align 8 %"22" = load i64, ptr addrspace(5) %"6", align 8 %"23" = load i64, ptr addrspace(5) %"7", align 8 @@ -34,21 +38,19 @@ define protected amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"36", %"26" = load i1, ptr addrspace(5) %"9", align 1 br i1 %"26", label %"10", label %"11" -"10": ; preds = %"41" - %0 = alloca i64, align 8, addrspace(5) - store i64 1, ptr addrspace(5) %0, align 8 - %"27" = load i64, ptr addrspace(5) %0, align 8 +"10": ; preds = %3 + store i64 1, ptr addrspace(5) %1, align 8 + %"27" = load i64, ptr addrspace(5) %1, align 8 store i64 %"27", ptr addrspace(5) %"8", align 8 br label %"11" -"11": ; preds = %"10", %"41" +"11": ; preds = %"10", %3 %"28" = load i1, ptr addrspace(5) %"9", align 1 br i1 %"28", label %"13", label %"12" "12": ; preds = %"11" - %1 = alloca i64, align 8, addrspace(5) - store i64 2, ptr addrspace(5) %1, align 8 - %"29" = load i64, ptr addrspace(5) %1, align 8 + store i64 2, ptr addrspace(5) %2, align 8 + %"29" = load i64, ptr addrspace(5) %2, align 8 store i64 %"29", ptr addrspace(5) %"8", align 8 br label %"13" diff --git a/ptx/src/test/spirv_run/prmt.ll b/ptx/src/test/spirv_run/prmt.ll index 
bdcb12d7..76efedcd 100644 --- a/ptx/src/test/spirv_run/prmt.ll +++ b/ptx/src/test/spirv_run/prmt.ll @@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 { -"43": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"11" = load i64, ptr addrspace(4) %"31", align 8 store i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"32", align 8 @@ -21,28 +23,28 @@ define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"31", ptr store i32 %"13", ptr addrspace(5) %"6", align 4 %"16" = load i64, ptr addrspace(5) %"4", align 8 %"34" = inttoptr i64 %"16" to ptr - %"45" = getelementptr inbounds i8, ptr %"34", i64 4 - %"15" = load i32, ptr %"45", align 4 + %"44" = getelementptr inbounds i8, ptr %"34", i64 4 + %"15" = load i32, ptr %"44", align 4 store i32 %"15", ptr addrspace(5) %"7", align 4 %"18" = load i32, ptr addrspace(5) %"6", align 4 %"19" = load i32, ptr addrspace(5) %"7", align 4 - %0 = bitcast i32 %"18" to <4 x i8> - %1 = bitcast i32 %"19" to <4 x i8> - %2 = shufflevector <4 x i8> %0, <4 x i8> %1, <4 x i32> - %"35" = bitcast <4 x i8> %2 to i32 + %2 = bitcast i32 %"18" to <4 x i8> + %3 = bitcast i32 %"19" to <4 x i8> + %4 = shufflevector <4 x i8> %2, <4 x i8> %3, <4 x i32> + %"35" = bitcast <4 x i8> %4 to i32 store i32 %"35", ptr addrspace(5) %"8", align 4 %"21" = load i32, ptr addrspace(5) %"6", align 4 %"22" = load i32, ptr addrspace(5) %"7", align 4 - %3 = bitcast i32 %"21" 
to <4 x i8> - %4 = bitcast i32 %"22" to <4 x i8> - %5 = shufflevector <4 x i8> %3, <4 x i8> %4, <4 x i32> - %6 = extractelement <4 x i8> %5, i32 0 - %7 = ashr i8 %6, 7 - %8 = insertelement <4 x i8> %5, i8 %7, i32 0 - %9 = extractelement <4 x i8> %8, i32 2 - %10 = ashr i8 %9, 7 - %11 = insertelement <4 x i8> %8, i8 %10, i32 2 - %"38" = bitcast <4 x i8> %11 to i32 + %5 = bitcast i32 %"21" to <4 x i8> + %6 = bitcast i32 %"22" to <4 x i8> + %7 = shufflevector <4 x i8> %5, <4 x i8> %6, <4 x i32> + %8 = extractelement <4 x i8> %7, i32 0 + %9 = ashr i8 %8, 7 + %10 = insertelement <4 x i8> %7, i8 %9, i32 0 + %11 = extractelement <4 x i8> %10, i32 2 + %12 = ashr i8 %11, 7 + %13 = insertelement <4 x i8> %10, i8 %12, i32 2 + %"38" = bitcast <4 x i8> %13 to i32 store i32 %"38", ptr addrspace(5) %"9", align 4 %"23" = load i64, ptr addrspace(5) %"5", align 8 %"24" = load i32, ptr addrspace(5) %"8", align 4 @@ -51,8 +53,8 @@ define protected amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"31", ptr %"25" = load i64, ptr addrspace(5) %"5", align 8 %"26" = load i32, ptr addrspace(5) %"9", align 4 %"42" = inttoptr i64 %"25" to ptr - %"47" = getelementptr inbounds i8, ptr %"42", i64 4 - store i32 %"26", ptr %"47", align 4 + %"46" = getelementptr inbounds i8, ptr %"42", i64 4 + store i32 %"26", ptr %"46", align 4 ret void } diff --git a/ptx/src/test/spirv_run/prmt_non_immediate.ll b/ptx/src/test/spirv_run/prmt_non_immediate.ll index d503917e..104c56d7 100644 --- a/ptx/src/test/spirv_run/prmt_non_immediate.ll +++ b/ptx/src/test/spirv_run/prmt_non_immediate.ll @@ -2,14 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @prmt_non_immediate(ptr addrspace(4) byref(i64) %"25", ptr addrspace(4) byref(i64) %"26") #0 { -"33": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 
8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"25", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"26", align 8 @@ -20,19 +23,18 @@ define protected amdgpu_kernel void @prmt_non_immediate(ptr addrspace(4) byref(i store i32 %"12", ptr addrspace(5) %"6", align 4 %"15" = load i64, ptr addrspace(5) %"4", align 8 %"28" = inttoptr i64 %"15" to ptr - %"35" = getelementptr inbounds i8, ptr %"28", i64 4 - %"14" = load i32, ptr %"35", align 4 + %"34" = getelementptr inbounds i8, ptr %"28", i64 4 + %"14" = load i32, ptr %"34", align 4 store i32 %"14", ptr addrspace(5) %"7", align 4 - %0 = alloca i32, align 4, addrspace(5) - store i32 64, ptr addrspace(5) %0, align 4 - %"16" = load i32, ptr addrspace(5) %0, align 4 + store i32 64, ptr addrspace(5) %1, align 4 + %"16" = load i32, ptr addrspace(5) %1, align 4 store i32 %"16", ptr addrspace(5) %"8", align 4 %"18" = load i32, ptr addrspace(5) %"6", align 4 %"19" = load i32, ptr addrspace(5) %"7", align 4 - %1 = bitcast i32 %"18" to <4 x i8> - %2 = bitcast i32 %"19" to <4 x i8> - %3 = shufflevector <4 x i8> %1, <4 x i8> %2, <4 x i32> - %"29" = bitcast <4 x i8> %3 to i32 + %3 = bitcast i32 %"18" to <4 x i8> + %4 = bitcast i32 %"19" to <4 x i8> + %5 = shufflevector <4 x i8> %3, <4 x i8> %4, <4 x i32> + %"29" = bitcast <4 x i8> %5 to i32 store i32 %"29", ptr addrspace(5) %"7", align 4 %"20" = load i64, ptr addrspace(5) %"5", align 8 %"21" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/rcp.ll b/ptx/src/test/spirv_run/rcp.ll index 116687bb..dc034164 100644 --- a/ptx/src/test/spirv_run/rcp.ll +++ b/ptx/src/test/spirv_run/rcp.ll @@ -2,12 +2,14 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 diff --git a/ptx/src/test/spirv_run/reg_local.ll b/ptx/src/test/spirv_run/reg_local.ll index 48c881d1..52bb3d12 100644 --- a/ptx/src/test/spirv_run/reg_local.ll +++ b/ptx/src/test/spirv_run/reg_local.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"33": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca [8 x i8], align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"23", align 8 store i64 %"9", ptr addrspace(5) %"5", align 8 %"10" = load i64, ptr addrspace(4) %"24", align 8 @@ -22,14 +24,14 @@ define protected amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"23" %"27" = addrspacecast ptr addrspace(5) %"4" to ptr store i64 %"18", ptr %"27", align 8 %"29" = addrspacecast ptr addrspace(5) %"4" to ptr - %"37" = getelementptr inbounds i8, ptr %"29", i64 0 - %"30" = load i64, ptr %"37", align 8 + %"36" = getelementptr inbounds i8, ptr %"29", i64 0 
+ %"30" = load i64, ptr %"36", align 8 store i64 %"30", ptr addrspace(5) %"7", align 8 %"15" = load i64, ptr addrspace(5) %"6", align 8 %"16" = load i64, ptr addrspace(5) %"7", align 8 %"31" = inttoptr i64 %"15" to ptr addrspace(1) - %"39" = getelementptr inbounds i8, ptr addrspace(1) %"31", i64 0 - store i64 %"16", ptr addrspace(1) %"39", align 8 + %"38" = getelementptr inbounds i8, ptr addrspace(1) %"31", i64 0 + store i64 %"16", ptr addrspace(1) %"38", align 8 ret void } diff --git a/ptx/src/test/spirv_run/rem.ll b/ptx/src/test/spirv_run/rem.ll index 4535f498..0fb9cd87 100644 --- a/ptx/src/test/spirv_run/rem.ll +++ b/ptx/src/test/spirv_run/rem.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"27": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"22", ptr store i32 %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"29" = getelementptr inbounds i8, ptr %"25", i64 4 - %"13" = load i32, ptr %"29", align 4 + %"28" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"28", align 4 store i32 %"13", ptr addrspace(5) %"7", align 4 %"16" = load i32, ptr addrspace(5) %"6", align 4 %"17" = load i32, ptr addrspace(5) %"7", align 4 diff --git 
a/ptx/src/test/spirv_run/rsqrt.ll b/ptx/src/test/spirv_run/rsqrt.ll index 77972600..40833ac2 100644 --- a/ptx/src/test/spirv_run/rsqrt.ll +++ b/ptx/src/test/spirv_run/rsqrt.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca double, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 @@ -17,8 +19,8 @@ define protected amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"16", pt %"10" = load double, ptr %"18", align 8 store double %"10", ptr addrspace(5) %"6", align 8 %"13" = load double, ptr addrspace(5) %"6", align 8 - %0 = call afn double @llvm.sqrt.f64(double %"13") - %"12" = fdiv arcp afn double 1.000000e+00, %0 + %2 = call afn double @llvm.sqrt.f64(double %"13") + %"12" = fdiv arcp afn double 1.000000e+00, %2 store double %"12", ptr addrspace(5) %"6", align 8 %"14" = load i64, ptr addrspace(5) %"5", align 8 %"15" = load double, ptr addrspace(5) %"6", align 8 diff --git a/ptx/src/test/spirv_run/s64_min.ll b/ptx/src/test/spirv_run/s64_min.ll index 98eee04f..a96f0a4a 100644 --- a/ptx/src/test/spirv_run/s64_min.ll +++ b/ptx/src/test/spirv_run/s64_min.ll @@ -2,16 +2,18 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @s64_min(ptr addrspace(4) byref(i64) %"12", ptr addrspace(4) byref(i64) %"13") #0 { -"15": %"6" = alloca i1, align 1, addrspace(5) - store i1 false, ptr 
addrspace(5) %"6", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"6", align 1 %"7" = load i64, ptr addrspace(4) %"13", align 8 store i64 %"7", ptr addrspace(5) %"4", align 8 - %0 = alloca i64, align 8, addrspace(5) - store i64 -9223372036854775808, ptr addrspace(5) %0, align 8 - %"8" = load i64, ptr addrspace(5) %0, align 8 + store i64 -9223372036854775808, ptr addrspace(5) %1, align 8 + %"8" = load i64, ptr addrspace(5) %1, align 8 store i64 %"8", ptr addrspace(5) %"5", align 8 %"9" = load i64, ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(5) %"5", align 8 diff --git a/ptx/src/test/spirv_run/sad.ll b/ptx/src/test/spirv_run/sad.ll index c7a5726a..aa65fce0 100644 --- a/ptx/src/test/spirv_run/sad.ll +++ b/ptx/src/test/spirv_run/sad.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 { -"56": %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -12,6 +10,10 @@ define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"11", align 1 %"12" = load i64, ptr addrspace(4) %"38", align 8 store i64 %"12", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(4) %"39", align 8 @@ -22,31 +24,31 @@ define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr store i32 %"40", ptr addrspace(5) %"6", align 4 %"17" = load 
i64, ptr addrspace(5) %"4", align 8 %"42" = inttoptr i64 %"17" to ptr - %"58" = getelementptr inbounds i8, ptr %"42", i64 4 - %"43" = load i32, ptr %"58", align 4 + %"57" = getelementptr inbounds i8, ptr %"42", i64 4 + %"43" = load i32, ptr %"57", align 4 store i32 %"43", ptr addrspace(5) %"7", align 4 %"19" = load i64, ptr addrspace(5) %"4", align 8 %"44" = inttoptr i64 %"19" to ptr - %"60" = getelementptr inbounds i8, ptr %"44", i64 8 - %"45" = load i32, ptr %"60", align 4 + %"59" = getelementptr inbounds i8, ptr %"44", i64 8 + %"45" = load i32, ptr %"59", align 4 store i32 %"45", ptr addrspace(5) %"8", align 4 %"21" = load i32, ptr addrspace(5) %"6", align 4 %"22" = load i32, ptr addrspace(5) %"7", align 4 %"23" = load i32, ptr addrspace(5) %"8", align 4 - %0 = icmp ugt i32 %"21", %"22" - %1 = sub i32 %"21", %"22" - %2 = sub i32 %"22", %"21" - %3 = select i1 %0, i32 %1, i32 %2 - %"46" = add i32 %"23", %3 + %2 = icmp ugt i32 %"21", %"22" + %3 = sub i32 %"21", %"22" + %4 = sub i32 %"22", %"21" + %5 = select i1 %2, i32 %3, i32 %4 + %"46" = add i32 %"23", %5 store i32 %"46", ptr addrspace(5) %"9", align 4 %"25" = load i32, ptr addrspace(5) %"6", align 4 %"26" = load i32, ptr addrspace(5) %"7", align 4 %"27" = load i32, ptr addrspace(5) %"8", align 4 - %4 = icmp sgt i32 %"25", %"26" - %5 = sub i32 %"25", %"26" - %6 = sub i32 %"26", %"25" - %7 = select i1 %4, i32 %5, i32 %6 - %"50" = add i32 %"27", %7 + %6 = icmp sgt i32 %"25", %"26" + %7 = sub i32 %"25", %"26" + %8 = sub i32 %"26", %"25" + %9 = select i1 %6, i32 %7, i32 %8 + %"50" = add i32 %"27", %9 store i32 %"50", ptr addrspace(5) %"10", align 4 %"28" = load i64, ptr addrspace(5) %"5", align 8 %"29" = load i32, ptr addrspace(5) %"9", align 4 @@ -55,8 +57,8 @@ define protected amdgpu_kernel void @sad(ptr addrspace(4) byref(i64) %"38", ptr %"30" = load i64, ptr addrspace(5) %"5", align 8 %"31" = load i32, ptr addrspace(5) %"10", align 4 %"55" = inttoptr i64 %"30" to ptr - %"62" = getelementptr inbounds i8, ptr 
%"55", i64 4 - store i32 %"31", ptr %"62", align 4 + %"61" = getelementptr inbounds i8, ptr %"55", i64 4 + store i32 %"31", ptr %"61", align 4 ret void } diff --git a/ptx/src/test/spirv_run/selp.ll b/ptx/src/test/spirv_run/selp.ll index 073ec387..0e20d6d2 100644 --- a/ptx/src/test/spirv_run/selp.ll +++ b/ptx/src/test/spirv_run/selp.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"28": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i16, align 2, addrspace(5) %"7" = alloca i16, align 2, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"23", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"24", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"23", ptr store i16 %"11", ptr addrspace(5) %"6", align 2 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"26" = inttoptr i64 %"14" to ptr - %"30" = getelementptr inbounds i8, ptr %"26", i64 2 - %"13" = load i16, ptr %"30", align 2 + %"29" = getelementptr inbounds i8, ptr %"26", i64 2 + %"13" = load i16, ptr %"29", align 2 store i16 %"13", ptr addrspace(5) %"7", align 2 %"16" = load i16, ptr addrspace(5) %"6", align 2 %"17" = load i16, ptr addrspace(5) %"7", align 2 diff --git a/ptx/src/test/spirv_run/selp_true.ll b/ptx/src/test/spirv_run/selp_true.ll index 4eda9817..9b6b41a9 100644 --- a/ptx/src/test/spirv_run/selp_true.ll +++ b/ptx/src/test/spirv_run/selp_true.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected 
amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"23", ptr addrspace(4) byref(i64) %"24") #0 { -"28": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i16, align 2, addrspace(5) %"7" = alloca i16, align 2, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"23", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"24", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"23" store i16 %"11", ptr addrspace(5) %"6", align 2 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"26" = inttoptr i64 %"14" to ptr - %"30" = getelementptr inbounds i8, ptr %"26", i64 2 - %"13" = load i16, ptr %"30", align 2 + %"29" = getelementptr inbounds i8, ptr %"26", i64 2 + %"13" = load i16, ptr %"29", align 2 store i16 %"13", ptr addrspace(5) %"7", align 2 %"16" = load i16, ptr addrspace(5) %"6", align 2 %"17" = load i16, ptr addrspace(5) %"7", align 2 diff --git a/ptx/src/test/spirv_run/set_f16x2.ll b/ptx/src/test/spirv_run/set_f16x2.ll index 2a8caf3a..d6bf7e0f 100644 --- a/ptx/src/test/spirv_run/set_f16x2.ll +++ b/ptx/src/test/spirv_run/set_f16x2.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @set_f16x2(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 { -"58": %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -12,6 +10,10 @@ define protected amdgpu_kernel void @set_f16x2(ptr addrspace(4) byref(i64) %"40" %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 
4, addrspace(5) %"10" = alloca <2 x half>, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"11", align 1 %"12" = load i64, ptr addrspace(4) %"40", align 8 store i64 %"12", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(4) %"41", align 8 @@ -22,33 +24,33 @@ define protected amdgpu_kernel void @set_f16x2(ptr addrspace(4) byref(i64) %"40" store i32 %"42", ptr addrspace(5) %"6", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"44" = inttoptr i64 %"17" to ptr - %"60" = getelementptr inbounds i8, ptr %"44", i64 4 - %"45" = load i32, ptr %"60", align 4 + %"59" = getelementptr inbounds i8, ptr %"44", i64 4 + %"45" = load i32, ptr %"59", align 4 store i32 %"45", ptr addrspace(5) %"7", align 4 %"19" = load i64, ptr addrspace(5) %"4", align 8 %"46" = inttoptr i64 %"19" to ptr - %"62" = getelementptr inbounds i8, ptr %"46", i64 8 - %"47" = load i32, ptr %"62", align 4 + %"61" = getelementptr inbounds i8, ptr %"46", i64 8 + %"47" = load i32, ptr %"61", align 4 store i32 %"47", ptr addrspace(5) %"8", align 4 %"21" = load i64, ptr addrspace(5) %"4", align 8 %"48" = inttoptr i64 %"21" to ptr - %"64" = getelementptr inbounds i8, ptr %"48", i64 12 - %"49" = load i32, ptr %"64", align 4 + %"63" = getelementptr inbounds i8, ptr %"48", i64 12 + %"49" = load i32, ptr %"63", align 4 store i32 %"49", ptr addrspace(5) %"9", align 4 %"23" = load i32, ptr addrspace(5) %"6", align 4 %"24" = load i32, ptr addrspace(5) %"7", align 4 %"51" = bitcast i32 %"23" to <2 x half> %"52" = bitcast i32 %"24" to <2 x half> - %0 = fcmp ugt <2 x half> %"51", %"52" - %1 = sext <2 x i1> %0 to <2 x i16> - %"50" = bitcast <2 x i16> %1 to i32 + %2 = fcmp ugt <2 x half> %"51", %"52" + %3 = sext <2 x i1> %2 to <2 x i16> + %"50" = bitcast <2 x i16> %3 to i32 store i32 %"50", ptr addrspace(5) %"6", align 4 %"26" = load i32, ptr addrspace(5) %"8", align 4 %"27" = load i32, ptr addrspace(5) %"9", align 4 %"54" = bitcast i32 %"26" to <2 x half> %"55" = 
bitcast i32 %"27" to <2 x half> - %2 = fcmp oeq <2 x half> %"54", %"55" - %"53" = uitofp <2 x i1> %2 to <2 x half> + %4 = fcmp oeq <2 x half> %"54", %"55" + %"53" = uitofp <2 x i1> %4 to <2 x half> %"25" = bitcast <2 x half> %"53" to i32 store i32 %"25", ptr addrspace(5) %"8", align 4 %"28" = load i64, ptr addrspace(5) %"5", align 8 @@ -58,8 +60,8 @@ define protected amdgpu_kernel void @set_f16x2(ptr addrspace(4) byref(i64) %"40" %"30" = load i64, ptr addrspace(5) %"5", align 8 %"31" = load i32, ptr addrspace(5) %"8", align 4 %"57" = inttoptr i64 %"30" to ptr - %"66" = getelementptr inbounds i8, ptr %"57", i64 4 - store i32 %"31", ptr %"66", align 4 + %"65" = getelementptr inbounds i8, ptr %"57", i64 4 + store i32 %"31", ptr %"65", align 4 ret void } diff --git a/ptx/src/test/spirv_run/setp.ll b/ptx/src/test/spirv_run/setp.ll index 2f955566..1e9e1e54 100644 --- a/ptx/src/test/spirv_run/setp.ll +++ b/ptx/src/test/spirv_run/setp.ll @@ -2,15 +2,19 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { -"39": %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i1, align 1, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + %2 = alloca i64, align 8, addrspace(5) + br label %3 + +3: ; preds = %0 + store i1 false, ptr addrspace(5) %"14", align 1 %"15" = load i64, ptr addrspace(4) %"34", align 8 store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"35", align 8 @@ -21,8 +25,8 @@ define protected amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"34", ptr store i64 %"17", ptr addrspace(5) %"6", align 8 
%"20" = load i64, ptr addrspace(5) %"4", align 8 %"37" = inttoptr i64 %"20" to ptr - %"41" = getelementptr inbounds i8, ptr %"37", i64 8 - %"19" = load i64, ptr %"41", align 8 + %"40" = getelementptr inbounds i8, ptr %"37", i64 8 + %"19" = load i64, ptr %"40", align 8 store i64 %"19", ptr addrspace(5) %"7", align 8 %"22" = load i64, ptr addrspace(5) %"6", align 8 %"23" = load i64, ptr addrspace(5) %"7", align 8 @@ -31,21 +35,19 @@ define protected amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"34", ptr %"24" = load i1, ptr addrspace(5) %"9", align 1 br i1 %"24", label %"10", label %"11" -"10": ; preds = %"39" - %0 = alloca i64, align 8, addrspace(5) - store i64 1, ptr addrspace(5) %0, align 8 - %"25" = load i64, ptr addrspace(5) %0, align 8 +"10": ; preds = %3 + store i64 1, ptr addrspace(5) %1, align 8 + %"25" = load i64, ptr addrspace(5) %1, align 8 store i64 %"25", ptr addrspace(5) %"8", align 8 br label %"11" -"11": ; preds = %"10", %"39" +"11": ; preds = %"10", %3 %"26" = load i1, ptr addrspace(5) %"9", align 1 br i1 %"26", label %"13", label %"12" "12": ; preds = %"11" - %1 = alloca i64, align 8, addrspace(5) - store i64 2, ptr addrspace(5) %1, align 8 - %"27" = load i64, ptr addrspace(5) %1, align 8 + store i64 2, ptr addrspace(5) %2, align 8 + %"27" = load i64, ptr addrspace(5) %2, align 8 store i64 %"27", ptr addrspace(5) %"8", align 8 br label %"13" diff --git a/ptx/src/test/spirv_run/setp_bool.ll b/ptx/src/test/spirv_run/setp_bool.ll index ac1b2bb2..f0b659f4 100644 --- a/ptx/src/test/spirv_run/setp_bool.ll +++ b/ptx/src/test/spirv_run/setp_bool.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @setp_bool(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 { -"50": %"16" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"16", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = 
alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) @@ -13,6 +11,13 @@ define protected amdgpu_kernel void @setp_bool(ptr addrspace(4) byref(i64) %"44" %"9" = alloca i1, align 1, addrspace(5) %"10" = alloca i1, align 1, addrspace(5) %"11" = alloca i1, align 1, addrspace(5) + %1 = alloca i1, align 1, addrspace(5) + %2 = alloca float, align 4, addrspace(5) + %3 = alloca float, align 4, addrspace(5) + br label %4 + +4: ; preds = %0 + store i1 false, ptr addrspace(5) %"16", align 1 %"17" = load i64, ptr addrspace(4) %"44", align 8 store i64 %"17", ptr addrspace(5) %"4", align 8 %"18" = load i64, ptr addrspace(4) %"45", align 8 @@ -23,47 +28,44 @@ define protected amdgpu_kernel void @setp_bool(ptr addrspace(4) byref(i64) %"44" store float %"19", ptr addrspace(5) %"6", align 4 %"22" = load i64, ptr addrspace(5) %"4", align 8 %"47" = inttoptr i64 %"22" to ptr - %"52" = getelementptr inbounds i8, ptr %"47", i64 4 - %"21" = load float, ptr %"52", align 4 + %"51" = getelementptr inbounds i8, ptr %"47", i64 4 + %"21" = load float, ptr %"51", align 4 store float %"21", ptr addrspace(5) %"7", align 4 %"24" = load i64, ptr addrspace(5) %"4", align 8 %"48" = inttoptr i64 %"24" to ptr - %"54" = getelementptr inbounds i8, ptr %"48", i64 8 - %"23" = load float, ptr %"54", align 4 + %"53" = getelementptr inbounds i8, ptr %"48", i64 8 + %"23" = load float, ptr %"53", align 4 store float %"23", ptr addrspace(5) %"8", align 4 - %0 = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %0, align 1 - %"25" = load i1, ptr addrspace(5) %0, align 1 + store i1 false, ptr addrspace(5) %1, align 1 + %"25" = load i1, ptr addrspace(5) %1, align 1 store i1 %"25", ptr addrspace(5) %"9", align 1 %"28" = load float, ptr addrspace(5) %"6", align 4 %"29" = load float, ptr addrspace(5) %"7", align 4 %"30" = load i1, ptr addrspace(5) %"9", align 1 - %1 = fcmp ogt float %"28", %"29" - %2 = xor i1 %1, true - %"26" = and i1 %1, %"30" - %"27" = and i1 %2, %"30" + %5 = 
fcmp ogt float %"28", %"29" + %6 = xor i1 %5, true + %"26" = and i1 %5, %"30" + %"27" = and i1 %6, %"30" store i1 %"26", ptr addrspace(5) %"10", align 1 store i1 %"27", ptr addrspace(5) %"11", align 1 %"31" = load i1, ptr addrspace(5) %"10", align 1 br i1 %"31", label %"12", label %"13" -"12": ; preds = %"50" +"12": ; preds = %4 %"33" = load float, ptr addrspace(5) %"6", align 4 - %3 = alloca float, align 4, addrspace(5) - store float %"33", ptr addrspace(5) %3, align 4 - %"32" = load float, ptr addrspace(5) %3, align 4 + store float %"33", ptr addrspace(5) %2, align 4 + %"32" = load float, ptr addrspace(5) %2, align 4 store float %"32", ptr addrspace(5) %"8", align 4 br label %"13" -"13": ; preds = %"12", %"50" +"13": ; preds = %"12", %4 %"34" = load i1, ptr addrspace(5) %"11", align 1 br i1 %"34", label %"14", label %"15" "14": ; preds = %"13" %"36" = load float, ptr addrspace(5) %"7", align 4 - %4 = alloca float, align 4, addrspace(5) - store float %"36", ptr addrspace(5) %4, align 4 - %"35" = load float, ptr addrspace(5) %4, align 4 + store float %"36", ptr addrspace(5) %3, align 4 + %"35" = load float, ptr addrspace(5) %3, align 4 store float %"35", ptr addrspace(5) %"8", align 4 br label %"15" diff --git a/ptx/src/test/spirv_run/setp_gt.ll b/ptx/src/test/spirv_run/setp_gt.ll index 3a8b965f..dbaf20a3 100644 --- a/ptx/src/test/spirv_run/setp_gt.ll +++ b/ptx/src/test/spirv_run/setp_gt.ll @@ -2,15 +2,19 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { -"39": %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) %"9" = alloca i1, 
align 1, addrspace(5) + %1 = alloca float, align 4, addrspace(5) + %2 = alloca float, align 4, addrspace(5) + br label %3 + +3: ; preds = %0 + store i1 false, ptr addrspace(5) %"14", align 1 %"15" = load i64, ptr addrspace(4) %"34", align 8 store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"35", align 8 @@ -21,8 +25,8 @@ define protected amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"34", store float %"17", ptr addrspace(5) %"6", align 4 %"20" = load i64, ptr addrspace(5) %"4", align 8 %"37" = inttoptr i64 %"20" to ptr - %"41" = getelementptr inbounds i8, ptr %"37", i64 4 - %"19" = load float, ptr %"41", align 4 + %"40" = getelementptr inbounds i8, ptr %"37", i64 4 + %"19" = load float, ptr %"40", align 4 store float %"19", ptr addrspace(5) %"7", align 4 %"22" = load float, ptr addrspace(5) %"6", align 4 %"23" = load float, ptr addrspace(5) %"7", align 4 @@ -31,23 +35,21 @@ define protected amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"34", %"24" = load i1, ptr addrspace(5) %"9", align 1 br i1 %"24", label %"10", label %"11" -"10": ; preds = %"39" +"10": ; preds = %3 %"26" = load float, ptr addrspace(5) %"6", align 4 - %0 = alloca float, align 4, addrspace(5) - store float %"26", ptr addrspace(5) %0, align 4 - %"25" = load float, ptr addrspace(5) %0, align 4 + store float %"26", ptr addrspace(5) %1, align 4 + %"25" = load float, ptr addrspace(5) %1, align 4 store float %"25", ptr addrspace(5) %"8", align 4 br label %"11" -"11": ; preds = %"10", %"39" +"11": ; preds = %"10", %3 %"27" = load i1, ptr addrspace(5) %"9", align 1 br i1 %"27", label %"13", label %"12" "12": ; preds = %"11" %"29" = load float, ptr addrspace(5) %"7", align 4 - %1 = alloca float, align 4, addrspace(5) - store float %"29", ptr addrspace(5) %1, align 4 - %"28" = load float, ptr addrspace(5) %1, align 4 + store float %"29", ptr addrspace(5) %2, align 4 + %"28" = load float, ptr addrspace(5) %2, align 4 store float %"28", ptr addrspace(5) 
%"8", align 4 br label %"13" diff --git a/ptx/src/test/spirv_run/setp_leu.ll b/ptx/src/test/spirv_run/setp_leu.ll index 9699fde5..d27b96a9 100644 --- a/ptx/src/test/spirv_run/setp_leu.ll +++ b/ptx/src/test/spirv_run/setp_leu.ll @@ -2,15 +2,19 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { -"39": %"14" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"14", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) %"7" = alloca float, align 4, addrspace(5) %"8" = alloca float, align 4, addrspace(5) %"9" = alloca i1, align 1, addrspace(5) + %1 = alloca float, align 4, addrspace(5) + %2 = alloca float, align 4, addrspace(5) + br label %3 + +3: ; preds = %0 + store i1 false, ptr addrspace(5) %"14", align 1 %"15" = load i64, ptr addrspace(4) %"34", align 8 store i64 %"15", ptr addrspace(5) %"4", align 8 %"16" = load i64, ptr addrspace(4) %"35", align 8 @@ -21,8 +25,8 @@ define protected amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"34", store float %"17", ptr addrspace(5) %"6", align 4 %"20" = load i64, ptr addrspace(5) %"4", align 8 %"37" = inttoptr i64 %"20" to ptr - %"41" = getelementptr inbounds i8, ptr %"37", i64 4 - %"19" = load float, ptr %"41", align 4 + %"40" = getelementptr inbounds i8, ptr %"37", i64 4 + %"19" = load float, ptr %"40", align 4 store float %"19", ptr addrspace(5) %"7", align 4 %"22" = load float, ptr addrspace(5) %"6", align 4 %"23" = load float, ptr addrspace(5) %"7", align 4 @@ -31,23 +35,21 @@ define protected amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"34", %"24" = load i1, ptr addrspace(5) %"9", align 1 br i1 %"24", label %"10", label %"11" -"10": ; preds = %"39" +"10": ; preds = %3 %"26" = load float, ptr addrspace(5) %"6", 
align 4 - %0 = alloca float, align 4, addrspace(5) - store float %"26", ptr addrspace(5) %0, align 4 - %"25" = load float, ptr addrspace(5) %0, align 4 + store float %"26", ptr addrspace(5) %1, align 4 + %"25" = load float, ptr addrspace(5) %1, align 4 store float %"25", ptr addrspace(5) %"8", align 4 br label %"11" -"11": ; preds = %"10", %"39" +"11": ; preds = %"10", %3 %"27" = load i1, ptr addrspace(5) %"9", align 1 br i1 %"27", label %"13", label %"12" "12": ; preds = %"11" %"29" = load float, ptr addrspace(5) %"7", align 4 - %1 = alloca float, align 4, addrspace(5) - store float %"29", ptr addrspace(5) %1, align 4 - %"28" = load float, ptr addrspace(5) %1, align 4 + store float %"29", ptr addrspace(5) %2, align 4 + %"28" = load float, ptr addrspace(5) %2, align 4 store float %"28", ptr addrspace(5) %"8", align 4 br label %"13" diff --git a/ptx/src/test/spirv_run/setp_nan.ll b/ptx/src/test/spirv_run/setp_nan.ll index 13683868..709ed89d 100644 --- a/ptx/src/test/spirv_run/setp_nan.ll +++ b/ptx/src/test/spirv_run/setp_nan.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115", ptr addrspace(4) byref(i64) %"116") #0 { -"129": %"32" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"32", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) @@ -17,6 +15,18 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" %"13" = alloca float, align 4, addrspace(5) %"14" = alloca i32, align 4, addrspace(5) %"15" = alloca i1, align 1, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) + %2 = alloca i32, align 4, addrspace(5) + %3 = alloca i32, align 4, addrspace(5) + %4 = alloca i32, align 4, addrspace(5) + %5 = alloca i32, align 4, addrspace(5) + %6 = alloca i32, align 4, addrspace(5) + 
%7 = alloca i32, align 4, addrspace(5) + %8 = alloca i32, align 4, addrspace(5) + br label %9 + +9: ; preds = %0 + store i1 false, ptr addrspace(5) %"32", align 1 %"33" = load i64, ptr addrspace(4) %"115", align 8 store i64 %"33", ptr addrspace(5) %"4", align 8 %"34" = load i64, ptr addrspace(4) %"116", align 8 @@ -27,38 +37,38 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" store float %"35", ptr addrspace(5) %"6", align 4 %"38" = load i64, ptr addrspace(5) %"4", align 8 %"118" = inttoptr i64 %"38" to ptr - %"131" = getelementptr inbounds i8, ptr %"118", i64 4 - %"37" = load float, ptr %"131", align 4 + %"130" = getelementptr inbounds i8, ptr %"118", i64 4 + %"37" = load float, ptr %"130", align 4 store float %"37", ptr addrspace(5) %"7", align 4 %"40" = load i64, ptr addrspace(5) %"4", align 8 %"119" = inttoptr i64 %"40" to ptr - %"133" = getelementptr inbounds i8, ptr %"119", i64 8 - %"39" = load float, ptr %"133", align 4 + %"132" = getelementptr inbounds i8, ptr %"119", i64 8 + %"39" = load float, ptr %"132", align 4 store float %"39", ptr addrspace(5) %"8", align 4 %"42" = load i64, ptr addrspace(5) %"4", align 8 %"120" = inttoptr i64 %"42" to ptr - %"135" = getelementptr inbounds i8, ptr %"120", i64 12 - %"41" = load float, ptr %"135", align 4 + %"134" = getelementptr inbounds i8, ptr %"120", i64 12 + %"41" = load float, ptr %"134", align 4 store float %"41", ptr addrspace(5) %"9", align 4 %"44" = load i64, ptr addrspace(5) %"4", align 8 %"121" = inttoptr i64 %"44" to ptr - %"137" = getelementptr inbounds i8, ptr %"121", i64 16 - %"43" = load float, ptr %"137", align 4 + %"136" = getelementptr inbounds i8, ptr %"121", i64 16 + %"43" = load float, ptr %"136", align 4 store float %"43", ptr addrspace(5) %"10", align 4 %"46" = load i64, ptr addrspace(5) %"4", align 8 %"122" = inttoptr i64 %"46" to ptr - %"139" = getelementptr inbounds i8, ptr %"122", i64 20 - %"45" = load float, ptr %"139", align 4 + %"138" = getelementptr 
inbounds i8, ptr %"122", i64 20 + %"45" = load float, ptr %"138", align 4 store float %"45", ptr addrspace(5) %"11", align 4 %"48" = load i64, ptr addrspace(5) %"4", align 8 %"123" = inttoptr i64 %"48" to ptr - %"141" = getelementptr inbounds i8, ptr %"123", i64 24 - %"47" = load float, ptr %"141", align 4 + %"140" = getelementptr inbounds i8, ptr %"123", i64 24 + %"47" = load float, ptr %"140", align 4 store float %"47", ptr addrspace(5) %"12", align 4 %"50" = load i64, ptr addrspace(5) %"4", align 8 %"124" = inttoptr i64 %"50" to ptr - %"143" = getelementptr inbounds i8, ptr %"124", i64 28 - %"49" = load float, ptr %"143", align 4 + %"142" = getelementptr inbounds i8, ptr %"124", i64 28 + %"49" = load float, ptr %"142", align 4 store float %"49", ptr addrspace(5) %"13", align 4 %"52" = load float, ptr addrspace(5) %"6", align 4 %"53" = load float, ptr addrspace(5) %"7", align 4 @@ -67,21 +77,19 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" %"54" = load i1, ptr addrspace(5) %"15", align 1 br i1 %"54", label %"16", label %"17" -"16": ; preds = %"129" - %0 = alloca i32, align 4, addrspace(5) - store i32 1, ptr addrspace(5) %0, align 4 - %"55" = load i32, ptr addrspace(5) %0, align 4 +"16": ; preds = %9 + store i32 1, ptr addrspace(5) %1, align 4 + %"55" = load i32, ptr addrspace(5) %1, align 4 store i32 %"55", ptr addrspace(5) %"14", align 4 br label %"17" -"17": ; preds = %"16", %"129" +"17": ; preds = %"16", %9 %"56" = load i1, ptr addrspace(5) %"15", align 1 br i1 %"56", label %"19", label %"18" "18": ; preds = %"17" - %1 = alloca i32, align 4, addrspace(5) - store i32 0, ptr addrspace(5) %1, align 4 - %"57" = load i32, ptr addrspace(5) %1, align 4 + store i32 0, ptr addrspace(5) %2, align 4 + %"57" = load i32, ptr addrspace(5) %2, align 4 store i32 %"57", ptr addrspace(5) %"14", align 4 br label %"19" @@ -98,9 +106,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" br i1 %"63", label %"20", 
label %"21" "20": ; preds = %"19" - %2 = alloca i32, align 4, addrspace(5) - store i32 1, ptr addrspace(5) %2, align 4 - %"64" = load i32, ptr addrspace(5) %2, align 4 + store i32 1, ptr addrspace(5) %3, align 4 + %"64" = load i32, ptr addrspace(5) %3, align 4 store i32 %"64", ptr addrspace(5) %"14", align 4 br label %"21" @@ -109,9 +116,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" br i1 %"65", label %"23", label %"22" "22": ; preds = %"21" - %3 = alloca i32, align 4, addrspace(5) - store i32 0, ptr addrspace(5) %3, align 4 - %"66" = load i32, ptr addrspace(5) %3, align 4 + store i32 0, ptr addrspace(5) %4, align 4 + %"66" = load i32, ptr addrspace(5) %4, align 4 store i32 %"66", ptr addrspace(5) %"14", align 4 br label %"23" @@ -119,8 +125,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" %"67" = load i64, ptr addrspace(5) %"5", align 8 %"68" = load i32, ptr addrspace(5) %"14", align 4 %"126" = inttoptr i64 %"67" to ptr - %"145" = getelementptr inbounds i8, ptr %"126", i64 4 - store i32 %"68", ptr %"145", align 4 + %"144" = getelementptr inbounds i8, ptr %"126", i64 4 + store i32 %"68", ptr %"144", align 4 %"70" = load float, ptr addrspace(5) %"10", align 4 %"71" = load float, ptr addrspace(5) %"11", align 4 %"69" = fcmp uno float %"70", %"71" @@ -129,9 +135,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" br i1 %"72", label %"24", label %"25" "24": ; preds = %"23" - %4 = alloca i32, align 4, addrspace(5) - store i32 1, ptr addrspace(5) %4, align 4 - %"73" = load i32, ptr addrspace(5) %4, align 4 + store i32 1, ptr addrspace(5) %5, align 4 + %"73" = load i32, ptr addrspace(5) %5, align 4 store i32 %"73", ptr addrspace(5) %"14", align 4 br label %"25" @@ -140,9 +145,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" br i1 %"74", label %"27", label %"26" "26": ; preds = %"25" - %5 = alloca i32, align 4, addrspace(5) - 
store i32 0, ptr addrspace(5) %5, align 4 - %"75" = load i32, ptr addrspace(5) %5, align 4 + store i32 0, ptr addrspace(5) %6, align 4 + %"75" = load i32, ptr addrspace(5) %6, align 4 store i32 %"75", ptr addrspace(5) %"14", align 4 br label %"27" @@ -150,8 +154,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" %"76" = load i64, ptr addrspace(5) %"5", align 8 %"77" = load i32, ptr addrspace(5) %"14", align 4 %"127" = inttoptr i64 %"76" to ptr - %"147" = getelementptr inbounds i8, ptr %"127", i64 8 - store i32 %"77", ptr %"147", align 4 + %"146" = getelementptr inbounds i8, ptr %"127", i64 8 + store i32 %"77", ptr %"146", align 4 %"79" = load float, ptr addrspace(5) %"12", align 4 %"80" = load float, ptr addrspace(5) %"13", align 4 %"78" = fcmp uno float %"79", %"80" @@ -160,9 +164,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" br i1 %"81", label %"28", label %"29" "28": ; preds = %"27" - %6 = alloca i32, align 4, addrspace(5) - store i32 1, ptr addrspace(5) %6, align 4 - %"82" = load i32, ptr addrspace(5) %6, align 4 + store i32 1, ptr addrspace(5) %7, align 4 + %"82" = load i32, ptr addrspace(5) %7, align 4 store i32 %"82", ptr addrspace(5) %"14", align 4 br label %"29" @@ -171,9 +174,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" br i1 %"83", label %"31", label %"30" "30": ; preds = %"29" - %7 = alloca i32, align 4, addrspace(5) - store i32 0, ptr addrspace(5) %7, align 4 - %"84" = load i32, ptr addrspace(5) %7, align 4 + store i32 0, ptr addrspace(5) %8, align 4 + %"84" = load i32, ptr addrspace(5) %8, align 4 store i32 %"84", ptr addrspace(5) %"14", align 4 br label %"31" @@ -181,8 +183,8 @@ define protected amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"115" %"85" = load i64, ptr addrspace(5) %"5", align 8 %"86" = load i32, ptr addrspace(5) %"14", align 4 %"128" = inttoptr i64 %"85" to ptr - %"149" = getelementptr inbounds i8, ptr 
%"128", i64 12 - store i32 %"86", ptr %"149", align 4 + %"148" = getelementptr inbounds i8, ptr %"128", i64 12 + store i32 %"86", ptr %"148", align 4 ret void } diff --git a/ptx/src/test/spirv_run/setp_num.ll b/ptx/src/test/spirv_run/setp_num.ll index a6254a2f..bebecc44 100644 --- a/ptx/src/test/spirv_run/setp_num.ll +++ b/ptx/src/test/spirv_run/setp_num.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115", ptr addrspace(4) byref(i64) %"116") #0 { -"129": %"32" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"32", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) @@ -17,6 +15,18 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" %"13" = alloca float, align 4, addrspace(5) %"14" = alloca i32, align 4, addrspace(5) %"15" = alloca i1, align 1, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) + %2 = alloca i32, align 4, addrspace(5) + %3 = alloca i32, align 4, addrspace(5) + %4 = alloca i32, align 4, addrspace(5) + %5 = alloca i32, align 4, addrspace(5) + %6 = alloca i32, align 4, addrspace(5) + %7 = alloca i32, align 4, addrspace(5) + %8 = alloca i32, align 4, addrspace(5) + br label %9 + +9: ; preds = %0 + store i1 false, ptr addrspace(5) %"32", align 1 %"33" = load i64, ptr addrspace(4) %"115", align 8 store i64 %"33", ptr addrspace(5) %"4", align 8 %"34" = load i64, ptr addrspace(4) %"116", align 8 @@ -27,38 +37,38 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" store float %"35", ptr addrspace(5) %"6", align 4 %"38" = load i64, ptr addrspace(5) %"4", align 8 %"118" = inttoptr i64 %"38" to ptr - %"131" = getelementptr inbounds i8, ptr %"118", i64 4 - %"37" = load float, ptr %"131", align 4 + %"130" = getelementptr inbounds i8, 
ptr %"118", i64 4 + %"37" = load float, ptr %"130", align 4 store float %"37", ptr addrspace(5) %"7", align 4 %"40" = load i64, ptr addrspace(5) %"4", align 8 %"119" = inttoptr i64 %"40" to ptr - %"133" = getelementptr inbounds i8, ptr %"119", i64 8 - %"39" = load float, ptr %"133", align 4 + %"132" = getelementptr inbounds i8, ptr %"119", i64 8 + %"39" = load float, ptr %"132", align 4 store float %"39", ptr addrspace(5) %"8", align 4 %"42" = load i64, ptr addrspace(5) %"4", align 8 %"120" = inttoptr i64 %"42" to ptr - %"135" = getelementptr inbounds i8, ptr %"120", i64 12 - %"41" = load float, ptr %"135", align 4 + %"134" = getelementptr inbounds i8, ptr %"120", i64 12 + %"41" = load float, ptr %"134", align 4 store float %"41", ptr addrspace(5) %"9", align 4 %"44" = load i64, ptr addrspace(5) %"4", align 8 %"121" = inttoptr i64 %"44" to ptr - %"137" = getelementptr inbounds i8, ptr %"121", i64 16 - %"43" = load float, ptr %"137", align 4 + %"136" = getelementptr inbounds i8, ptr %"121", i64 16 + %"43" = load float, ptr %"136", align 4 store float %"43", ptr addrspace(5) %"10", align 4 %"46" = load i64, ptr addrspace(5) %"4", align 8 %"122" = inttoptr i64 %"46" to ptr - %"139" = getelementptr inbounds i8, ptr %"122", i64 20 - %"45" = load float, ptr %"139", align 4 + %"138" = getelementptr inbounds i8, ptr %"122", i64 20 + %"45" = load float, ptr %"138", align 4 store float %"45", ptr addrspace(5) %"11", align 4 %"48" = load i64, ptr addrspace(5) %"4", align 8 %"123" = inttoptr i64 %"48" to ptr - %"141" = getelementptr inbounds i8, ptr %"123", i64 24 - %"47" = load float, ptr %"141", align 4 + %"140" = getelementptr inbounds i8, ptr %"123", i64 24 + %"47" = load float, ptr %"140", align 4 store float %"47", ptr addrspace(5) %"12", align 4 %"50" = load i64, ptr addrspace(5) %"4", align 8 %"124" = inttoptr i64 %"50" to ptr - %"143" = getelementptr inbounds i8, ptr %"124", i64 28 - %"49" = load float, ptr %"143", align 4 + %"142" = getelementptr inbounds i8, ptr 
%"124", i64 28 + %"49" = load float, ptr %"142", align 4 store float %"49", ptr addrspace(5) %"13", align 4 %"52" = load float, ptr addrspace(5) %"6", align 4 %"53" = load float, ptr addrspace(5) %"7", align 4 @@ -67,21 +77,19 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" %"54" = load i1, ptr addrspace(5) %"15", align 1 br i1 %"54", label %"16", label %"17" -"16": ; preds = %"129" - %0 = alloca i32, align 4, addrspace(5) - store i32 2, ptr addrspace(5) %0, align 4 - %"55" = load i32, ptr addrspace(5) %0, align 4 +"16": ; preds = %9 + store i32 2, ptr addrspace(5) %1, align 4 + %"55" = load i32, ptr addrspace(5) %1, align 4 store i32 %"55", ptr addrspace(5) %"14", align 4 br label %"17" -"17": ; preds = %"16", %"129" +"17": ; preds = %"16", %9 %"56" = load i1, ptr addrspace(5) %"15", align 1 br i1 %"56", label %"19", label %"18" "18": ; preds = %"17" - %1 = alloca i32, align 4, addrspace(5) - store i32 0, ptr addrspace(5) %1, align 4 - %"57" = load i32, ptr addrspace(5) %1, align 4 + store i32 0, ptr addrspace(5) %2, align 4 + %"57" = load i32, ptr addrspace(5) %2, align 4 store i32 %"57", ptr addrspace(5) %"14", align 4 br label %"19" @@ -98,9 +106,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" br i1 %"63", label %"20", label %"21" "20": ; preds = %"19" - %2 = alloca i32, align 4, addrspace(5) - store i32 2, ptr addrspace(5) %2, align 4 - %"64" = load i32, ptr addrspace(5) %2, align 4 + store i32 2, ptr addrspace(5) %3, align 4 + %"64" = load i32, ptr addrspace(5) %3, align 4 store i32 %"64", ptr addrspace(5) %"14", align 4 br label %"21" @@ -109,9 +116,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" br i1 %"65", label %"23", label %"22" "22": ; preds = %"21" - %3 = alloca i32, align 4, addrspace(5) - store i32 0, ptr addrspace(5) %3, align 4 - %"66" = load i32, ptr addrspace(5) %3, align 4 + store i32 0, ptr addrspace(5) %4, align 4 + %"66" = load 
i32, ptr addrspace(5) %4, align 4 store i32 %"66", ptr addrspace(5) %"14", align 4 br label %"23" @@ -119,8 +125,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" %"67" = load i64, ptr addrspace(5) %"5", align 8 %"68" = load i32, ptr addrspace(5) %"14", align 4 %"126" = inttoptr i64 %"67" to ptr - %"145" = getelementptr inbounds i8, ptr %"126", i64 4 - store i32 %"68", ptr %"145", align 4 + %"144" = getelementptr inbounds i8, ptr %"126", i64 4 + store i32 %"68", ptr %"144", align 4 %"70" = load float, ptr addrspace(5) %"10", align 4 %"71" = load float, ptr addrspace(5) %"11", align 4 %"69" = fcmp ord float %"70", %"71" @@ -129,9 +135,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" br i1 %"72", label %"24", label %"25" "24": ; preds = %"23" - %4 = alloca i32, align 4, addrspace(5) - store i32 2, ptr addrspace(5) %4, align 4 - %"73" = load i32, ptr addrspace(5) %4, align 4 + store i32 2, ptr addrspace(5) %5, align 4 + %"73" = load i32, ptr addrspace(5) %5, align 4 store i32 %"73", ptr addrspace(5) %"14", align 4 br label %"25" @@ -140,9 +145,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" br i1 %"74", label %"27", label %"26" "26": ; preds = %"25" - %5 = alloca i32, align 4, addrspace(5) - store i32 0, ptr addrspace(5) %5, align 4 - %"75" = load i32, ptr addrspace(5) %5, align 4 + store i32 0, ptr addrspace(5) %6, align 4 + %"75" = load i32, ptr addrspace(5) %6, align 4 store i32 %"75", ptr addrspace(5) %"14", align 4 br label %"27" @@ -150,8 +154,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" %"76" = load i64, ptr addrspace(5) %"5", align 8 %"77" = load i32, ptr addrspace(5) %"14", align 4 %"127" = inttoptr i64 %"76" to ptr - %"147" = getelementptr inbounds i8, ptr %"127", i64 8 - store i32 %"77", ptr %"147", align 4 + %"146" = getelementptr inbounds i8, ptr %"127", i64 8 + store i32 %"77", ptr %"146", align 4 %"79" = 
load float, ptr addrspace(5) %"12", align 4 %"80" = load float, ptr addrspace(5) %"13", align 4 %"78" = fcmp ord float %"79", %"80" @@ -160,9 +164,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" br i1 %"81", label %"28", label %"29" "28": ; preds = %"27" - %6 = alloca i32, align 4, addrspace(5) - store i32 2, ptr addrspace(5) %6, align 4 - %"82" = load i32, ptr addrspace(5) %6, align 4 + store i32 2, ptr addrspace(5) %7, align 4 + %"82" = load i32, ptr addrspace(5) %7, align 4 store i32 %"82", ptr addrspace(5) %"14", align 4 br label %"29" @@ -171,9 +174,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" br i1 %"83", label %"31", label %"30" "30": ; preds = %"29" - %7 = alloca i32, align 4, addrspace(5) - store i32 0, ptr addrspace(5) %7, align 4 - %"84" = load i32, ptr addrspace(5) %7, align 4 + store i32 0, ptr addrspace(5) %8, align 4 + %"84" = load i32, ptr addrspace(5) %8, align 4 store i32 %"84", ptr addrspace(5) %"14", align 4 br label %"31" @@ -181,8 +183,8 @@ define protected amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"115" %"85" = load i64, ptr addrspace(5) %"5", align 8 %"86" = load i32, ptr addrspace(5) %"14", align 4 %"128" = inttoptr i64 %"85" to ptr - %"149" = getelementptr inbounds i8, ptr %"128", i64 12 - store i32 %"86", ptr %"149", align 4 + %"148" = getelementptr inbounds i8, ptr %"128", i64 12 + store i32 %"86", ptr %"148", align 4 ret void } diff --git a/ptx/src/test/spirv_run/setp_pred2.ll b/ptx/src/test/spirv_run/setp_pred2.ll index 8220fc0c..01ae23e9 100644 --- a/ptx/src/test/spirv_run/setp_pred2.ll +++ b/ptx/src/test/spirv_run/setp_pred2.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @setp_pred2(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { -"41": %"15" = alloca i1, align 1, addrspace(5) - store i1 
false, ptr addrspace(5) %"15", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) @@ -12,6 +10,12 @@ define protected amdgpu_kernel void @setp_pred2(ptr addrspace(4) byref(i64) %"36 %"8" = alloca float, align 4, addrspace(5) %"9" = alloca i1, align 1, addrspace(5) %"10" = alloca i1, align 1, addrspace(5) + %1 = alloca float, align 4, addrspace(5) + %2 = alloca float, align 4, addrspace(5) + br label %3 + +3: ; preds = %0 + store i1 false, ptr addrspace(5) %"15", align 1 %"16" = load i64, ptr addrspace(4) %"36", align 8 store i64 %"16", ptr addrspace(5) %"4", align 8 %"17" = load i64, ptr addrspace(4) %"37", align 8 @@ -22,8 +26,8 @@ define protected amdgpu_kernel void @setp_pred2(ptr addrspace(4) byref(i64) %"36 store float %"18", ptr addrspace(5) %"6", align 4 %"21" = load i64, ptr addrspace(5) %"4", align 8 %"39" = inttoptr i64 %"21" to ptr - %"43" = getelementptr inbounds i8, ptr %"39", i64 4 - %"20" = load float, ptr %"43", align 4 + %"42" = getelementptr inbounds i8, ptr %"39", i64 4 + %"20" = load float, ptr %"42", align 4 store float %"20", ptr addrspace(5) %"7", align 4 %"24" = load float, ptr addrspace(5) %"6", align 4 %"25" = load float, ptr addrspace(5) %"7", align 4 @@ -34,23 +38,21 @@ define protected amdgpu_kernel void @setp_pred2(ptr addrspace(4) byref(i64) %"36 %"26" = load i1, ptr addrspace(5) %"9", align 1 br i1 %"26", label %"11", label %"12" -"11": ; preds = %"41" +"11": ; preds = %3 %"28" = load float, ptr addrspace(5) %"6", align 4 - %0 = alloca float, align 4, addrspace(5) - store float %"28", ptr addrspace(5) %0, align 4 - %"27" = load float, ptr addrspace(5) %0, align 4 + store float %"28", ptr addrspace(5) %1, align 4 + %"27" = load float, ptr addrspace(5) %1, align 4 store float %"27", ptr addrspace(5) %"8", align 4 br label %"12" -"12": ; preds = %"11", %"41" +"12": ; preds = %"11", %3 %"29" = load i1, ptr addrspace(5) %"10", align 1 br i1 %"29", label 
%"13", label %"14" "13": ; preds = %"12" %"31" = load float, ptr addrspace(5) %"7", align 4 - %1 = alloca float, align 4, addrspace(5) - store float %"31", ptr addrspace(5) %1, align 4 - %"30" = load float, ptr addrspace(5) %1, align 4 + store float %"31", ptr addrspace(5) %2, align 4 + %"30" = load float, ptr addrspace(5) %2, align 4 store float %"30", ptr addrspace(5) %"8", align 4 br label %"14" diff --git a/ptx/src/test/spirv_run/shared_ptr_32.ll b/ptx/src/test/spirv_run/shared_ptr_32.ll index 87059671..f3e02690 100644 --- a/ptx/src/test/spirv_run/shared_ptr_32.ll +++ b/ptx/src/test/spirv_run/shared_ptr_32.ll @@ -4,21 +4,23 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [128 x i8] undef, align 4 define protected amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { -"31": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"11" = load i64, ptr addrspace(4) %"24", align 8 store i64 %"11", ptr addrspace(5) %"5", align 8 %"12" = load i64, ptr addrspace(4) %"25", align 8 store i64 %"12", ptr addrspace(5) %"6", align 8 - %0 = alloca i32, align 4, addrspace(5) - store i32 ptrtoint (ptr addrspace(3) @"4" to i32), ptr addrspace(5) %0, align 4 - %"13" = load i32, ptr addrspace(5) %0, align 4 + store i32 ptrtoint (ptr addrspace(3) @"4" to i32), ptr addrspace(5) %1, align 4 + %"13" = load i32, ptr addrspace(5) %1, align 4 store i32 %"13", ptr addrspace(5) %"7", align 4 %"15" = load i64, ptr addrspace(5) %"5", align 8 %"27" = inttoptr i64 %"15" to ptr addrspace(1) @@ -30,8 +32,8 @@ define protected amdgpu_kernel void 
@shared_ptr_32(ptr addrspace(4) byref(i64) % store i64 %"17", ptr addrspace(3) %"28", align 8 %"19" = load i32, ptr addrspace(5) %"7", align 4 %"29" = inttoptr i32 %"19" to ptr addrspace(3) - %"33" = getelementptr inbounds i8, ptr addrspace(3) %"29", i64 0 - %"18" = load i64, ptr addrspace(3) %"33", align 8 + %"32" = getelementptr inbounds i8, ptr addrspace(3) %"29", i64 0 + %"18" = load i64, ptr addrspace(3) %"32", align 8 store i64 %"18", ptr addrspace(5) %"9", align 8 %"20" = load i64, ptr addrspace(5) %"6", align 8 %"21" = load i64, ptr addrspace(5) %"9", align 8 diff --git a/ptx/src/test/spirv_run/shared_ptr_take_address.ll b/ptx/src/test/spirv_run/shared_ptr_take_address.ll index 6c430a2d..fd61d713 100644 --- a/ptx/src/test/spirv_run/shared_ptr_take_address.ll +++ b/ptx/src/test/spirv_run/shared_ptr_take_address.ll @@ -4,21 +4,23 @@ target triple = "amdgcn-amd-amdhsa" @shared_mem = external hidden addrspace(3) global [0 x i8], align 4 define protected amdgpu_kernel void @shared_ptr_take_address(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"29": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"9" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"11" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"11", ptr addrspace(5) %"5", align 8 %"12" = load i64, ptr addrspace(4) %"23", align 8 store i64 %"12", ptr addrspace(5) %"6", align 8 - %0 = alloca i64, align 8, addrspace(5) - store i64 ptrtoint (ptr addrspace(3) @shared_mem to i64), ptr addrspace(5) %0, align 8 - %"13" = load i64, ptr addrspace(5) %0, align 8 + store i64 ptrtoint (ptr addrspace(3) @shared_mem to i64), ptr addrspace(5) %1, align 8 + %"13" = load 
i64, ptr addrspace(5) %1, align 8 store i64 %"13", ptr addrspace(5) %"7", align 8 %"15" = load i64, ptr addrspace(5) %"5", align 8 %"25" = inttoptr i64 %"15" to ptr addrspace(1) diff --git a/ptx/src/test/spirv_run/shared_unify_decl.ll b/ptx/src/test/spirv_run/shared_unify_decl.ll index 4cc24fb3..61d62d77 100644 --- a/ptx/src/test/spirv_run/shared_unify_decl.ll +++ b/ptx/src/test/spirv_run/shared_unify_decl.ll @@ -4,16 +4,18 @@ target triple = "amdgcn-amd-amdhsa" @shared_ex = external hidden addrspace(3) global [0 x i32] @shared_mod = private addrspace(3) global [4 x i32] undef -define private i64 @"3"(ptr addrspace(3) %"66", ptr addrspace(3) %"67") #0 { -"59": +define private i64 @"3"(ptr addrspace(3) %"63", ptr addrspace(3) %"64") #0 { %"8" = alloca i64, align 8, addrspace(5) %"20" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"20", align 1 %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) - %"23" = load i64, ptr addrspace(3) %"67", align 8 + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"20", align 1 + %"23" = load i64, ptr addrspace(3) %"64", align 8 store i64 %"23", ptr addrspace(5) %"9", align 8 - %"24" = load i64, ptr addrspace(3) %"66", align 8 + %"24" = load i64, ptr addrspace(3) %"63", align 8 store i64 %"24", ptr addrspace(5) %"10", align 8 %"26" = load i64, ptr addrspace(5) %"10", align 8 %"27" = load i64, ptr addrspace(5) %"9", align 8 @@ -23,29 +25,33 @@ define private i64 @"3"(ptr addrspace(3) %"66", ptr addrspace(3) %"67") #0 { ret i64 %"28" } -define private i64 @"5"(i64 %"29", ptr addrspace(3) %"68", ptr addrspace(3) %"69") #0 { -"60": +define private i64 @"5"(i64 %"29", ptr addrspace(3) %"65", ptr addrspace(3) %"66") #0 { %"12" = alloca i64, align 8, addrspace(5) %"11" = alloca i64, align 8, addrspace(5) %"21" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"21", align 1 + br label %1 + +1: ; preds = %0 store i64 %"29", ptr addrspace(5) %"12", 
align 8 + store i1 false, ptr addrspace(5) %"21", align 1 %"30" = load i64, ptr addrspace(5) %"12", align 8 - store i64 %"30", ptr addrspace(3) %"68", align 8 - %"31" = call i64 @"3"(ptr addrspace(3) %"68", ptr addrspace(3) %"69") + store i64 %"30", ptr addrspace(3) %"65", align 8 + %"31" = call i64 @"3"(ptr addrspace(3) %"65", ptr addrspace(3) %"66") store i64 %"31", ptr addrspace(5) %"11", align 8 %"32" = load i64, ptr addrspace(5) %"11", align 8 ret i64 %"32" } define protected amdgpu_kernel void @shared_unify_decl(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 { -"61": %"22" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"22", align 1 %"16" = alloca i64, align 8, addrspace(5) %"17" = alloca i64, align 8, addrspace(5) %"18" = alloca i64, align 8, addrspace(5) %"19" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"22", align 1 %"33" = load i64, ptr addrspace(4) %"46", align 8 store i64 %"33", ptr addrspace(5) %"16", align 8 %"34" = load i64, ptr addrspace(4) %"47", align 8 @@ -56,8 +62,8 @@ define protected amdgpu_kernel void @shared_unify_decl(ptr addrspace(4) byref(i6 store i64 %"35", ptr addrspace(5) %"18", align 8 %"38" = load i64, ptr addrspace(5) %"16", align 8 %"54" = inttoptr i64 %"38" to ptr addrspace(1) - %"71" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 8 - %"37" = load i64, ptr addrspace(1) %"71", align 8 + %"68" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 8 + %"37" = load i64, ptr addrspace(1) %"68", align 8 store i64 %"37", ptr addrspace(5) %"19", align 8 %"39" = load i64, ptr addrspace(5) %"19", align 8 store i64 %"39", ptr addrspace(3) @shared_mod, align 8 diff --git a/ptx/src/test/spirv_run/shared_unify_extern.ll b/ptx/src/test/spirv_run/shared_unify_extern.ll index 819e8a1b..769fd9ff 100644 --- a/ptx/src/test/spirv_run/shared_unify_extern.ll +++ b/ptx/src/test/spirv_run/shared_unify_extern.ll @@ -4,16 +4,18 @@ 
target triple = "amdgcn-amd-amdhsa" @shared_ex = external hidden addrspace(3) global [0 x i32] @shared_mod = private addrspace(3) global [4 x i32] undef -define private i64 @"3"(ptr addrspace(3) %"59", ptr addrspace(3) %"60") #0 { -"56": +define private i64 @"3"(ptr addrspace(3) %"56", ptr addrspace(3) %"57") #0 { %"4" = alloca i64, align 8, addrspace(5) %"17" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"17", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) - %"20" = load i64, ptr addrspace(3) %"60", align 8 + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"17", align 1 + %"20" = load i64, ptr addrspace(3) %"57", align 8 store i64 %"20", ptr addrspace(5) %"5", align 8 - %"21" = load i64, ptr addrspace(3) %"59", align 8 + %"21" = load i64, ptr addrspace(3) %"56", align 8 store i64 %"21", ptr addrspace(5) %"6", align 8 %"23" = load i64, ptr addrspace(5) %"6", align 8 %"24" = load i64, ptr addrspace(5) %"5", align 8 @@ -23,29 +25,33 @@ define private i64 @"3"(ptr addrspace(3) %"59", ptr addrspace(3) %"60") #0 { ret i64 %"25" } -define private i64 @"7"(i64 %"26", ptr addrspace(3) %"61", ptr addrspace(3) %"62") #0 { -"57": +define private i64 @"7"(i64 %"26", ptr addrspace(3) %"58", ptr addrspace(3) %"59") #0 { %"9" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"18" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"18", align 1 + br label %1 + +1: ; preds = %0 store i64 %"26", ptr addrspace(5) %"9", align 8 + store i1 false, ptr addrspace(5) %"18", align 1 %"27" = load i64, ptr addrspace(5) %"9", align 8 - store i64 %"27", ptr addrspace(3) %"61", align 8 - %"28" = call i64 @"3"(ptr addrspace(3) %"61", ptr addrspace(3) %"62") + store i64 %"27", ptr addrspace(3) %"58", align 8 + %"28" = call i64 @"3"(ptr addrspace(3) %"58", ptr addrspace(3) %"59") store i64 %"28", ptr addrspace(5) %"8", align 8 %"29" = load i64, ptr addrspace(5) 
%"8", align 8 ret i64 %"29" } define protected amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #0 { -"58": %"19" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"19", align 1 %"13" = alloca i64, align 8, addrspace(5) %"14" = alloca i64, align 8, addrspace(5) %"15" = alloca i64, align 8, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"19", align 1 %"30" = load i64, ptr addrspace(4) %"43", align 8 store i64 %"30", ptr addrspace(5) %"13", align 8 %"31" = load i64, ptr addrspace(4) %"44", align 8 @@ -56,8 +62,8 @@ define protected amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref( store i64 %"32", ptr addrspace(5) %"15", align 8 %"35" = load i64, ptr addrspace(5) %"13", align 8 %"51" = inttoptr i64 %"35" to ptr addrspace(1) - %"64" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 8 - %"34" = load i64, ptr addrspace(1) %"64", align 8 + %"61" = getelementptr inbounds i8, ptr addrspace(1) %"51", i64 8 + %"34" = load i64, ptr addrspace(1) %"61", align 8 store i64 %"34", ptr addrspace(5) %"16", align 8 %"36" = load i64, ptr addrspace(5) %"16", align 8 store i64 %"36", ptr addrspace(3) @shared_mod, align 8 diff --git a/ptx/src/test/spirv_run/shared_unify_local.ll b/ptx/src/test/spirv_run/shared_unify_local.ll index b98b280c..522e0f50 100644 --- a/ptx/src/test/spirv_run/shared_unify_local.ll +++ b/ptx/src/test/spirv_run/shared_unify_local.ll @@ -4,19 +4,21 @@ target triple = "amdgcn-amd-amdhsa" @shared_ex = external hidden addrspace(3) global [0 x i32] @"5" = private addrspace(3) global i64 undef, align 4 -define private i64 @"2"(i64 %"21", ptr addrspace(3) %"62", ptr addrspace(3) %"63") #0 { -"59": +define private i64 @"2"(i64 %"21", ptr addrspace(3) %"59", ptr addrspace(3) %"60") #0 { %"4" = alloca i64, align 8, addrspace(5) %"3" = alloca i64, align 8, addrspace(5) %"18" = alloca i1, align 1, 
addrspace(5) - store i1 false, ptr addrspace(5) %"18", align 1 %"6" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 store i64 %"21", ptr addrspace(5) %"4", align 8 + store i1 false, ptr addrspace(5) %"18", align 1 %"22" = load i64, ptr addrspace(5) %"4", align 8 - store i64 %"22", ptr addrspace(3) %"63", align 8 - %"23" = load i64, ptr addrspace(3) %"63", align 8 + store i64 %"22", ptr addrspace(3) %"60", align 8 + %"23" = load i64, ptr addrspace(3) %"60", align 8 store i64 %"23", ptr addrspace(5) %"6", align 8 - %"24" = load i64, ptr addrspace(3) %"62", align 8 + %"24" = load i64, ptr addrspace(3) %"59", align 8 store i64 %"24", ptr addrspace(5) %"4", align 8 %"26" = load i64, ptr addrspace(5) %"4", align 8 %"27" = load i64, ptr addrspace(5) %"6", align 8 @@ -26,32 +28,36 @@ define private i64 @"2"(i64 %"21", ptr addrspace(3) %"62", ptr addrspace(3) %"63 ret i64 %"28" } -define private i64 @"7"(i64 %"29", i64 %"30", ptr addrspace(3) %"64", ptr addrspace(3) %"65") #0 { -"60": +define private i64 @"7"(i64 %"29", i64 %"30", ptr addrspace(3) %"61", ptr addrspace(3) %"62") #0 { %"9" = alloca i64, align 8, addrspace(5) %"10" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) %"19" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"19", align 1 + br label %1 + +1: ; preds = %0 store i64 %"29", ptr addrspace(5) %"9", align 8 store i64 %"30", ptr addrspace(5) %"10", align 8 + store i1 false, ptr addrspace(5) %"19", align 1 %"31" = load i64, ptr addrspace(5) %"9", align 8 - store i64 %"31", ptr addrspace(3) %"64", align 8 + store i64 %"31", ptr addrspace(3) %"61", align 8 %"33" = load i64, ptr addrspace(5) %"10", align 8 - %"32" = call i64 @"2"(i64 %"33", ptr addrspace(3) %"64", ptr addrspace(3) %"65") + %"32" = call i64 @"2"(i64 %"33", ptr addrspace(3) %"61", ptr addrspace(3) %"62") store i64 %"32", ptr addrspace(5) %"8", align 8 %"34" = load i64, ptr addrspace(5) %"8", align 8 ret i64 %"34" } define 
protected amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i64) %"48", ptr addrspace(4) byref(i64) %"49") #0 { -"61": %"20" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"20", align 1 %"14" = alloca i64, align 8, addrspace(5) %"15" = alloca i64, align 8, addrspace(5) %"16" = alloca i64, align 8, addrspace(5) %"17" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"20", align 1 %"35" = load i64, ptr addrspace(4) %"48", align 8 store i64 %"35", ptr addrspace(5) %"14", align 8 %"36" = load i64, ptr addrspace(4) %"49", align 8 @@ -62,8 +68,8 @@ define protected amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i store i64 %"37", ptr addrspace(5) %"16", align 8 %"40" = load i64, ptr addrspace(5) %"14", align 8 %"55" = inttoptr i64 %"40" to ptr addrspace(1) - %"67" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 8 - %"39" = load i64, ptr addrspace(1) %"67", align 8 + %"64" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 8 + %"39" = load i64, ptr addrspace(1) %"64", align 8 store i64 %"39", ptr addrspace(5) %"17", align 8 %"42" = load i64, ptr addrspace(5) %"16", align 8 %"43" = load i64, ptr addrspace(5) %"17", align 8 diff --git a/ptx/src/test/spirv_run/shared_variable.ll b/ptx/src/test/spirv_run/shared_variable.ll index 859a767c..ac1e519c 100644 --- a/ptx/src/test/spirv_run/shared_variable.ll +++ b/ptx/src/test/spirv_run/shared_variable.ll @@ -4,13 +4,15 @@ target triple = "amdgcn-amd-amdhsa" @"4" = private addrspace(3) global [128 x i8] undef, align 4 define protected amdgpu_kernel void @shared_variable(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"24": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i64, align 8, addrspace(5) + br label %1 + 
+1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"10", ptr addrspace(5) %"5", align 8 %"11" = load i64, ptr addrspace(4) %"19", align 8 diff --git a/ptx/src/test/spirv_run/shf.ll b/ptx/src/test/spirv_run/shf.ll index 22be32ad..317a60f1 100644 --- a/ptx/src/test/spirv_run/shf.ll +++ b/ptx/src/test/spirv_run/shf.ll @@ -2,14 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @shf(ptr addrspace(4) byref(i64) %"24", ptr addrspace(4) byref(i64) %"25") #0 { -"32": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"24", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"25", align 8 @@ -20,8 +22,8 @@ define protected amdgpu_kernel void @shf(ptr addrspace(4) byref(i64) %"24", ptr store i32 %"12", ptr addrspace(5) %"6", align 4 %"15" = load i64, ptr addrspace(5) %"4", align 8 %"27" = inttoptr i64 %"15" to ptr - %"34" = getelementptr inbounds i8, ptr %"27", i64 4 - %"14" = load i32, ptr %"34", align 4 + %"33" = getelementptr inbounds i8, ptr %"27", i64 4 + %"14" = load i32, ptr %"33", align 4 store i32 %"14", ptr addrspace(5) %"7", align 4 %"17" = load i32, ptr addrspace(5) %"6", align 4 %"18" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/shl.ll b/ptx/src/test/spirv_run/shl.ll index 40c33659..9f9b6092 100644 --- a/ptx/src/test/spirv_run/shl.ll +++ b/ptx/src/test/spirv_run/shl.ll @@ -2,13 +2,15 @@ target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"24": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 @@ -18,8 +20,8 @@ define protected amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"18", ptr %"11" = load i64, ptr %"20", align 8 store i64 %"11", ptr addrspace(5) %"6", align 8 %"14" = load i64, ptr addrspace(5) %"6", align 8 - %0 = shl i64 %"14", 2 - %"21" = select i1 false, i64 0, i64 %0 + %2 = shl i64 %"14", 2 + %"21" = select i1 false, i64 0, i64 %2 store i64 %"21", ptr addrspace(5) %"7", align 8 %"15" = load i64, ptr addrspace(5) %"5", align 8 %"16" = load i64, ptr addrspace(5) %"7", align 8 diff --git a/ptx/src/test/spirv_run/shl_link_hack.ll b/ptx/src/test/spirv_run/shl_link_hack.ll index 9ac3883b..29d1c74a 100644 --- a/ptx/src/test/spirv_run/shl_link_hack.ll +++ b/ptx/src/test/spirv_run/shl_link_hack.ll @@ -4,14 +4,16 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__atom_relaxed_gpu_generic_inc(ptr, i32) #0 define protected amdgpu_kernel void @shl_link_hack(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #1 { -"29": %"9" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"9", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) + br label %1 + 
+1: ; preds = %0 + store i1 false, ptr addrspace(5) %"9", align 1 %"10" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"10", ptr addrspace(5) %"4", align 8 %"11" = load i64, ptr addrspace(4) %"23", align 8 @@ -25,8 +27,8 @@ define protected amdgpu_kernel void @shl_link_hack(ptr addrspace(4) byref(i64) % %"14" = load i64, ptr %"25", align 8 store i64 %"14", ptr addrspace(5) %"6", align 8 %"17" = load i64, ptr addrspace(5) %"6", align 8 - %0 = shl i64 %"17", 2 - %"26" = select i1 false, i64 0, i64 %0 + %2 = shl i64 %"17", 2 + %"26" = select i1 false, i64 0, i64 %2 store i64 %"26", ptr addrspace(5) %"7", align 8 %"18" = load i64, ptr addrspace(5) %"5", align 8 %"19" = load i64, ptr addrspace(5) %"7", align 8 diff --git a/ptx/src/test/spirv_run/shl_overflow.ll b/ptx/src/test/spirv_run/shl_overflow.ll index 80d4871b..86178d88 100644 --- a/ptx/src/test/spirv_run/shl_overflow.ll +++ b/ptx/src/test/spirv_run/shl_overflow.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @shl_overflow(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #0 { -"62": %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -12,6 +10,10 @@ define protected amdgpu_kernel void @shl_overflow(ptr addrspace(4) byref(i64) %" %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"11", align 1 %"12" = load i64, ptr addrspace(4) %"47", align 8 store i64 %"12", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(4) %"48", align 8 @@ -22,24 +24,24 @@ define protected amdgpu_kernel void @shl_overflow(ptr addrspace(4) byref(i64) %" store i32 %"14", ptr 
addrspace(5) %"6", align 4 %"17" = load i64, ptr addrspace(5) %"4", align 8 %"50" = inttoptr i64 %"17" to ptr - %"64" = getelementptr inbounds i8, ptr %"50", i64 4 - %"16" = load i32, ptr %"64", align 4 + %"63" = getelementptr inbounds i8, ptr %"50", i64 4 + %"16" = load i32, ptr %"63", align 4 store i32 %"16", ptr addrspace(5) %"8", align 4 %"19" = load i64, ptr addrspace(5) %"4", align 8 %"51" = inttoptr i64 %"19" to ptr - %"66" = getelementptr inbounds i8, ptr %"51", i64 8 - %"18" = load i32, ptr %"66", align 4 + %"65" = getelementptr inbounds i8, ptr %"51", i64 8 + %"18" = load i32, ptr %"65", align 4 store i32 %"18", ptr addrspace(5) %"9", align 4 %"21" = load i64, ptr addrspace(5) %"4", align 8 %"52" = inttoptr i64 %"21" to ptr - %"68" = getelementptr inbounds i8, ptr %"52", i64 12 - %"20" = load i32, ptr %"68", align 4 + %"67" = getelementptr inbounds i8, ptr %"52", i64 12 + %"20" = load i32, ptr %"67", align 4 store i32 %"20", ptr addrspace(5) %"10", align 4 %"23" = load i32, ptr addrspace(5) %"6", align 4 %"24" = load i32, ptr addrspace(5) %"8", align 4 - %0 = icmp ugt i32 %"24", 31 - %1 = shl i32 %"23", %"24" - %"53" = select i1 %0, i32 0, i32 %1 + %2 = icmp ugt i32 %"24", 31 + %3 = shl i32 %"23", %"24" + %"53" = select i1 %2, i32 0, i32 %3 store i32 %"53", ptr addrspace(5) %"7", align 4 %"25" = load i64, ptr addrspace(5) %"5", align 8 %"26" = load i32, ptr addrspace(5) %"7", align 4 @@ -47,26 +49,26 @@ define protected amdgpu_kernel void @shl_overflow(ptr addrspace(4) byref(i64) %" store i32 %"26", ptr %"55", align 4 %"28" = load i32, ptr addrspace(5) %"6", align 4 %"29" = load i32, ptr addrspace(5) %"9", align 4 - %2 = icmp ugt i32 %"29", 31 - %3 = shl i32 %"28", %"29" - %"56" = select i1 %2, i32 0, i32 %3 + %4 = icmp ugt i32 %"29", 31 + %5 = shl i32 %"28", %"29" + %"56" = select i1 %4, i32 0, i32 %5 store i32 %"56", ptr addrspace(5) %"7", align 4 %"30" = load i64, ptr addrspace(5) %"5", align 8 %"31" = load i32, ptr addrspace(5) %"7", align 4 %"58" = 
inttoptr i64 %"30" to ptr - %"70" = getelementptr inbounds i8, ptr %"58", i64 4 - store i32 %"31", ptr %"70", align 4 + %"69" = getelementptr inbounds i8, ptr %"58", i64 4 + store i32 %"31", ptr %"69", align 4 %"33" = load i32, ptr addrspace(5) %"6", align 4 %"34" = load i32, ptr addrspace(5) %"10", align 4 - %4 = icmp ugt i32 %"34", 31 - %5 = shl i32 %"33", %"34" - %"59" = select i1 %4, i32 0, i32 %5 + %6 = icmp ugt i32 %"34", 31 + %7 = shl i32 %"33", %"34" + %"59" = select i1 %6, i32 0, i32 %7 store i32 %"59", ptr addrspace(5) %"7", align 4 %"35" = load i64, ptr addrspace(5) %"5", align 8 %"36" = load i32, ptr addrspace(5) %"7", align 4 %"61" = inttoptr i64 %"35" to ptr - %"72" = getelementptr inbounds i8, ptr %"61", i64 8 - store i32 %"36", ptr %"72", align 4 + %"71" = getelementptr inbounds i8, ptr %"61", i64 8 + store i32 %"36", ptr %"71", align 4 ret void } diff --git a/ptx/src/test/spirv_run/shr_s32.ll b/ptx/src/test/spirv_run/shr_s32.ll index 77c71f9e..a6a6d98c 100644 --- a/ptx/src/test/spirv_run/shr_s32.ll +++ b/ptx/src/test/spirv_run/shr_s32.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @shr_s32(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"28": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,14 +21,14 @@ define protected amdgpu_kernel void @shr_s32(ptr addrspace(4) byref(i64) %"22", store i32 %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) 
%"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"30" = getelementptr inbounds i8, ptr %"25", i64 4 - %"13" = load i32, ptr %"30", align 4 + %"29" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"29", align 4 store i32 %"13", ptr addrspace(5) %"7", align 4 %"16" = load i32, ptr addrspace(5) %"6", align 4 %"17" = load i32, ptr addrspace(5) %"7", align 4 - %0 = icmp ugt i32 %"17", 31 - %1 = ashr i32 %"16", %"17" - %"15" = select i1 %0, i32 -1, i32 %1 + %2 = icmp ugt i32 %"17", 31 + %3 = ashr i32 %"16", %"17" + %"15" = select i1 %2, i32 -1, i32 %3 store i32 %"15", ptr addrspace(5) %"6", align 4 %"18" = load i64, ptr addrspace(5) %"5", align 8 %"19" = load i32, ptr addrspace(5) %"6", align 4 diff --git a/ptx/src/test/spirv_run/shr_u32.ll b/ptx/src/test/spirv_run/shr_u32.ll index 22c8761a..52153d99 100644 --- a/ptx/src/test/spirv_run/shr_u32.ll +++ b/ptx/src/test/spirv_run/shr_u32.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @shr_u32(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { -"45": %"11" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"11", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -12,6 +10,10 @@ define protected amdgpu_kernel void @shr_u32(ptr addrspace(4) byref(i64) %"36", %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) %"10" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"11", align 1 %"12" = load i64, ptr addrspace(4) %"36", align 8 store i64 %"12", ptr addrspace(5) %"4", align 8 %"13" = load i64, ptr addrspace(4) %"37", align 8 @@ -22,25 +24,25 @@ define protected amdgpu_kernel void @shr_u32(ptr addrspace(4) byref(i64) %"36", store i32 %"14", ptr addrspace(5) %"6", align 4 %"17" 
= load i64, ptr addrspace(5) %"4", align 8 %"39" = inttoptr i64 %"17" to ptr - %"47" = getelementptr inbounds i8, ptr %"39", i64 4 - %"16" = load i32, ptr %"47", align 4 + %"46" = getelementptr inbounds i8, ptr %"39", i64 4 + %"16" = load i32, ptr %"46", align 4 store i32 %"16", ptr addrspace(5) %"7", align 4 %"19" = load i64, ptr addrspace(5) %"4", align 8 %"40" = inttoptr i64 %"19" to ptr - %"49" = getelementptr inbounds i8, ptr %"40", i64 8 - %"18" = load i32, ptr %"49", align 4 + %"48" = getelementptr inbounds i8, ptr %"40", i64 8 + %"18" = load i32, ptr %"48", align 4 store i32 %"18", ptr addrspace(5) %"8", align 4 %"21" = load i32, ptr addrspace(5) %"6", align 4 %"22" = load i32, ptr addrspace(5) %"7", align 4 - %0 = icmp ugt i32 %"22", 31 - %1 = lshr i32 %"21", %"22" - %"20" = select i1 %0, i32 0, i32 %1 + %2 = icmp ugt i32 %"22", 31 + %3 = lshr i32 %"21", %"22" + %"20" = select i1 %2, i32 0, i32 %3 store i32 %"20", ptr addrspace(5) %"9", align 4 %"24" = load i32, ptr addrspace(5) %"6", align 4 %"25" = load i32, ptr addrspace(5) %"8", align 4 - %2 = icmp ugt i32 %"25", 31 - %3 = lshr i32 %"24", %"25" - %"23" = select i1 %2, i32 0, i32 %3 + %4 = icmp ugt i32 %"25", 31 + %5 = lshr i32 %"24", %"25" + %"23" = select i1 %4, i32 0, i32 %5 store i32 %"23", ptr addrspace(5) %"10", align 4 %"26" = load i64, ptr addrspace(5) %"5", align 8 %"27" = load i32, ptr addrspace(5) %"9", align 4 @@ -49,8 +51,8 @@ define protected amdgpu_kernel void @shr_u32(ptr addrspace(4) byref(i64) %"36", %"28" = load i64, ptr addrspace(5) %"5", align 8 %"29" = load i32, ptr addrspace(5) %"10", align 4 %"44" = inttoptr i64 %"28" to ptr - %"51" = getelementptr inbounds i8, ptr %"44", i64 4 - store i32 %"29", ptr %"51", align 4 + %"50" = getelementptr inbounds i8, ptr %"44", i64 4 + store i32 %"29", ptr %"50", align 4 ret void } diff --git a/ptx/src/test/spirv_run/sign_extend.ll b/ptx/src/test/spirv_run/sign_extend.ll index ef262615..98494e34 100644 --- a/ptx/src/test/spirv_run/sign_extend.ll 
+++ b/ptx/src/test/spirv_run/sign_extend.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @sign_extend(ptr addrspace(4) byref(i64) %"14", ptr addrspace(4) byref(i64) %"15") #0 { -"19": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"14", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"15", align 8 diff --git a/ptx/src/test/spirv_run/sin.ll b/ptx/src/test/spirv_run/sin.ll index f38aedd0..33f510c8 100644 --- a/ptx/src/test/spirv_run/sin.ll +++ b/ptx/src/test/spirv_run/sin.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @sin(ptr addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 diff --git a/ptx/src/test/spirv_run/sqrt.ll b/ptx/src/test/spirv_run/sqrt.ll index c8e4ec0f..f86753ea 100644 --- a/ptx/src/test/spirv_run/sqrt.ll +++ b/ptx/src/test/spirv_run/sqrt.ll @@ -2,12 +2,14 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @sqrt(ptr 
addrspace(4) byref(i64) %"16", ptr addrspace(4) byref(i64) %"17") #0 { -"20": %"7" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"7", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca float, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"7", align 1 %"8" = load i64, ptr addrspace(4) %"16", align 8 store i64 %"8", ptr addrspace(5) %"4", align 8 %"9" = load i64, ptr addrspace(4) %"17", align 8 diff --git a/ptx/src/test/spirv_run/sub.ll b/ptx/src/test/spirv_run/sub.ll index 83fec5f7..24a12bd2 100644 --- a/ptx/src/test/spirv_run/sub.ll +++ b/ptx/src/test/spirv_run/sub.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @sub(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #0 { -"22": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i64, align 8, addrspace(5) %"7" = alloca i64, align 8, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"18", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"19", align 8 diff --git a/ptx/src/test/spirv_run/subc_cc.ll b/ptx/src/test/spirv_run/subc_cc.ll index 0101b834..cdd5c0b8 100644 --- a/ptx/src/test/spirv_run/subc_cc.ll +++ b/ptx/src/test/spirv_run/subc_cc.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57", ptr addrspace(4) byref(i64) %"58") #0 { -"72": %"13" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"13", align 1 %"4" = alloca 
i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) @@ -14,6 +12,10 @@ define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57", %"10" = alloca i32, align 4, addrspace(5) %"11" = alloca i32, align 4, addrspace(5) %"12" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"13", align 1 %"18" = load i64, ptr addrspace(4) %"57", align 8 store i64 %"18", ptr addrspace(5) %"4", align 8 %"19" = load i64, ptr addrspace(4) %"58", align 8 @@ -24,24 +26,24 @@ define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57", store i32 %"59", ptr addrspace(5) %"9", align 4 %"23" = load i64, ptr addrspace(5) %"4", align 8 %"61" = inttoptr i64 %"23" to ptr - %"74" = getelementptr inbounds i8, ptr %"61", i64 4 - %"62" = load i32, ptr %"74", align 4 + %"73" = getelementptr inbounds i8, ptr %"61", i64 4 + %"62" = load i32, ptr %"73", align 4 store i32 %"62", ptr addrspace(5) %"10", align 4 %"25" = load i64, ptr addrspace(5) %"4", align 8 %"63" = inttoptr i64 %"25" to ptr - %"76" = getelementptr inbounds i8, ptr %"63", i64 8 - %"24" = load i32, ptr %"76", align 4 + %"75" = getelementptr inbounds i8, ptr %"63", i64 8 + %"24" = load i32, ptr %"75", align 4 store i32 %"24", ptr addrspace(5) %"11", align 4 %"27" = load i64, ptr addrspace(5) %"4", align 8 %"64" = inttoptr i64 %"27" to ptr - %"78" = getelementptr inbounds i8, ptr %"64", i64 12 - %"26" = load i32, ptr %"78", align 4 + %"77" = getelementptr inbounds i8, ptr %"64", i64 12 + %"26" = load i32, ptr %"77", align 4 store i32 %"26", ptr addrspace(5) %"12", align 4 %"29" = load i32, ptr addrspace(5) %"9", align 4 %"30" = load i32, ptr addrspace(5) %"10", align 4 - %0 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"29", i32 %"30") - %"28" = extractvalue { i32, i1 } %0, 0 - %"14" = extractvalue { i32, i1 } %0, 1 + %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"29", i32 %"30") 
+ %"28" = extractvalue { i32, i1 } %2, 0 + %"14" = extractvalue { i32, i1 } %2, 1 store i32 %"28", ptr addrspace(5) %"6", align 4 %"31" = xor i1 %"14", true store i1 %"31", ptr addrspace(5) %"13", align 1 @@ -49,14 +51,14 @@ define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57", %"15" = xor i1 %"32", true %"34" = load i32, ptr addrspace(5) %"6", align 4 %"35" = load i32, ptr addrspace(5) %"11", align 4 - %1 = zext i1 %"15" to i32 - %2 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"34", i32 %"35") - %3 = extractvalue { i32, i1 } %2, 0 - %4 = extractvalue { i32, i1 } %2, 1 - %5 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %3, i32 %1) - %"33" = extractvalue { i32, i1 } %5, 0 - %6 = extractvalue { i32, i1 } %5, 1 - %"16" = xor i1 %4, %6 + %3 = zext i1 %"15" to i32 + %4 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %"34", i32 %"35") + %5 = extractvalue { i32, i1 } %4, 0 + %6 = extractvalue { i32, i1 } %4, 1 + %7 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %5, i32 %3) + %"33" = extractvalue { i32, i1 } %7, 0 + %8 = extractvalue { i32, i1 } %7, 1 + %"16" = xor i1 %6, %8 store i32 %"33", ptr addrspace(5) %"7", align 4 %"36" = xor i1 %"16", true store i1 %"36", ptr addrspace(5) %"13", align 1 @@ -64,9 +66,9 @@ define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57", %"17" = xor i1 %"37", true %"39" = load i32, ptr addrspace(5) %"7", align 4 %"40" = load i32, ptr addrspace(5) %"12", align 4 - %7 = zext i1 %"17" to i32 - %8 = sub i32 %"39", %"40" - %"38" = sub i32 %8, %7 + %9 = zext i1 %"17" to i32 + %10 = sub i32 %"39", %"40" + %"38" = sub i32 %10, %9 store i32 %"38", ptr addrspace(5) %"8", align 4 %"41" = load i64, ptr addrspace(5) %"5", align 8 %"42" = load i32, ptr addrspace(5) %"6", align 4 @@ -75,13 +77,13 @@ define protected amdgpu_kernel void @subc_cc(ptr addrspace(4) byref(i64) %"57", %"43" = load i64, ptr addrspace(5) %"5", align 8 %"44" = load i32, ptr addrspace(5) %"7", align 4 %"70" = 
inttoptr i64 %"43" to ptr - %"80" = getelementptr inbounds i8, ptr %"70", i64 4 - store i32 %"44", ptr %"80", align 4 + %"79" = getelementptr inbounds i8, ptr %"70", i64 4 + store i32 %"44", ptr %"79", align 4 %"45" = load i64, ptr addrspace(5) %"5", align 8 %"46" = load i32, ptr addrspace(5) %"8", align 4 %"71" = inttoptr i64 %"45" to ptr - %"82" = getelementptr inbounds i8, ptr %"71", i64 8 - store i32 %"46", ptr %"82", align 4 + %"81" = getelementptr inbounds i8, ptr %"71", i64 8 + store i32 %"46", ptr %"81", align 4 ret void } diff --git a/ptx/src/test/spirv_run/vector.ll b/ptx/src/test/spirv_run/vector.ll index b60aaecd..f311be72 100644 --- a/ptx/src/test/spirv_run/vector.ll +++ b/ptx/src/test/spirv_run/vector.ll @@ -2,69 +2,74 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define private <2 x i32> @"1"(<2 x i32> %"18") #0 { -"50": %"3" = alloca <2 x i32>, align 8, addrspace(5) %"2" = alloca <2 x i32>, align 8, addrspace(5) %"16" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"16", align 1 %"4" = alloca <2 x i32>, align 8, addrspace(5) %"5" = alloca i32, align 4, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) - store <2 x i32> %"18", ptr addrspace(5) %"3", align 8 - %0 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"3", i32 0, i32 0 - %"20" = load i32, ptr addrspace(5) %0, align 4 %1 = alloca i32, align 4, addrspace(5) + %2 = alloca i32, align 4, addrspace(5) + %3 = alloca i32, align 4, addrspace(5) + %4 = alloca i32, align 4, addrspace(5) + %5 = alloca i32, align 4, addrspace(5) + %6 = alloca <2 x i32>, align 8, addrspace(5) + br label %7 + +7: ; preds = %0 + store <2 x i32> %"18", ptr addrspace(5) %"3", align 8 + store i1 false, ptr addrspace(5) %"16", align 1 + %8 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"3", i32 0, i32 0 + %"20" = load i32, ptr addrspace(5) %8, align 4 store i32 %"20", ptr addrspace(5) %1, align 4 %"19" = load i32, 
ptr addrspace(5) %1, align 4 store i32 %"19", ptr addrspace(5) %"5", align 4 - %2 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"3", i32 0, i32 1 - %"22" = load i32, ptr addrspace(5) %2, align 4 - %3 = alloca i32, align 4, addrspace(5) - store i32 %"22", ptr addrspace(5) %3, align 4 - %"21" = load i32, ptr addrspace(5) %3, align 4 + %9 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"3", i32 0, i32 1 + %"22" = load i32, ptr addrspace(5) %9, align 4 + store i32 %"22", ptr addrspace(5) %2, align 4 + %"21" = load i32, ptr addrspace(5) %2, align 4 store i32 %"21", ptr addrspace(5) %"6", align 4 %"24" = load i32, ptr addrspace(5) %"5", align 4 %"25" = load i32, ptr addrspace(5) %"6", align 4 %"23" = add i32 %"24", %"25" store i32 %"23", ptr addrspace(5) %"6", align 4 %"27" = load i32, ptr addrspace(5) %"6", align 4 - %4 = alloca i32, align 4, addrspace(5) - store i32 %"27", ptr addrspace(5) %4, align 4 - %"26" = load i32, ptr addrspace(5) %4, align 4 - %5 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 0 - store i32 %"26", ptr addrspace(5) %5, align 4 - %"29" = load i32, ptr addrspace(5) %"6", align 4 - %6 = alloca i32, align 4, addrspace(5) - store i32 %"29", ptr addrspace(5) %6, align 4 - %"28" = load i32, ptr addrspace(5) %6, align 4 - %7 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 1 - store i32 %"28", ptr addrspace(5) %7, align 4 - %8 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 1 - %"31" = load i32, ptr addrspace(5) %8, align 4 - %9 = alloca i32, align 4, addrspace(5) - store i32 %"31", ptr addrspace(5) %9, align 4 - %"30" = load i32, ptr addrspace(5) %9, align 4 + store i32 %"27", ptr addrspace(5) %3, align 4 + %"26" = load i32, ptr addrspace(5) %3, align 4 %10 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 0 - store i32 %"30", ptr addrspace(5) %10, align 4 + store i32 %"26", ptr addrspace(5) %10, align 4 + %"29" = load i32, ptr addrspace(5) %"6", align 4 
+ store i32 %"29", ptr addrspace(5) %4, align 4 + %"28" = load i32, ptr addrspace(5) %4, align 4 + %11 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 1 + store i32 %"28", ptr addrspace(5) %11, align 4 + %12 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 1 + %"31" = load i32, ptr addrspace(5) %12, align 4 + store i32 %"31", ptr addrspace(5) %5, align 4 + %"30" = load i32, ptr addrspace(5) %5, align 4 + %13 = getelementptr inbounds <2 x i32>, ptr addrspace(5) %"4", i32 0, i32 0 + store i32 %"30", ptr addrspace(5) %13, align 4 %"33" = load <2 x i32>, ptr addrspace(5) %"4", align 8 - %11 = alloca <2 x i32>, align 8, addrspace(5) - store <2 x i32> %"33", ptr addrspace(5) %11, align 8 - %"32" = load <2 x i32>, ptr addrspace(5) %11, align 8 + store <2 x i32> %"33", ptr addrspace(5) %6, align 8 + %"32" = load <2 x i32>, ptr addrspace(5) %6, align 8 store <2 x i32> %"32", ptr addrspace(5) %"2", align 8 %"34" = load <2 x i32>, ptr addrspace(5) %"2", align 8 ret <2 x i32> %"34" } define protected amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #0 { -"51": %"17" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"17", align 1 %"10" = alloca i64, align 8, addrspace(5) %"11" = alloca i64, align 8, addrspace(5) %"12" = alloca <2 x i32>, align 8, addrspace(5) %"13" = alloca i32, align 4, addrspace(5) %"14" = alloca i32, align 4, addrspace(5) %"15" = alloca i64, align 8, addrspace(5) + %1 = alloca i64, align 8, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"17", align 1 %"35" = load i64, ptr addrspace(4) %"45", align 8 store i64 %"35", ptr addrspace(5) %"10", align 8 %"36" = load i64, ptr addrspace(4) %"46", align 8 @@ -78,9 +83,8 @@ define protected amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"45", p store <2 x i32> %"39", ptr addrspace(5) %"12", align 8 %"42" = load <2 x i32>, ptr addrspace(5) %"12", align 8 %"48" = 
bitcast <2 x i32> %"42" to i64 - %0 = alloca i64, align 8, addrspace(5) - store i64 %"48", ptr addrspace(5) %0, align 8 - %"41" = load i64, ptr addrspace(5) %0, align 8 + store i64 %"48", ptr addrspace(5) %1, align 8 + %"41" = load i64, ptr addrspace(5) %1, align 8 store i64 %"41", ptr addrspace(5) %"15", align 8 %"43" = load i64, ptr addrspace(5) %"11", align 8 %"44" = load <2 x i32>, ptr addrspace(5) %"12", align 8 diff --git a/ptx/src/test/spirv_run/vector4.ll b/ptx/src/test/spirv_run/vector4.ll index 494b1aff..7d92885e 100644 --- a/ptx/src/test/spirv_run/vector4.ll +++ b/ptx/src/test/spirv_run/vector4.ll @@ -2,13 +2,16 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"17", ptr addrspace(4) byref(i64) %"18") #0 { -"23": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca <4 x i32>, align 16, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + %1 = alloca i32, align 4, addrspace(5) + br label %2 + +2: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"17", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"18", align 8 @@ -17,9 +20,8 @@ define protected amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"17", %"19" = inttoptr i64 %"12" to ptr %"11" = load <4 x i32>, ptr %"19", align 16 store <4 x i32> %"11", ptr addrspace(5) %"6", align 16 - %0 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %"6", i32 0, i32 3 - %"14" = load i32, ptr addrspace(5) %0, align 4 - %1 = alloca i32, align 4, addrspace(5) + %3 = getelementptr inbounds <4 x i32>, ptr addrspace(5) %"6", i32 0, i32 3 + %"14" = load i32, ptr addrspace(5) %3, align 4 store i32 %"14", ptr addrspace(5) %1, align 4 %"20" = load i32, ptr 
addrspace(5) %1, align 4 store i32 %"20", ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/test/spirv_run/vector_extract.ll b/ptx/src/test/spirv_run/vector_extract.ll index d877dc73..ea2e2db3 100644 --- a/ptx/src/test/spirv_run/vector_extract.ll +++ b/ptx/src/test/spirv_run/vector_extract.ll @@ -2,9 +2,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"48", ptr addrspace(4) byref(i64) %"49") #0 { -"60": %"17" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"17", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i16, align 2, addrspace(5) @@ -12,6 +10,13 @@ define protected amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"8" = alloca i16, align 2, addrspace(5) %"9" = alloca i16, align 2, addrspace(5) %"10" = alloca <4 x i16>, align 8, addrspace(5) + %1 = alloca <4 x i16>, align 8, addrspace(5) + %2 = alloca <4 x i16>, align 8, addrspace(5) + %3 = alloca <4 x i16>, align 8, addrspace(5) + br label %4 + +4: ; preds = %0 + store i1 false, ptr addrspace(5) %"17", align 1 %"18" = load i64, ptr addrspace(4) %"48", align 8 store i64 %"18", ptr addrspace(5) %"4", align 8 %"19" = load i64, ptr addrspace(4) %"49", align 8 @@ -35,18 +40,16 @@ define protected amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"26" = load i16, ptr addrspace(5) %"8", align 2 %"27" = load i16, ptr addrspace(5) %"9", align 2 %"28" = load i16, ptr addrspace(5) %"6", align 2 - %0 = insertelement <4 x i16> undef, i16 %"25", i32 0 - %1 = insertelement <4 x i16> %0, i16 %"26", i32 1 - %2 = insertelement <4 x i16> %1, i16 %"27", i32 2 - %"12" = insertelement <4 x i16> %2, i16 %"28", i32 3 - %3 = alloca <4 x i16>, align 8, addrspace(5) - store <4 x i16> %"12", ptr addrspace(5) %3, align 8 - %"29" = load <4 x i16>, ptr addrspace(5) %3, align 8 + %5 
= insertelement <4 x i16> undef, i16 %"25", i32 0 + %6 = insertelement <4 x i16> %5, i16 %"26", i32 1 + %7 = insertelement <4 x i16> %6, i16 %"27", i32 2 + %"12" = insertelement <4 x i16> %7, i16 %"28", i32 3 + store <4 x i16> %"12", ptr addrspace(5) %1, align 8 + %"29" = load <4 x i16>, ptr addrspace(5) %1, align 8 store <4 x i16> %"29", ptr addrspace(5) %"10", align 8 %"30" = load <4 x i16>, ptr addrspace(5) %"10", align 8 - %4 = alloca <4 x i16>, align 8, addrspace(5) - store <4 x i16> %"30", ptr addrspace(5) %4, align 8 - %"13" = load <4 x i16>, ptr addrspace(5) %4, align 8 + store <4 x i16> %"30", ptr addrspace(5) %2, align 8 + %"13" = load <4 x i16>, ptr addrspace(5) %2, align 8 %"31" = extractelement <4 x i16> %"13", i32 0 %"32" = extractelement <4 x i16> %"13", i32 1 %"33" = extractelement <4 x i16> %"13", i32 2 @@ -59,13 +62,12 @@ define protected amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"36" = load i16, ptr addrspace(5) %"9", align 2 %"37" = load i16, ptr addrspace(5) %"6", align 2 %"38" = load i16, ptr addrspace(5) %"7", align 2 - %5 = insertelement <4 x i16> undef, i16 %"35", i32 0 - %6 = insertelement <4 x i16> %5, i16 %"36", i32 1 - %7 = insertelement <4 x i16> %6, i16 %"37", i32 2 - %"15" = insertelement <4 x i16> %7, i16 %"38", i32 3 - %8 = alloca <4 x i16>, align 8, addrspace(5) - store <4 x i16> %"15", ptr addrspace(5) %8, align 8 - %"14" = load <4 x i16>, ptr addrspace(5) %8, align 8 + %8 = insertelement <4 x i16> undef, i16 %"35", i32 0 + %9 = insertelement <4 x i16> %8, i16 %"36", i32 1 + %10 = insertelement <4 x i16> %9, i16 %"37", i32 2 + %"15" = insertelement <4 x i16> %10, i16 %"38", i32 3 + store <4 x i16> %"15", ptr addrspace(5) %3, align 8 + %"14" = load <4 x i16>, ptr addrspace(5) %3, align 8 %"39" = extractelement <4 x i16> %"14", i32 0 %"40" = extractelement <4 x i16> %"14", i32 1 %"41" = extractelement <4 x i16> %"14", i32 2 @@ -82,10 +84,10 @@ define protected amdgpu_kernel void @vector_extract(ptr 
addrspace(4) byref(i64) %"56" = trunc i16 %"44" to i8 %"57" = trunc i16 %"45" to i8 %"58" = trunc i16 %"46" to i8 - %9 = insertelement <4 x i8> undef, i8 %"55", i32 0 - %10 = insertelement <4 x i8> %9, i8 %"56", i32 1 - %11 = insertelement <4 x i8> %10, i8 %"57", i32 2 - %"16" = insertelement <4 x i8> %11, i8 %"58", i32 3 + %11 = insertelement <4 x i8> undef, i8 %"55", i32 0 + %12 = insertelement <4 x i8> %11, i8 %"56", i32 1 + %13 = insertelement <4 x i8> %12, i8 %"57", i32 2 + %"16" = insertelement <4 x i8> %13, i8 %"58", i32 3 %"47" = load i64, ptr addrspace(5) %"5", align 8 %"59" = inttoptr i64 %"47" to ptr addrspace(1) store <4 x i8> %"16", ptr addrspace(1) %"59", align 4 diff --git a/ptx/src/test/spirv_run/vote_ballot.ll b/ptx/src/test/spirv_run/vote_ballot.ll index fd31f1a3..efba70ad 100644 --- a/ptx/src/test/spirv_run/vote_ballot.ll +++ b/ptx/src/test/spirv_run/vote_ballot.ll @@ -4,15 +4,17 @@ target triple = "amdgcn-amd-amdhsa" declare i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1, i32) #0 define protected amdgpu_kernel void @vote_ballot(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 { -"50": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"11" = load i64, ptr addrspace(4) %"41", align 8 store i64 %"11", ptr addrspace(5) %"5", align 8 %"42" = call i32 @__zluda_ptx_impl__vote_sync_ballot_b32_32(i1 true, i32 1) @@ -26,23 +28,23 @@ define protected amdgpu_kernel void @vote_ballot(ptr addrspace(4) byref(i64) %"4 %"16" = load i64, ptr addrspace(5) %"5", align 8 %"17" = load i32, ptr addrspace(5) %"6", align 4 %"46" = inttoptr i64 %"16" to ptr - %"56" = getelementptr 
inbounds i8, ptr %"46", i64 0 - store i32 %"17", ptr %"56", align 4 + %"55" = getelementptr inbounds i8, ptr %"46", i64 0 + store i32 %"17", ptr %"55", align 4 %"18" = load i64, ptr addrspace(5) %"5", align 8 %"19" = load i32, ptr addrspace(5) %"7", align 4 %"47" = inttoptr i64 %"18" to ptr - %"58" = getelementptr inbounds i8, ptr %"47", i64 4 - store i32 %"19", ptr %"58", align 4 + %"57" = getelementptr inbounds i8, ptr %"47", i64 4 + store i32 %"19", ptr %"57", align 4 %"20" = load i64, ptr addrspace(5) %"5", align 8 %"21" = load i32, ptr addrspace(5) %"8", align 4 %"48" = inttoptr i64 %"20" to ptr - %"60" = getelementptr inbounds i8, ptr %"48", i64 8 - store i32 %"21", ptr %"60", align 4 + %"59" = getelementptr inbounds i8, ptr %"48", i64 8 + store i32 %"21", ptr %"59", align 4 %"22" = load i64, ptr addrspace(5) %"5", align 8 %"23" = load i32, ptr addrspace(5) %"9", align 4 %"49" = inttoptr i64 %"22" to ptr - %"62" = getelementptr inbounds i8, ptr %"49", i64 12 - store i32 %"23", ptr %"62", align 4 + %"61" = getelementptr inbounds i8, ptr %"49", i64 12 + store i32 %"23", ptr %"61", align 4 ret void } diff --git a/ptx/src/test/spirv_run/vshr.ll b/ptx/src/test/spirv_run/vshr.ll index 4433bf24..3d247701 100644 --- a/ptx/src/test/spirv_run/vshr.ll +++ b/ptx/src/test/spirv_run/vshr.ll @@ -2,15 +2,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @vshr(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 { -"38": %"10" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"10", align 1 %"4" = alloca i64, align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) %"8" = alloca i32, align 4, addrspace(5) %"9" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"10", align 1 %"11" = load 
i64, ptr addrspace(4) %"29", align 8 store i64 %"11", ptr addrspace(5) %"4", align 8 %"12" = load i64, ptr addrspace(4) %"30", align 8 @@ -21,21 +23,21 @@ define protected amdgpu_kernel void @vshr(ptr addrspace(4) byref(i64) %"29", ptr store i32 %"31", ptr addrspace(5) %"7", align 4 %"16" = load i64, ptr addrspace(5) %"4", align 8 %"33" = inttoptr i64 %"16" to ptr - %"40" = getelementptr inbounds i8, ptr %"33", i64 4 - %"34" = load i32, ptr %"40", align 4 + %"39" = getelementptr inbounds i8, ptr %"33", i64 4 + %"34" = load i32, ptr %"39", align 4 store i32 %"34", ptr addrspace(5) %"8", align 4 %"18" = load i64, ptr addrspace(5) %"4", align 8 %"35" = inttoptr i64 %"18" to ptr - %"42" = getelementptr inbounds i8, ptr %"35", i64 8 - %"36" = load i32, ptr %"42", align 4 + %"41" = getelementptr inbounds i8, ptr %"35", i64 8 + %"36" = load i32, ptr %"41", align 4 store i32 %"36", ptr addrspace(5) %"9", align 4 %"20" = load i32, ptr addrspace(5) %"7", align 4 %"21" = load i32, ptr addrspace(5) %"8", align 4 %"22" = load i32, ptr addrspace(5) %"9", align 4 - %0 = icmp ugt i32 %"21", 31 - %1 = lshr i32 %"20", %"21" - %2 = select i1 %0, i32 0, i32 %1 - %"19" = add i32 %2, %"22" + %2 = icmp ugt i32 %"21", 31 + %3 = lshr i32 %"20", %"21" + %4 = select i1 %2, i32 0, i32 %3 + %"19" = add i32 %4, %"22" store i32 %"19", ptr addrspace(5) %"6", align 4 %"23" = load i64, ptr addrspace(5) %"5", align 8 %"24" = load i32, ptr addrspace(5) %"6", align 4 diff --git a/ptx/src/test/spirv_run/xor.ll b/ptx/src/test/spirv_run/xor.ll index 96b29149..bc0ad261 100644 --- a/ptx/src/test/spirv_run/xor.ll +++ b/ptx/src/test/spirv_run/xor.ll @@ -2,13 +2,15 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3 target triple = "amdgcn-amd-amdhsa" define protected amdgpu_kernel void @xor(ptr addrspace(4) byref(i64) %"22", ptr addrspace(4) byref(i64) %"23") #0 { -"27": %"8" = alloca i1, align 1, addrspace(5) - store i1 false, ptr addrspace(5) %"8", align 1 %"4" = alloca i64, 
align 8, addrspace(5) %"5" = alloca i64, align 8, addrspace(5) %"6" = alloca i32, align 4, addrspace(5) %"7" = alloca i32, align 4, addrspace(5) + br label %1 + +1: ; preds = %0 + store i1 false, ptr addrspace(5) %"8", align 1 %"9" = load i64, ptr addrspace(4) %"22", align 8 store i64 %"9", ptr addrspace(5) %"4", align 8 %"10" = load i64, ptr addrspace(4) %"23", align 8 @@ -19,8 +21,8 @@ define protected amdgpu_kernel void @xor(ptr addrspace(4) byref(i64) %"22", ptr store i32 %"11", ptr addrspace(5) %"6", align 4 %"14" = load i64, ptr addrspace(5) %"4", align 8 %"25" = inttoptr i64 %"14" to ptr - %"29" = getelementptr inbounds i8, ptr %"25", i64 4 - %"13" = load i32, ptr %"29", align 4 + %"28" = getelementptr inbounds i8, ptr %"25", i64 4 + %"13" = load i32, ptr %"28", align 4 store i32 %"13", ptr addrspace(5) %"7", align 4 %"16" = load i32, ptr addrspace(5) %"6", align 4 %"17" = load i32, ptr addrspace(5) %"7", align 4 diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index 10852583..b06fa527 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -2526,58 +2526,6 @@ fn insert_implicit_conversions2_impl<'input>( Ok(result) } -fn normalize_labels<'input>( - module: TranslationModule<'input, ExpandedArgParams>, -) -> Result, TranslateError> { - convert_methods_simple(module, normalize_labels2_impl) -} - -fn normalize_labels2_impl<'input>( - id_defs: &mut IdNameMapBuilder<'input>, - fn_body: Vec, -) -> Result, TranslateError> { - let mut labels_in_use = FxHashSet::default(); - for statement in fn_body.iter() { - match statement { - Statement::Instruction(i) => { - if let Some(target) = i.jump_target() { - labels_in_use.insert(target); - } - } - Statement::Conditional(cond) => { - labels_in_use.insert(cond.if_true); - labels_in_use.insert(cond.if_false); - } - Statement::Call(..) - | Statement::Variable(..) - | Statement::LoadVar(..) - | Statement::StoreVar(..) - | Statement::RetValue(..) - | Statement::Conversion(..) - | Statement::Constant(..) 
- | Statement::Label(..) - | Statement::PtrAccess { .. } - | Statement::RepackVector(..) - | Statement::MadC(..) - | Statement::MadCC(..) - | Statement::AddC(..) - | Statement::AddCC(..) - | Statement::SubC(..) - | Statement::SubCC(..) - | Statement::AsmVolatile { .. } - | Statement::FunctionPointer(..) => {} - } - } - Ok( - iter::once(Statement::Label(id_defs.register_intermediate(None))) - .chain(fn_body.into_iter().filter(|s| match s { - Statement::Label(i) => labels_in_use.contains(i), - _ => true, - })) - .collect::>(), - ) -} - fn hoist_globals<'input, P: ast::ArgParams>( module: TranslationModule<'input, P>, ) -> TranslationModule<'input, P> { @@ -3410,9 +3358,7 @@ fn to_llvm_module_impl2<'a, 'input>( } let translation_module = insert_implicit_conversions(translation_module)?; let translation_module = insert_compilation_mode_prologue(translation_module); - let translation_module = normalize_labels(translation_module)?; let translation_module = hoist_globals(translation_module); - let translation_module = move_variables_to_start(translation_module)?; let mut translation_module = replace_instructions_with_builtins(translation_module)?; if raytracing.is_some() { translation_module = raytracing::replace_tex_builtins_hack(translation_module)?; @@ -3439,49 +3385,6 @@ fn to_llvm_module_impl2<'a, 'input>( }) } -// From "Performance Tips for Frontend Authors" (https://llvm.org/docs/Frontend/PerformanceTips.html): -// "The SROA (Scalar Replacement Of Aggregates) and Mem2Reg passes only attempt to eliminate alloca -// instructions that are in the entry basic block. Given SSA is the canonical form expected by much -// of the optimizer; if allocas can not be eliminated by Mem2Reg or SROA, the optimizer is likely to -// be less effective than it could be." -// Empirically, this is true. 
Moving allocas to the start gives us less spill-happy assembly -fn move_variables_to_start<'input, P: ast::ArgParams>( - module: TranslationModule<'input, P>, -) -> Result, TranslateError> { - convert_methods_simple(module, move_variables_to_start_impl) -} - -fn move_variables_to_start_impl<'input, P: ast::ArgParams>( - _: &mut IdNameMapBuilder<'input>, - fn_body: Vec, P>>, -) -> Result, P>>, TranslateError> { - if fn_body.is_empty() { - return Ok(fn_body); - } - let mut result = (0..fn_body.len()) - .into_iter() - .map(|_| mem::MaybeUninit::<_>::uninit()) - .collect::>(); - let variables_count = fn_body.iter().fold(0, |acc, statement| { - acc + matches!(statement, Statement::Variable(..)) as usize - }); - let mut variable = 1usize; - let mut non_variable = variables_count + 1; - // methods always start with an entry label - let mut statements = fn_body.into_iter(); - let start_label = statements.next().ok_or_else(TranslateError::unreachable)?; - unsafe { result.get_unchecked_mut(0).write(start_label) }; - for statement in statements { - let index = match statement { - Statement::Variable(_) => &mut variable, - _ => &mut non_variable, - }; - unsafe { result.get_unchecked_mut(*index).write(statement) }; - *index += 1; - } - Ok(unsafe { mem::transmute(result) }) -} - // PTX definition of param state space does not translate cleanly into AMDGPU notion of an address space: //  .param in kernel arguments matches AMDGPU constant address space // .param in function arguments and variables matches AMDGPU private address space @@ -6901,15 +6804,6 @@ pub(crate) enum TypeKind { Struct, } -impl> ast::Instruction { - fn jump_target(&self) -> Option { - match self { - ast::Instruction::Bra(_, a) => Some(a.src), - _ => None, - } - } -} - impl ast::Instruction { // .wide instructions don't support ftz, so it's enough to just look at the // type declared by the instruction