diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp index a13447586bd4ba..d0a2f7c27e25a0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp @@ -53,13 +53,22 @@ static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) { // // extern "C" void * __init_array_start[]; // extern "C" void * __init_array_end[]; +// extern "C" void * __fini_array_start[]; +// extern "C" void * __fini_array_end[]; // // using InitCallback = void(); +// using FiniCallback = void(void); // // void call_init_array_callbacks() { // for (auto start = __init_array_start; start != __init_array_end; ++start) // reinterpret_cast(*start)(); // } +// +// void call_fini_array_callbacks() { +// size_t fini_array_size = __fini_array_end - __fini_array_start; +// for (size_t i = fini_array_size; i > 0; --i) +// reinterpret_cast(__fini_array_start[i - 1])(); +// } static void createInitOrFiniCalls(Function &F, bool IsCtor) { Module &M = *F.getParent(); LLVMContext &C = M.getContext(); @@ -96,15 +105,37 @@ static void createInitOrFiniCalls(Function &F, bool IsCtor) { // for now we just call them with no arguments. auto *CallBackTy = FunctionType::get(IRB.getVoidTy(), {}); - IRB.CreateCondBr(IRB.CreateICmpNE(Begin, End), LoopBB, ExitBB); + Value *Start = Begin; + Value *Stop = End; + // The destructor array must be called in reverse order. Get a constant + // expression to the end of the array and iterate backwards instead. + if (!IsCtor) { + Type *Int64Ty = IntegerType::getInt64Ty(C); + auto *EndPtr = IRB.CreatePtrToInt(End, Int64Ty); + auto *BeginPtr = IRB.CreatePtrToInt(Begin, Int64Ty); + auto *ByteSize = IRB.CreateSub(EndPtr, BeginPtr); + auto *Size = IRB.CreateAShr(ByteSize, ConstantInt::get(Int64Ty, 3)); + auto *Offset = IRB.CreateSub(Size, ConstantInt::get(Int64Ty, 1)); + Start = IRB.CreateInBoundsGEP( + ArrayType::get(IRB.getPtrTy(), 0), Begin, + ArrayRef({ConstantInt::get(Int64Ty, 0), Offset})); + Stop = Begin; + } + + IRB.CreateCondBr( + IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_NE : ICmpInst::ICMP_UGE, Start, + Stop), + LoopBB, ExitBB); IRB.SetInsertPoint(LoopBB); auto *CallBackPHI = IRB.CreatePHI(PtrTy, 2, "ptr"); auto *CallBack = IRB.CreateLoad(CallBackTy->getPointerTo(F.getAddressSpace()), CallBackPHI, "callback"); IRB.CreateCall(CallBackTy, CallBack); - auto *NewCallBack = IRB.CreateConstGEP1_64(PtrTy, CallBackPHI, 1, "next"); - auto *EndCmp = IRB.CreateICmpEQ(NewCallBack, End, "end"); - CallBackPHI->addIncoming(Begin, &F.getEntryBlock()); + auto *NewCallBack = + IRB.CreateConstGEP1_64(PtrTy, CallBackPHI, IsCtor ? 1 : -1, "next"); + auto *EndCmp = IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_ULT, + NewCallBack, Stop, "end"); + CallBackPHI->addIncoming(Start, &F.getEntryBlock()); CallBackPHI->addIncoming(NewCallBack, LoopBB); IRB.CreateCondBr(EndCmp, ExitBB, LoopBB); IRB.SetInsertPoint(ExitBB); diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll index a1929a2e8931c1..cc7943fd0ba76f 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll @@ -25,8 +25,6 @@ define void @bar() addrspace(1) { ret void } - - ;. ; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo.alias, ptr null }, { i32, ptr, ptr } { i32 1, ptr inttoptr (i64 4096 to ptr), ptr null }] ; CHECK: @[[LLVM_GLOBAL_DTORS:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr addrspacecast (ptr addrspace(1) @bar to ptr), ptr null }] @@ -65,13 +63,17 @@ define void @bar() addrspace(1) { ; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini( ; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp uge ptr addrspace(1) [[TMP2]], @__fini_array_start +; CHECK-NEXT: br i1 [[TMP3]], label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]] ; CHECK: while.entry: -; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ] +; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[TMP2]], [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ] ; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8 ; CHECK-NEXT: call void [[CALLBACK]]() -; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1 -; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end +; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 -1 +; CHECK-NEXT: [[END:%.*]] = icmp ult ptr addrspace(1) [[NEXT]], @__fini_array_start ; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]] ; CHECK: while.end: ; CHECK-NEXT: ret void diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll index 96844218222972..58e1589d048327 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll @@ -12,20 +12,19 @@ @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }] @llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }] - - - - ; VISIBILITY: FUNC WEAK PROTECTED {{.*}} amdgcn.device.init ; VISIBILITY: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.init.kd ; VISIBILITY: FUNC WEAK PROTECTED {{.*}} amdgcn.device.fini ; VISIBILITY: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.fini.kd + ; SECTION: .init_array.1 INIT_ARRAY {{.*}} {{.*}} 000008 00 WA 0 0 8 ; SECTION: .fini_array.1 FINI_ARRAY {{.*}} {{.*}} 000008 00 WA 0 0 8 + ; DISABLED-NOT: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.init ; DISABLED-NOT: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.init.kd ; DISABLED-NOT: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.fini ; DISABLED-NOT: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.fini.kd + ; METADATA: amdhsa.kernels: ; METADATA: .kind: init ; METADATA: .max_flat_workgroup_size: 1 @@ -79,13 +78,17 @@ define internal void @bar() { ; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini( ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp uge ptr addrspace(1) [[TMP2]], @__fini_array_start +; CHECK-NEXT: br i1 [[TMP3]], label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]] ; CHECK: while.entry: -; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ] +; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[TMP2]], [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ] ; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8 ; CHECK-NEXT: call void [[CALLBACK]]() -; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1 -; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end +; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 -1 +; CHECK-NEXT: [[END:%.*]] = icmp ult ptr addrspace(1) [[NEXT]], @__fini_array_start ; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]] ; CHECK: while.end: ; CHECK-NEXT: ret void diff --git a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll index 83bb61d1a63235..f2d73b24c35d52 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll @@ -3,10 +3,10 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -s - 2>&1 | FileCheck %s -check-prefix=CHECK-VIS -; UTC_ARGS: --disable @llvm.global_ctors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }, { i32, ptr, ptr } { i32 1, ptr @foo.5, ptr null }] @llvm.global_dtors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }, { i32, ptr, ptr } { i32 1, ptr @bar.5, ptr null }] +; UTC_ARGS: --disable ; CHECK: @__init_array_start = external addrspace(1) constant [0 x ptr addrspace(1)] ; CHECK: @__init_array_end = external addrspace(1) constant [0 x ptr addrspace(1)] ; CHECK: @__fini_array_start = external addrspace(1) constant [0 x ptr addrspace(1)] @@ -70,13 +70,17 @@ define internal void @bar.5() { ; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini( ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), 3 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp uge ptr addrspace(1) [[TMP2]], @__fini_array_start +; CHECK-NEXT: br i1 [[TMP3]], label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]] ; CHECK: while.entry: -; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ] +; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[TMP2]], [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ] ; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8 ; CHECK-NEXT: call void [[CALLBACK]]() -; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1 -; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end +; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 -1 +; CHECK-NEXT: [[END:%.*]] = icmp ult ptr addrspace(1) [[NEXT]], @__fini_array_start ; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]] ; CHECK: while.end: ; CHECK-NEXT: ret void