-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Coroutines] Conditional elide coroutines based on hot/cold information #145831
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
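@llvm/pr-subscribers-coroutines
Author: Chuanqi Xu (ChuanqiXu9)
Changes
The rationale is that it is not good to always elide. For example (the original code snippet is missing here; presumably `foo()` awaits `bar()` only when a flag `b` is true):
Assume Task is marked with `[[clang::coro_await_elidable]]`; we will then always elide the call to `bar()` into the frame of `foo()`. But this may be a regression instead of an optimization if `b` is always false.
This patch tries to mitigate the problem by introducing static hot/cold information. This can be optimized further, but at least this patch makes things better.
Full diff: https://github.com/llvm/llvm-project/pull/145831.diff
3 Files Affected: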
diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
index 9115946d205a4..d4d0c0f0895bb 100644
--- a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
@@ -24,6 +24,9 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -33,6 +36,49 @@ using namespace llvm;
#define DEBUG_TYPE "coro-annotation-elide"
+static cl::opt<float> CoroElideBranchRatio( // threshold for the call block's frequency relative to the caller's entry frequency
+ "coro-elide-branch-ratio", cl::init(0.55), cl::Hidden,
+ cl::desc("Minimum BranchProbability to consider eliding a coroutine."));
+extern cl::opt<unsigned> MinBlockCounterExecution;
+
+static cl::opt<bool>
+ PrintElidedCoroutine("print-elided-coroutine-stats", cl::init(false),
+ cl::Hidden,
+ cl::desc("Print stats for elided coroutine"));
+
+static cl::opt<std::string>
+ ElideStatOutput("coro-elide-stat-output", cl::init(""), cl::Hidden,
+ cl::desc("Output file for -print-elided-coroutine-stats. "
+ "Defaults to standard error output."));
+
+// Sets OS to the stream used for elision stats; OS is non-null on every return path.
+// The result owns the stream only when a file was opened, so keep it alive while using OS.
+static std::unique_ptr<llvm::raw_ostream>
+getCoroElidedStatsOStream(llvm::raw_ostream *&OS) {
+ if (!PrintElidedCoroutine) { // stats printing is off: hand back llvm::nulls()
+ OS = &llvm::nulls();
+ return nullptr;
+ }
+
+ if (ElideStatOutput.empty()) { // no output file requested: print to llvm::errs()
+ OS = &llvm::errs();
+ return nullptr;
+ }
+
+ std::error_code EC;
+ auto ret = std::make_unique<llvm::raw_fd_ostream>(ElideStatOutput, EC,
+ sys::fs::OF_Append); // opened with the append flag
+
+ if (EC) { // open failed: report it on llvm::errs() and fall back to llvm::nulls()
+ llvm::errs() << "llvm cannot open file: " << EC.message() << "\n";
+ OS = &llvm::nulls();
+ return nullptr;
+ }
+
+ OS = ret.get(); // OS borrows the stream owned by the returned pointer
+ return ret;
+}
+
static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) {
for (Instruction &I : F->getEntryBlock())
if (!isa<AllocaInst>(&I))
@@ -145,6 +191,37 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine();
bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe);
if (IsCallerPresplitCoroutine && HasAttr) {
+
+ llvm::raw_ostream *OS = nullptr;
+ auto _ = getCoroElidedStatsOStream(OS);
+ assert(OS && "At least we should able to get access to standard error");
+
+ auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
+ if (BFI.getBlockFreq(CB->getParent()) <
+ BFI.getEntryFreq()) {
+ static BranchProbability MinBranchProbability(
+ static_cast<int>(CoroElideBranchRatio * MinBlockCounterExecution),
+ MinBlockCounterExecution);
+
+ auto Prob = BranchProbability::getBranchProbability(
+ BFI.getBlockFreq(CB->getParent()).getFrequency(),
+ BFI.getEntryFreq().getFrequency());
+
+ if (Prob < MinBranchProbability) {
+ *OS << "Not eliding " << *CB
+ << " with estimated probability: " << Prob << "\n";
+ continue;
+ }
+
+ *OS << "BB Prob: \t" << Prob << "\n";
+ } else {
+ *OS << "BB Freq: \t"
+ << BFI.getBlockFreq(CB->getParent()).getFrequency() << "\n";
+ *OS << "Entry Freq: \t" << BFI.getEntryFreq().getFrequency() << "\n";
+ }
+
+ *OS << "eliding " << *CB << "\n";
+
auto *CallerN = CG.lookup(*Caller);
auto *CallerC = CallerN ? CG.lookupSCC(*CallerN) : nullptr;
// If CallerC is nullptr, it means LazyCallGraph hasn't visited Caller
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 2583249e65484..1a00d173d3ae0 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -109,7 +109,7 @@ static cl::opt<float> MinRegionSizeRatio(
"outline candidate and original function"));
// Used to tune the minimum number of execution counts needed in the predecessor
// block to the cold edge. ie. confidence interval.
-static cl::opt<unsigned>
+cl::opt<unsigned>
MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
cl::desc("Minimum block executions to consider "
"its BranchProbabilityInfo valid"));
diff --git a/llvm/test/Transforms/Coroutines/coro-conditional-elide.ll b/llvm/test/Transforms/Coroutines/coro-conditional-elide.ll
new file mode 100644
index 0000000000000..04c5bf0494278
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-conditional-elide.ll
@@ -0,0 +1,79 @@
+; Testing that a coro_elide_safe call on a conditional path is not elided when its block is estimated colder than -coro-elide-branch-ratio.
+; RUN: opt < %s -S -passes='cgscc(coro-annotation-elide)' -coro-elide-branch-ratio=0.55 | FileCheck %s
+
+%struct.Task = type { ptr }
+
+declare void @print(i32) nounwind
+
+; resume part of the coroutine
+define fastcc void @callee.resume(ptr dereferenceable(1)) {
+ tail call void @print(i32 0)
+ ret void
+}
+
+; destroy part of the coroutine
+define fastcc void @callee.destroy(ptr) {
+ tail call void @print(i32 1)
+ ret void
+}
+
+; cleanup part of the coroutine
+define fastcc void @callee.cleanup(ptr) {
+ tail call void @print(i32 2)
+ ret void
+}
+
+@callee.resumers = internal constant [3 x ptr] [
+ ptr @callee.resume, ptr @callee.destroy, ptr @callee.cleanup]
+
+declare void @alloc(i1) nounwind
+
+; CHECK-LABEL: define ptr @callee
+define ptr @callee(i8 %arg) {
+entry:
+ %task = alloca %struct.Task, align 8
+ %id = call token @llvm.coro.id(i32 0, ptr null,
+ ptr @callee,
+ ptr @callee.resumers)
+ %alloc = call i1 @llvm.coro.alloc(token %id)
+ %hdl = call ptr @llvm.coro.begin(token %id, ptr null)
+ store ptr %hdl, ptr %task
+ ret ptr %task
+}
+
+; CHECK-LABEL: define ptr @callee.noalloc
+define ptr @callee.noalloc(i8 %arg, ptr dereferenceable(32) align(8) %frame) {
+ entry:
+ %task = alloca %struct.Task, align 8
+ %id = call token @llvm.coro.id(i32 0, ptr null,
+ ptr @callee,
+ ptr @callee.resumers)
+ %hdl = call ptr @llvm.coro.begin(token %id, ptr null)
+ store ptr %hdl, ptr %task
+ ret ptr %task
+}
+
+; CHECK-LABEL: define ptr @caller(i1 %cond)
+; Function Attrs: presplitcoroutine
+define ptr @caller(i1 %cond) #0 {
+entry:
+ br i1 %cond, label %call, label %ret
+
+call:
+ %task = call ptr @callee(i8 0) #1
+ br label %ret
+
+ret:
+ %retval = phi ptr [ %task, %call ], [ null, %entry ]
+ ret ptr %retval
+ ; CHECK-NOT: alloca
+}
+
+declare token @llvm.coro.id(i32, ptr, ptr, ptr)
+declare ptr @llvm.coro.begin(token, ptr)
+declare ptr @llvm.coro.frame()
+declare ptr @llvm.coro.subfn.addr(ptr, i8)
+declare i1 @llvm.coro.alloc(token)
+
+attributes #0 = { presplitcoroutine }
+attributes #1 = { coro_elide_safe }
|
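@llvm/pr-subscribers-llvm-transforms
Author: Chuanqi Xu (ChuanqiXu9)
Changes
The rationale is that it is not good to always elide. For example (the original code snippet is missing here; presumably `foo()` awaits `bar()` only when a flag `b` is true):
Assume Task is marked with `[[clang::coro_await_elidable]]`; we will then always elide the call to `bar()` into the frame of `foo()`. But this may be a regression instead of an optimization if `b` is always false.
This patch tries to mitigate the problem by introducing static hot/cold information. This can be optimized further, but at least this patch makes things better.
Full diff: https://github.com/llvm/llvm-project/pull/145831.diff
3 Files Affected: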
diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
index 9115946d205a4..d4d0c0f0895bb 100644
--- a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
@@ -24,6 +24,9 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -33,6 +36,49 @@ using namespace llvm;
#define DEBUG_TYPE "coro-annotation-elide"
+static cl::opt<float> CoroElideBranchRatio( // threshold for the call block's frequency relative to the caller's entry frequency
+ "coro-elide-branch-ratio", cl::init(0.55), cl::Hidden,
+ cl::desc("Minimum BranchProbability to consider eliding a coroutine."));
+extern cl::opt<unsigned> MinBlockCounterExecution;
+
+static cl::opt<bool>
+ PrintElidedCoroutine("print-elided-coroutine-stats", cl::init(false),
+ cl::Hidden,
+ cl::desc("Print stats for elided coroutine"));
+
+static cl::opt<std::string>
+ ElideStatOutput("coro-elide-stat-output", cl::init(""), cl::Hidden,
+ cl::desc("Output file for -print-elided-coroutine-stats. "
+ "Defaults to standard error output."));
+
+// Sets OS to the stream used for elision stats; OS is non-null on every return path.
+// The result owns the stream only when a file was opened, so keep it alive while using OS.
+static std::unique_ptr<llvm::raw_ostream>
+getCoroElidedStatsOStream(llvm::raw_ostream *&OS) {
+ if (!PrintElidedCoroutine) { // stats printing is off: hand back llvm::nulls()
+ OS = &llvm::nulls();
+ return nullptr;
+ }
+
+ if (ElideStatOutput.empty()) { // no output file requested: print to llvm::errs()
+ OS = &llvm::errs();
+ return nullptr;
+ }
+
+ std::error_code EC;
+ auto ret = std::make_unique<llvm::raw_fd_ostream>(ElideStatOutput, EC,
+ sys::fs::OF_Append); // opened with the append flag
+
+ if (EC) { // open failed: report it on llvm::errs() and fall back to llvm::nulls()
+ llvm::errs() << "llvm cannot open file: " << EC.message() << "\n";
+ OS = &llvm::nulls();
+ return nullptr;
+ }
+
+ OS = ret.get(); // OS borrows the stream owned by the returned pointer
+ return ret;
+}
+
static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) {
for (Instruction &I : F->getEntryBlock())
if (!isa<AllocaInst>(&I))
@@ -145,6 +191,37 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine();
bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe);
if (IsCallerPresplitCoroutine && HasAttr) {
+
+ llvm::raw_ostream *OS = nullptr;
+ auto _ = getCoroElidedStatsOStream(OS);
+ assert(OS && "At least we should able to get access to standard error");
+
+ auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
+ if (BFI.getBlockFreq(CB->getParent()) <
+ BFI.getEntryFreq()) {
+ static BranchProbability MinBranchProbability(
+ static_cast<int>(CoroElideBranchRatio * MinBlockCounterExecution),
+ MinBlockCounterExecution);
+
+ auto Prob = BranchProbability::getBranchProbability(
+ BFI.getBlockFreq(CB->getParent()).getFrequency(),
+ BFI.getEntryFreq().getFrequency());
+
+ if (Prob < MinBranchProbability) {
+ *OS << "Not eliding " << *CB
+ << " with estimated probability: " << Prob << "\n";
+ continue;
+ }
+
+ *OS << "BB Prob: \t" << Prob << "\n";
+ } else {
+ *OS << "BB Freq: \t"
+ << BFI.getBlockFreq(CB->getParent()).getFrequency() << "\n";
+ *OS << "Entry Freq: \t" << BFI.getEntryFreq().getFrequency() << "\n";
+ }
+
+ *OS << "eliding " << *CB << "\n";
+
auto *CallerN = CG.lookup(*Caller);
auto *CallerC = CallerN ? CG.lookupSCC(*CallerN) : nullptr;
// If CallerC is nullptr, it means LazyCallGraph hasn't visited Caller
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 2583249e65484..1a00d173d3ae0 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -109,7 +109,7 @@ static cl::opt<float> MinRegionSizeRatio(
"outline candidate and original function"));
// Used to tune the minimum number of execution counts needed in the predecessor
// block to the cold edge. ie. confidence interval.
-static cl::opt<unsigned>
+cl::opt<unsigned>
MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
cl::desc("Minimum block executions to consider "
"its BranchProbabilityInfo valid"));
diff --git a/llvm/test/Transforms/Coroutines/coro-conditional-elide.ll b/llvm/test/Transforms/Coroutines/coro-conditional-elide.ll
new file mode 100644
index 0000000000000..04c5bf0494278
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-conditional-elide.ll
@@ -0,0 +1,79 @@
+; Testing that a coro_elide_safe call on a conditional path is not elided when its block is estimated colder than -coro-elide-branch-ratio.
+; RUN: opt < %s -S -passes='cgscc(coro-annotation-elide)' -coro-elide-branch-ratio=0.55 | FileCheck %s
+
+%struct.Task = type { ptr }
+
+declare void @print(i32) nounwind
+
+; resume part of the coroutine
+define fastcc void @callee.resume(ptr dereferenceable(1)) {
+ tail call void @print(i32 0)
+ ret void
+}
+
+; destroy part of the coroutine
+define fastcc void @callee.destroy(ptr) {
+ tail call void @print(i32 1)
+ ret void
+}
+
+; cleanup part of the coroutine
+define fastcc void @callee.cleanup(ptr) {
+ tail call void @print(i32 2)
+ ret void
+}
+
+@callee.resumers = internal constant [3 x ptr] [
+ ptr @callee.resume, ptr @callee.destroy, ptr @callee.cleanup]
+
+declare void @alloc(i1) nounwind
+
+; CHECK-LABEL: define ptr @callee
+define ptr @callee(i8 %arg) {
+entry:
+ %task = alloca %struct.Task, align 8
+ %id = call token @llvm.coro.id(i32 0, ptr null,
+ ptr @callee,
+ ptr @callee.resumers)
+ %alloc = call i1 @llvm.coro.alloc(token %id)
+ %hdl = call ptr @llvm.coro.begin(token %id, ptr null)
+ store ptr %hdl, ptr %task
+ ret ptr %task
+}
+
+; CHECK-LABEL: define ptr @callee.noalloc
+define ptr @callee.noalloc(i8 %arg, ptr dereferenceable(32) align(8) %frame) {
+ entry:
+ %task = alloca %struct.Task, align 8
+ %id = call token @llvm.coro.id(i32 0, ptr null,
+ ptr @callee,
+ ptr @callee.resumers)
+ %hdl = call ptr @llvm.coro.begin(token %id, ptr null)
+ store ptr %hdl, ptr %task
+ ret ptr %task
+}
+
+; CHECK-LABEL: define ptr @caller(i1 %cond)
+; Function Attrs: presplitcoroutine
+define ptr @caller(i1 %cond) #0 {
+entry:
+ br i1 %cond, label %call, label %ret
+
+call:
+ %task = call ptr @callee(i8 0) #1
+ br label %ret
+
+ret:
+ %retval = phi ptr [ %task, %call ], [ null, %entry ]
+ ret ptr %retval
+ ; CHECK-NOT: alloca
+}
+
+declare token @llvm.coro.id(i32, ptr, ptr, ptr)
+declare ptr @llvm.coro.begin(token, ptr)
+declare ptr @llvm.coro.frame()
+declare ptr @llvm.coro.subfn.addr(ptr, i8)
+declare i1 @llvm.coro.alloc(token)
+
+attributes #0 = { presplitcoroutine }
+attributes #1 = { coro_elide_safe }
|
You can test this locally with the following command:
git-clang-format --diff HEAD~1 HEAD --extensions cpp -- llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp llvm/lib/Transforms/IPO/PartialInlining.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
index d4d0c0f08..3e5343033 100644
--- a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
@@ -197,8 +197,7 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
assert(OS && "At least we should able to get access to standard error");
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
- if (BFI.getBlockFreq(CB->getParent()) <
- BFI.getEntryFreq()) {
+ if (BFI.getBlockFreq(CB->getParent()) < BFI.getEntryFreq()) {
static BranchProbability MinBranchProbability(
static_cast<int>(CoroElideBranchRatio * MinBlockCounterExecution),
MinBlockCounterExecution);
|
The rationale is that it is not good to always elide.
For example (the original code snippet is missing here; presumably `foo()` awaits `bar()` only when a flag `b` is true):
Assume Task is marked with `[[clang::coro_await_elidable]]`; we will then always elide the call to `bar()` into the frame of `foo()`.
But this may be a regression instead of an optimization if `b` is always false.
This patch tries to mitigate the problem by introducing static hot/cold information. This can be optimized further, but at least this patch makes things better.