[EarlyCSE] Add support for writeonly call CSE #145474
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-llvm-transforms

Author: Nikita Popov (nikic)

Changes: Add support for CSE of writeonly calls, similar to the existing support for readonly calls.

Full diff: https://github.com/llvm/llvm-project/pull/145474.diff

2 Files Affected:
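For illustration, a minimal before/after sketch of the new behavior (hand-written; @write_log is a hypothetical write-only function, not taken from the patch's tests):

declare void @write_log(i32) memory(write)

define void @example(i32 %x) {
  ; The two calls are identical and nothing between them may read the memory
  ; they write, so with this patch EarlyCSE can remove the second call, just
  ; as it already does for read-only calls.
  call void @write_log(i32 %x)
  call void @write_log(i32 %x)
  ret void
}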
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 381de60fcface..caa9c4cd0401f 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -493,7 +493,7 @@ struct CallValue {
static bool canHandle(Instruction *Inst) {
CallInst *CI = dyn_cast<CallInst>(Inst);
- if (!CI || !CI->onlyReadsMemory() ||
+ if (!CI || (!CI->onlyReadsMemory() && !CI->onlyWritesMemory()) ||
// FIXME: Currently the calls which may access the thread id may
// be considered as not accessing the memory. But this is
// problematic for coroutines, since coroutines may resume in a
@@ -1626,14 +1626,17 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
!(MemInst.isValid() && !MemInst.mayReadFromMemory()))
LastStore = nullptr;
- // If this is a read-only call, process it.
- if (CallValue::canHandle(&Inst)) {
+ // If this is a read-only or write-only call, process it. Skip store
+ // MemInsts, as they will be more precisely handled lateron.
+ if (CallValue::canHandle(&Inst) &&
+ (!MemInst.isValid() || !MemInst.isStore())) {
// If we have an available version of this call, and if it is the right
// generation, replace this instruction.
std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
if (InVal.first != nullptr &&
isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
- &Inst)) {
+ &Inst) &&
+ InVal.first->mayReadFromMemory() == Inst.mayReadFromMemory()) {
LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
<< " to: " << *InVal.first << '\n');
if (!DebugCounter::shouldExecute(CSECounter)) {
@@ -1651,6 +1654,11 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
+ // Increase memory generation for writes. Do this before inserting
+ // the call, so it has the generation after the write occurred.
+ if (Inst.mayWriteToMemory())
+ ++CurrentGeneration;
+
// Otherwise, remember that we have this instruction.
AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
continue;
diff --git a/llvm/test/Transforms/EarlyCSE/writeonly.ll b/llvm/test/Transforms/EarlyCSE/writeonly.ll
index 0bfffa3c825a3..c09b913f9ff2b 100644
--- a/llvm/test/Transforms/EarlyCSE/writeonly.ll
+++ b/llvm/test/Transforms/EarlyCSE/writeonly.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=early-cse -earlycse-debug-hash < %s | FileCheck %s
+; RUN: opt -S -passes=early-cse -earlycse-debug-hash < %s | FileCheck %s --check-prefixes=CHECK,NO-MSSA
+; RUN: opt -S -passes='early-cse<memssa>' < %s | FileCheck %s --check-prefixes=CHECK,MSSA
@var = global i32 undef
declare void @foo() nounwind
@@ -15,3 +16,148 @@ define void @test() {
store i32 2, ptr @var
ret void
}
+
+declare void @writeonly_void() memory(write)
+
+; Can CSE writeonly calls, including non-nounwind/willreturn.
+define void @writeonly_cse() {
+; CHECK-LABEL: @writeonly_cse(
+; CHECK-NEXT: call void @writeonly_void()
+; CHECK-NEXT: ret void
+;
+ call void @writeonly_void()
+ call void @writeonly_void()
+ ret void
+}
+
+; Can CSE, loads do not matter.
+define i32 @writeonly_cse_intervening_load(ptr %p) {
+; CHECK-LABEL: @writeonly_cse_intervening_load(
+; CHECK-NEXT: call void @writeonly_void()
+; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT: ret i32 [[V]]
+;
+ call void @writeonly_void()
+ %v = load i32, ptr %p
+ call void @writeonly_void()
+ ret i32 %v
+}
+
+; Cannot CSE, the store may be to the same memory.
+define void @writeonly_cse_intervening_store(ptr %p) {
+; CHECK-LABEL: @writeonly_cse_intervening_store(
+; CHECK-NEXT: call void @writeonly_void()
+; CHECK-NEXT: store i32 0, ptr [[P:%.*]], align 4
+; CHECK-NEXT: call void @writeonly_void()
+; CHECK-NEXT: ret void
+;
+ call void @writeonly_void()
+ store i32 0, ptr %p
+ call void @writeonly_void()
+ ret void
+}
+
+; Can CSE, the store does not alias the writeonly call.
+define void @writeonly_cse_intervening_noalias_store(ptr noalias %p) {
+; NO-MSSA-LABEL: @writeonly_cse_intervening_noalias_store(
+; NO-MSSA-NEXT: call void @writeonly_void()
+; NO-MSSA-NEXT: store i32 0, ptr [[P:%.*]], align 4
+; NO-MSSA-NEXT: call void @writeonly_void()
+; NO-MSSA-NEXT: ret void
+;
+; MSSA-LABEL: @writeonly_cse_intervening_noalias_store(
+; MSSA-NEXT: call void @writeonly_void()
+; MSSA-NEXT: store i32 0, ptr [[P:%.*]], align 4
+; MSSA-NEXT: ret void
+;
+ call void @writeonly_void()
+ store i32 0, ptr %p
+ call void @writeonly_void()
+ ret void
+}
+
+; Cannot CSE loads across writeonly call.
+define i32 @load_cse_across_writeonly(ptr %p) {
+; CHECK-LABEL: @load_cse_across_writeonly(
+; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT: call void @writeonly_void()
+; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: [[RES:%.*]] = sub i32 [[V1]], [[V2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %v1 = load i32, ptr %p
+ call void @writeonly_void()
+ %v2 = load i32, ptr %p
+ %res = sub i32 %v1, %v2
+ ret i32 %res
+}
+
+; Can CSE loads across eliminated writeonly call.
+define i32 @load_cse_across_csed_writeonly(ptr %p) {
+; CHECK-LABEL: @load_cse_across_csed_writeonly(
+; CHECK-NEXT: call void @writeonly_void()
+; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT: ret i32 0
+;
+ call void @writeonly_void()
+ %v1 = load i32, ptr %p
+ call void @writeonly_void()
+ %v2 = load i32, ptr %p
+ %res = sub i32 %v1, %v2
+ ret i32 %res
+}
+
+declare i32 @writeonly(ptr %p) memory(write)
+
+; Can CSE writeonly calls with arg and return.
+define i32 @writeonly_ret_cse(ptr %p) {
+; CHECK-LABEL: @writeonly_ret_cse(
+; CHECK-NEXT: [[V2:%.*]] = call i32 @writeonly(ptr [[P:%.*]])
+; CHECK-NEXT: ret i32 0
+;
+ %v1 = call i32 @writeonly(ptr %p)
+ %v2 = call i32 @writeonly(ptr %p)
+ %res = sub i32 %v1, %v2
+ ret i32 %res
+}
+
+; Cannot CSE writeonly calls with different arguments.
+define i32 @writeonly_different_args(ptr %p1, ptr %p2) {
+; CHECK-LABEL: @writeonly_different_args(
+; CHECK-NEXT: [[V1:%.*]] = call i32 @writeonly(ptr [[P1:%.*]])
+; CHECK-NEXT: [[V2:%.*]] = call i32 @writeonly(ptr [[P2:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = sub i32 [[V1]], [[V2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %v1 = call i32 @writeonly(ptr %p1)
+ %v2 = call i32 @writeonly(ptr %p2)
+ %res = sub i32 %v1, %v2
+ ret i32 %res
+}
+
+declare void @callee()
+
+; These are weird cases where the same call is both readonly and writeonly
+; based on call-site attributes. I believe this implies that both calls are
+; actually readnone and safe to CSE, but leave them alone to be conservative.
+define void @readonly_and_writeonly() {
+; CHECK-LABEL: @readonly_and_writeonly(
+; CHECK-NEXT: call void @callee() #[[ATTR2:[0-9]+]]
+; CHECK-NEXT: call void @callee() #[[ATTR1]]
+; CHECK-NEXT: ret void
+;
+ call void @callee() memory(read)
+ call void @callee() memory(write)
+ ret void
+}
+
+define void @writeonly_and_readonly() {
+; CHECK-LABEL: @writeonly_and_readonly(
+; CHECK-NEXT: call void @callee() #[[ATTR1]]
+; CHECK-NEXT: call void @callee() #[[ATTR2]]
+; CHECK-NEXT: ret void
+;
+ call void @callee() memory(write)
+ call void @callee() memory(read)
+ ret void
+}
      // If we have an available version of this call, and if it is the right
      // generation, replace this instruction.
      std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
      if (InVal.first != nullptr &&
          isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
-                             &Inst)) {
+                             &Inst) &&
+         InVal.first->mayReadFromMemory() == Inst.mayReadFromMemory()) {
I've added this to handle weird cases like the readonly_and_writeonly test, though I don't think it's strictly necessary...
This is a revival of https://reviews.llvm.org/D116609 by @d0k. This patch differs in a few ways:
These intrinsics introduced in llvm#84850 are currently marked as `memory(inaccessiblemem: write)`. This is not correct for the intended purpose of allowing per-block decisions, as such calls may get DCEd across control-flow boundaries (which will start actually happening with llvm#145474). Use `memory(inaccessiblemem: readwrite)` instead, just like all the other control-flow sensitive intrinsics.
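To make the control-flow concern concrete, a hand-written sketch (the @decide function is a hypothetical stand-in for those intrinsics):

declare void @decide() memory(inaccessiblemem: write)

define void @guarded(i1 %cond) {
entry:
  call void @decide()
  br i1 %cond, label %then, label %exit
then:
  ; With write-only call CSE, this call is identical to and dominated by the
  ; one in %entry, so it could be removed even though it only executes when
  ; %cond is true, defeating a per-block decision.
  call void @decide()
  br label %exit
exit:
  ret void
}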
Blocked on #145492.
Volatile memset is treated as write-only: https://godbolt.org/z/e3b44Ee1G
Yes, see #122926.
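For reference, a minimal hand-written version of the scenario the question is about (not taken from the godbolt link): the memset calls below are volatile, yet the intrinsic's declared memory effects only say write-only.

declare void @llvm.memset.p0.i64(ptr writeonly, i8, i64, i1 immarg)

define void @volatile_memsets(ptr %p) {
  ; Both memsets are volatile (last argument is true), but attribute-based
  ; reasoning alone would treat them as plain write-only calls; see #122926
  ; for the underlying issue.
  call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 16, i1 true)
  call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 16, i1 true)
  ret void
}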
This patch eliminates many math libcalls that set errno. However, it causes some regressions when processing llvm.memset.

See the following pattern:

  memset(p);
  if (Cond) {
    memset(p);
    // memory access through p
  }

After this patch, the second memset is removed. However, MemCpyOptPass does better, since it only removes the first one. As MemCpyOpt handles memory intrinsics more precisely, can we bail out on memset here?
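A hand-written IR version of the pattern above, to make the concern concrete (whether memset is actually affected depends on the patch revision; the later revision skips store MemInsts):

declare void @llvm.memset.p0.i64(ptr writeonly, i8, i64, i1 immarg)

define i8 @memset_pattern(i1 %cond) {
entry:
  %p = alloca [64 x i8]
  call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 64, i1 false)
  br i1 %cond, label %then, label %exit
then:
  ; Treating memset as a generic write-only call would let EarlyCSE remove this
  ; second memset (identical to and dominated by the first one), whereas
  ; MemCpyOpt/DSE would rather remove the first memset, keeping this store
  ; right next to the load that uses it.
  call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 64, i1 false)
  %v = load i8, ptr %p
  ret i8 %v
exit:
  ret i8 0
}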
-    // If this is a read-only call, process it.
-    if (CallValue::canHandle(&Inst)) {
+    // If this is a read-only or write-only call, process it. Skip store
+    // MemInsts, as they will be more precisely handled lateron.
Suggested change:
-    // MemInsts, as they will be more precisely handled lateron.
+    // MemInsts, as they will be more precisely handled later on.
This checks for the interaction with #145474.
Skipping memsets for now. I added a PhaseOrdering test in ec150a9. I think what we're mainly missing is store sinking support in DSE.
LGTM. Thank you!