Skip to content

Commit

Permalink
ppc/s390: [isolate-data] Split builtin tables into tiers
Browse files Browse the repository at this point in the history
Port 06af754

Original Message:
  .. for more efficient access to builtins from generated code.

  Root-relative accesses tend to be faster and produce more compact
  code when the root-relative offset is small. IsolateData contains
  a few large tables (roots, external references, builtins), resulting
  in very large offsets in general.

  This CL starts by splitting the builtin table into tiers: tier 0
  is a minimal set of perf-critical builtins that should be cheap to
  access. The offset to tier 0 builtins is guaranteed to be small.

  The full builtin table also remains in IsolateData for occasions in
  which we need to look up builtins by index.

  In future work, we can also split external references and roots into
  tiers.

  On x64, this reduces deopt exit sizes from 7 to 4 bytes and from 12
  to 9 bytes (dynamic map checks / EagerWithResume deopts).

Change-Id: I021d60b20b783da170987ffcf0327b93206f7e5d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3172917
Reviewed-by: Milad Fa <mfarazma@redhat.com>
Commit-Queue: Junliang Yan <junyan@redhat.com>
Cr-Commit-Position: refs/heads/main@{#76967}
  • Loading branch information
Junliang Yan authored and V8 LUCI CQ committed Sep 21, 2021
1 parent d7dde47 commit dc88bdf
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 33 deletions.
42 changes: 23 additions & 19 deletions src/codegen/ppc/macro-assembler-ppc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -187,15 +187,14 @@ void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode,
DCHECK_IMPLIES(options().isolate_independent_code,
Builtins::IsIsolateIndependentBuiltin(*code));

Builtin builtin_index = Builtin::kNoBuiltinId;
Builtin builtin = Builtin::kNoBuiltinId;
bool target_is_builtin =
isolate()->builtins()->IsBuiltinHandle(code, &builtin_index);
isolate()->builtins()->IsBuiltinHandle(code, &builtin);

if (root_array_available_ && options().isolate_independent_code) {
Label skip;
Register scratch = ip;
int offset = static_cast<int>(code->builtin_id()) * kSystemPointerSize +
IsolateData::builtin_entry_table_offset();
int offset = IsolateData::BuiltinEntrySlotOffset(code->builtin_id());
LoadU64(scratch, MemOperand(kRootRegister, offset), r0);
if (cond != al) b(NegateCondition(cond), &skip, cr);
Jump(scratch);
Expand All @@ -204,10 +203,10 @@ void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode,
} else if (options().inline_offheap_trampolines && target_is_builtin) {
// Inline the trampoline.
Label skip;
RecordCommentForOffHeapTrampoline(builtin_index);
RecordCommentForOffHeapTrampoline(builtin);
// Use ip directly instead of using UseScratchRegisterScope, as we do
// not preserve scratch registers across calls.
mov(ip, Operand(BuiltinEntry(builtin_index), RelocInfo::OFF_HEAP_TARGET));
mov(ip, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
if (cond != al) b(NegateCondition(cond), &skip, cr);
Jump(ip);
bind(&skip);
Expand Down Expand Up @@ -274,36 +273,40 @@ void TurboAssembler::Call(Handle<Code> code, RelocInfo::Mode rmode,
DCHECK_IMPLIES(options().use_pc_relative_calls_and_jumps,
Builtins::IsIsolateIndependentBuiltin(*code));

Builtin builtin_index = Builtin::kNoBuiltinId;
Builtin builtin = Builtin::kNoBuiltinId;
bool target_is_builtin =
isolate()->builtins()->IsBuiltinHandle(code, &builtin_index);
isolate()->builtins()->IsBuiltinHandle(code, &builtin);

if (root_array_available_ && options().isolate_independent_code) {
Label skip;
int offset = static_cast<int>(code->builtin_id()) * kSystemPointerSize +
IsolateData::builtin_entry_table_offset();
int offset = IsolateData::BuiltinEntrySlotOffset(code->builtin_id());
LoadU64(ip, MemOperand(kRootRegister, offset));
if (cond != al) b(NegateCondition(cond), &skip);
Call(ip);
bind(&skip);
return;
} else if (options().inline_offheap_trampolines && target_is_builtin) {
// Inline the trampoline.
RecordCommentForOffHeapTrampoline(builtin_index);
// Use ip directly instead of using UseScratchRegisterScope, as we do
// not preserve scratch registers across calls.
mov(ip, Operand(BuiltinEntry(builtin_index), RelocInfo::OFF_HEAP_TARGET));
Label skip;
if (cond != al) b(NegateCondition(cond), &skip);
Call(ip);
bind(&skip);
CallBuiltin(builtin, cond);
return;
}
DCHECK(code->IsExecutable());
int32_t target_index = AddCodeTarget(code);
Call(static_cast<Address>(target_index), rmode, cond);
}

// Emits a call to |builtin| that is only taken when |cond| holds.
//
// The builtin's entry address (BuiltinEntry(builtin)) is moved into ip with
// RelocInfo::OFF_HEAP_TARGET and then called through that register. When
// |cond| is not `al`, a branch on the negated condition skips the call.
void TurboAssembler::CallBuiltin(Builtin builtin, Condition cond) {
// Attach a "call" off-heap-trampoline comment string for this builtin.
ASM_CODE_COMMENT_STRING(this, CommentForOffHeapTrampoline("call", builtin));
// Debug-only check that |builtin| is a valid builtin id.
DCHECK(Builtins::IsBuiltinId(builtin));
// Use ip directly instead of using UseScratchRegisterScope, as we do not
// preserve scratch registers across calls.
mov(ip, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
Label skip;
// For a conditional call, jump over the Call when |cond| does not hold.
if (cond != al) b(NegateCondition(cond), &skip);
Call(ip);
// |skip| is bound unconditionally, even when no branch targets it.
bind(&skip);
}

void TurboAssembler::Drop(int count) {
if (count > 0) {
AddS64(sp, sp, Operand(count * kSystemPointerSize), r0);
Expand Down Expand Up @@ -3646,8 +3649,9 @@ void TurboAssembler::CallForDeoptimization(Builtin target, int, Label* exit,
DeoptimizeKind kind, Label* ret,
Label*) {
BlockTrampolinePoolScope block_trampoline_pool(this);
CHECK_LE(target, Builtins::kLastTier0);
LoadU64(ip, MemOperand(kRootRegister,
IsolateData::builtin_entry_slot_offset(target)));
IsolateData::BuiltinEntrySlotOffset(target)));
Call(ip);
DCHECK_EQ(SizeOfCodeGeneratedSince(exit),
(kind == DeoptimizeKind::kLazy)
Expand Down
1 change: 1 addition & 0 deletions src/codegen/ppc/macro-assembler-ppc.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
public:
using TurboAssemblerBase::TurboAssemblerBase;

// Calls |builtin| through its off-heap entry address (materialized in ip)
// when |cond| holds; for conditions other than `al` the call is skipped if
// |cond| is false.
void CallBuiltin(Builtin builtin, Condition cond);
void Popcnt32(Register dst, Register src);
void Popcnt64(Register dst, Register src);
// Converts the integer (untagged smi) in |src| to a double, storing
Expand Down
28 changes: 18 additions & 10 deletions src/codegen/s390/macro-assembler-s390.cc
Original file line number Diff line number Diff line change
Expand Up @@ -416,14 +416,14 @@ void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode,
DCHECK_IMPLIES(options().isolate_independent_code,
Builtins::IsIsolateIndependentBuiltin(*code));

Builtin builtin_index = Builtin::kNoBuiltinId;
Builtin builtin = Builtin::kNoBuiltinId;
bool target_is_builtin =
isolate()->builtins()->IsBuiltinHandle(code, &builtin_index);
isolate()->builtins()->IsBuiltinHandle(code, &builtin);

if (options().inline_offheap_trampolines && target_is_builtin) {
// Inline the trampoline.
RecordCommentForOffHeapTrampoline(builtin_index);
mov(ip, Operand(BuiltinEntry(builtin_index), RelocInfo::OFF_HEAP_TARGET));
RecordCommentForOffHeapTrampoline(builtin);
mov(ip, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
b(cond, ip);
return;
}
Expand Down Expand Up @@ -474,21 +474,28 @@ void TurboAssembler::Call(Handle<Code> code, RelocInfo::Mode rmode,

DCHECK_IMPLIES(options().isolate_independent_code,
Builtins::IsIsolateIndependentBuiltin(*code));
Builtin builtin_index = Builtin::kNoBuiltinId;
Builtin builtin = Builtin::kNoBuiltinId;
bool target_is_builtin =
isolate()->builtins()->IsBuiltinHandle(code, &builtin_index);
isolate()->builtins()->IsBuiltinHandle(code, &builtin);

if (target_is_builtin && options().inline_offheap_trampolines) {
// Inline the trampoline.
RecordCommentForOffHeapTrampoline(builtin_index);
mov(ip, Operand(BuiltinEntry(builtin_index), RelocInfo::OFF_HEAP_TARGET));
Call(ip);
CallBuiltin(builtin);
return;
}
DCHECK(code->IsExecutable());
call(code, rmode);
}

// Emits an unconditional call to |builtin|.
//
// The builtin's entry address (BuiltinEntry(builtin)) is moved into ip with
// RelocInfo::OFF_HEAP_TARGET and then called through that register.
void TurboAssembler::CallBuiltin(Builtin builtin) {
// Attach a "call" off-heap-trampoline comment string for this builtin.
ASM_CODE_COMMENT_STRING(this, CommentForOffHeapTrampoline("call", builtin));
// Debug-only check that |builtin| is a valid builtin id.
DCHECK(Builtins::IsBuiltinId(builtin));
// Use ip directly instead of using UseScratchRegisterScope, as we do not
// preserve scratch registers across calls.
mov(ip, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
Call(ip);
}

void TurboAssembler::Drop(int count) {
if (count > 0) {
int total = count * kSystemPointerSize;
Expand Down Expand Up @@ -4779,8 +4786,9 @@ void TurboAssembler::StoreReturnAddressAndCall(Register target) {
void TurboAssembler::CallForDeoptimization(Builtin target, int, Label* exit,
DeoptimizeKind kind, Label* ret,
Label*) {
ASM_CODE_COMMENT(this);
LoadU64(ip, MemOperand(kRootRegister,
IsolateData::builtin_entry_slot_offset(target)));
IsolateData::BuiltinEntrySlotOffset(target)));
Call(ip);
DCHECK_EQ(SizeOfCodeGeneratedSince(exit),
(kind == DeoptimizeKind::kLazy)
Expand Down
1 change: 1 addition & 0 deletions src/codegen/s390/macro-assembler-s390.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
public:
using TurboAssemblerBase::TurboAssemblerBase;

// Unconditionally calls |builtin| through its off-heap entry address
// (materialized in ip).
void CallBuiltin(Builtin builtin);
void AtomicCmpExchangeHelper(Register addr, Register output,
Register old_value, Register new_value,
int start, int end, int shift_amount, int offset,
Expand Down
17 changes: 13 additions & 4 deletions src/deoptimizer/ppc/deoptimizer-ppc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,24 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/codegen/assembler-inl.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/register-configuration.h"
#include "src/codegen/safepoint-table.h"
#include "src/deoptimizer/deoptimizer.h"
#include "src/execution/isolate-data.h"

namespace v8 {
namespace internal {

// The deopt exit sizes below depend on the following IsolateData layout
// guarantees:
//
// ASSERT_OFFSET(BuiltinName) checks at compile time that the byte offset of
// the builtin's slot, computed as the tier-0 builtin entry table offset plus
// the builtin's integer id times kSystemPointerSize, is at most 0x1000.
// It is applied to each of the four deoptimization entry builtins and then
// undefined so the macro does not leak past this point.
#define ASSERT_OFFSET(BuiltinName) \
STATIC_ASSERT(IsolateData::builtin_tier0_entry_table_offset() + \
Builtins::ToInt(BuiltinName) * kSystemPointerSize <= \
0x1000)
ASSERT_OFFSET(Builtin::kDeoptimizationEntry_Eager);
ASSERT_OFFSET(Builtin::kDeoptimizationEntry_Lazy);
ASSERT_OFFSET(Builtin::kDeoptimizationEntry_Soft);
ASSERT_OFFSET(Builtin::kDeoptimizationEntry_Bailout);
#undef ASSERT_OFFSET

// Deopt exits have a fixed size on this architecture; lazy and non-lazy exits
// are both three instructions long.
// NOTE(review): presumably this matches the load + call sequence emitted by
// TurboAssembler::CallForDeoptimization - confirm when changing either side.
const bool Deoptimizer::kSupportsFixedDeoptExitSizes = true;
const int Deoptimizer::kNonLazyDeoptExitSize = 3 * kInstrSize;
const int Deoptimizer::kLazyDeoptExitSize = 3 * kInstrSize;
Expand Down
13 changes: 13 additions & 0 deletions src/deoptimizer/s390/deoptimizer-s390.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,23 @@
// found in the LICENSE file.

#include "src/deoptimizer/deoptimizer.h"
#include "src/execution/isolate-data.h"

namespace v8 {
namespace internal {

// The deopt exit sizes below depend on the following IsolateData layout
// guarantees:
//
// ASSERT_OFFSET(BuiltinName) checks at compile time that the byte offset of
// the builtin's slot, computed as the tier-0 builtin entry table offset plus
// the builtin's integer id times kSystemPointerSize, is at most 0x1000.
// It is applied to each of the four deoptimization entry builtins and then
// undefined so the macro does not leak past this point.
#define ASSERT_OFFSET(BuiltinName) \
STATIC_ASSERT(IsolateData::builtin_tier0_entry_table_offset() + \
Builtins::ToInt(BuiltinName) * kSystemPointerSize <= \
0x1000)
ASSERT_OFFSET(Builtin::kDeoptimizationEntry_Eager);
ASSERT_OFFSET(Builtin::kDeoptimizationEntry_Lazy);
ASSERT_OFFSET(Builtin::kDeoptimizationEntry_Soft);
ASSERT_OFFSET(Builtin::kDeoptimizationEntry_Bailout);
#undef ASSERT_OFFSET

// Deopt exits have a fixed size on this architecture; lazy and non-lazy exits
// are both 6 + 2.
// NOTE(review): presumably bytes, i.e. a 6-byte load of the entry slot
// followed by a 2-byte call as emitted by
// TurboAssembler::CallForDeoptimization - confirm when changing either side.
const bool Deoptimizer::kSupportsFixedDeoptExitSizes = true;
const int Deoptimizer::kNonLazyDeoptExitSize = 6 + 2;
const int Deoptimizer::kLazyDeoptExitSize = 6 + 2;
Expand Down

0 comments on commit dc88bdf

Please sign in to comment.