Set context/membase registers to rsi/rdi respectively.
Use offsetof for the backend thunks.
DrChat committed Aug 26, 2016
1 parent fbedb91 commit 7683069
Showing 6 changed files with 238 additions and 242 deletions.
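
The two changes go together: pinning the guest context and membase in rsi and rdi — both nonvolatile under the Microsoft x64 calling convention — means generated code no longer has to reload them after calling into host code, and the thunks now derive their spill-slot offsets from a StackLayout::Thunk structure via offsetof instead of hand-counted constants. A minimal sketch of the offsetof idea follows; the ThunkFrame layout and field names below are stand-ins for illustration, not the real StackLayout::Thunk.

```cpp
// Sketch only: a hypothetical thunk stack frame. offsetof() turns named spill
// slots into the numeric rsp offsets the thunk emitter needs, so resizing or
// reordering the frame can no longer silently break hard-coded numbers like
// "rsp + 40" or "rsp + 112".
#include <cstddef>
#include <cstdint>
#include <cstdio>

struct alignas(16) Xmm128 {
  uint64_t lo, hi;  // one 128-bit XMM save slot
};

struct ThunkFrame {
  uint64_t scratch[5];  // assumed: shadow space / scratch below the saves
  uint64_t r[9];        // rbx, rcx, rbp, rsi, rdi, r12-r15
  Xmm128 xmm[10];       // xmm6-xmm15
};

int main() {
  // The emitter would use these exactly as the diff does, e.g.
  //   mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rsi);
  std::printf("r[3]   -> rsp + %zu\n", offsetof(ThunkFrame, r[3]));
  std::printf("xmm[0] -> rsp + %zu\n", offsetof(ThunkFrame, xmm[0]));
  return 0;
}
```
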
158 changes: 73 additions & 85 deletions src/xenia/cpu/backend/x64/x64_backend.cc
@@ -388,8 +388,8 @@ X64ThunkEmitter::~X64ThunkEmitter() {}

HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
// rcx = target
// rdx = arg0
// r8 = arg1
// rdx = arg0 (context)
// r8 = arg1 (guest return address)

const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
// rsp + 0 = return address
@@ -399,52 +399,52 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
sub(rsp, stack_size);

// Preserve nonvolatile registers.
mov(qword[rsp + 40], rbx);
mov(qword[rsp + 48], rcx);
mov(qword[rsp + 56], rbp);
mov(qword[rsp + 64], rsi);
mov(qword[rsp + 72], rdi);
mov(qword[rsp + 80], r12);
mov(qword[rsp + 88], r13);
mov(qword[rsp + 96], r14);
mov(qword[rsp + 104], r15);

movaps(ptr[rsp + 112], xmm6);
movaps(ptr[rsp + 128], xmm7);
movaps(ptr[rsp + 144], xmm8);
movaps(ptr[rsp + 160], xmm9);
movaps(ptr[rsp + 176], xmm10);
movaps(ptr[rsp + 192], xmm11);
movaps(ptr[rsp + 208], xmm12);
movaps(ptr[rsp + 224], xmm13);
movaps(ptr[rsp + 240], xmm14);
movaps(ptr[rsp + 256], xmm15);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rbx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rbp);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rsi);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdi);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r12);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r13);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[7])], r14);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[8])], r15);

movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[0])], xmm6);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm7);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm8);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[3])], xmm9);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[4])], xmm10);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[5])], xmm11);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[6])], xmm12);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[7])], xmm13);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[8])], xmm14);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[9])], xmm15);

mov(rax, rcx);
mov(rcx, rdx);
mov(rsi, rdx); // context
mov(rdx, r8);
call(rax);

movaps(xmm6, ptr[rsp + 112]);
movaps(xmm7, ptr[rsp + 128]);
movaps(xmm8, ptr[rsp + 144]);
movaps(xmm9, ptr[rsp + 160]);
movaps(xmm10, ptr[rsp + 176]);
movaps(xmm11, ptr[rsp + 192]);
movaps(xmm12, ptr[rsp + 208]);
movaps(xmm13, ptr[rsp + 224]);
movaps(xmm14, ptr[rsp + 240]);
movaps(xmm15, ptr[rsp + 256]);

mov(rbx, qword[rsp + 40]);
mov(rcx, qword[rsp + 48]);
mov(rbp, qword[rsp + 56]);
mov(rsi, qword[rsp + 64]);
mov(rdi, qword[rsp + 72]);
mov(r12, qword[rsp + 80]);
mov(r13, qword[rsp + 88]);
mov(r14, qword[rsp + 96]);
mov(r15, qword[rsp + 104]);
movaps(xmm6, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
movaps(xmm7, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
movaps(xmm8, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
movaps(xmm9, qword[rsp + offsetof(StackLayout::Thunk, xmm[3])]);
movaps(xmm10, qword[rsp + offsetof(StackLayout::Thunk, xmm[4])]);
movaps(xmm11, qword[rsp + offsetof(StackLayout::Thunk, xmm[5])]);
movaps(xmm12, qword[rsp + offsetof(StackLayout::Thunk, xmm[6])]);
movaps(xmm13, qword[rsp + offsetof(StackLayout::Thunk, xmm[7])]);
movaps(xmm14, qword[rsp + offsetof(StackLayout::Thunk, xmm[8])]);
movaps(xmm15, qword[rsp + offsetof(StackLayout::Thunk, xmm[9])]);

mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
mov(rbp, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);

add(rsp, stack_size);
mov(rcx, qword[rsp + 8 * 1]);
@@ -469,34 +469,40 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
mov(qword[rsp + 8 * 1], rcx);
sub(rsp, stack_size);

mov(qword[rsp + 40], rbx);
mov(qword[rsp + 48], rcx);
mov(qword[rsp + 56], rbp);
mov(qword[rsp + 64], rsi);
mov(qword[rsp + 72], rdi);
mov(qword[rsp + 80], r12);
mov(qword[rsp + 88], r13);
mov(qword[rsp + 96], r14);
mov(qword[rsp + 104], r15);

// TODO(benvanik): save things? XMM0-5?
// HACK: Some emulated vector instructions require that we don't touch xmm0.
// Save off volatile registers.
// TODO(DrChat): Enable this when we actually need this.
// mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rcx);
// mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rdx);
// mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], r8);
// mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], r9);
// mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], r10);
// mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r11);

// movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm1);
// movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm2);
// movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[3])], xmm3);
// movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[4])], xmm4);
// movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[5])], xmm5);

mov(rax, rdx);
mov(rcx, rsi); // context
mov(rdx, r8);
mov(r8, r9);
mov(r9, r10);
call(rax);

mov(rbx, qword[rsp + 40]);
mov(rcx, qword[rsp + 48]);
mov(rbp, qword[rsp + 56]);
mov(rsi, qword[rsp + 64]);
mov(rdi, qword[rsp + 72]);
mov(r12, qword[rsp + 80]);
mov(r13, qword[rsp + 88]);
mov(r14, qword[rsp + 96]);
mov(r15, qword[rsp + 104]);
// movaps(xmm1, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
// movaps(xmm2, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
// movaps(xmm3, qword[rsp + offsetof(StackLayout::Thunk, xmm[3])]);
// movaps(xmm4, qword[rsp + offsetof(StackLayout::Thunk, xmm[4])]);
// movaps(xmm5, qword[rsp + offsetof(StackLayout::Thunk, xmm[5])]);

// mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
// mov(rdx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
// mov(r8, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
// mov(r9, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
// mov(r10, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
// mov(r11, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);

add(rsp, stack_size);
mov(rcx, qword[rsp + 8 * 1]);
@@ -514,36 +520,18 @@ ResolveFunctionThunk X64ThunkEmitter::EmitResolveFunctionThunk() {
// ebx = target PPC address
// rcx = context

const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
uint32_t stack_size = 0x18;

// rsp + 0 = return address
mov(qword[rsp + 8 * 2], rdx);
mov(qword[rsp + 8 * 1], rcx);
sub(rsp, stack_size);

mov(qword[rsp + 40], rbx);
mov(qword[rsp + 48], rcx);
mov(qword[rsp + 56], rbp);
mov(qword[rsp + 64], rsi);
mov(qword[rsp + 72], rdi);
mov(qword[rsp + 80], r12);
mov(qword[rsp + 88], r13);
mov(qword[rsp + 96], r14);
mov(qword[rsp + 104], r15);

mov(rcx, rsi); // context
mov(rdx, rbx);
mov(rax, uint64_t(&ResolveFunction));
call(rax);

mov(rbx, qword[rsp + 40]);
mov(rcx, qword[rsp + 48]);
mov(rbp, qword[rsp + 56]);
mov(rsi, qword[rsp + 64]);
mov(rdi, qword[rsp + 72]);
mov(r12, qword[rsp + 80]);
mov(r13, qword[rsp + 88]);
mov(r14, qword[rsp + 96]);
mov(r15, qword[rsp + 104]);

add(rsp, stack_size);
mov(rcx, qword[rsp + 8 * 1]);
mov(rdx, qword[rsp + 8 * 2]);
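
As a reading aid, the register shuffle in EmitGuestToHostThunk above can be modeled in plain C. This is not the real thunk (which is emitted assembly and, per the commented-out blocks, will eventually also spill volatile registers); the handler signature and parameter names are assumptions that simply mirror the register comments: context pinned in rsi, target in rdx, arguments in r8/r9/r10, remapped into the Microsoft x64 ABI before the call.

```cpp
#include <cstdint>
#include <cstdio>

// Host functions reached through the thunk follow the regular Microsoft x64
// ABI: context in rcx, then up to three integer arguments in rdx/r8/r9.
using HostHandler = uint64_t (*)(void* context, uint64_t arg0, uint64_t arg1,
                                 uint64_t arg2);

// C-level model of the guest-to-host handoff: rsi -> rcx, rdx -> rax (target),
// r8 -> rdx, r9 -> r8, r10 -> r9, then call.
uint64_t GuestToHostModel(void* context /* rsi */, HostHandler target /* rdx */,
                          uint64_t arg0 /* r8 */, uint64_t arg1 /* r9 */,
                          uint64_t arg2 /* r10 */) {
  return target(context, arg0, arg1, arg2);
}

// Toy handler standing in for a builtin/extern host function.
static uint64_t DummyHandler(void*, uint64_t a, uint64_t b, uint64_t c) {
  return a + b + c;
}

int main() {
  std::printf("%llu\n", static_cast<unsigned long long>(
                            GuestToHostModel(nullptr, DummyHandler, 1, 2, 3)));
  return 0;
}
```
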
4 changes: 4 additions & 0 deletions src/xenia/cpu/backend/x64/x64_code_cache.cc
@@ -181,6 +181,10 @@ void* X64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code,
// Copy code.
std::memcpy(code_address, machine_code, code_size);

// Fill unused slots with 0xCC
std::memset(code_address + code_size, 0xCC,
xe::round_up(code_size, 16) - code_size);

// Notify subclasses of placed code.
PlaceCode(guest_address, machine_code, code_size, stack_size, code_address,
unwind_reservation);
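
The code-cache hunk above pads each placed function out to its 16-byte-aligned size with 0xCC, the x86 int3 breakpoint opcode, so a stray jump into the gap between two functions traps instead of executing leftover bytes. A standalone sketch of the same arithmetic, with xe::round_up approximated by a local helper (an assumption about its behavior):

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Stand-in for xe::round_up: round value up to the next multiple.
static size_t RoundUp(size_t value, size_t multiple) {
  return (value + multiple - 1) / multiple * multiple;
}

int main() {
  const size_t code_size = 37;                        // example machine-code size
  std::vector<uint8_t> slot(RoundUp(code_size, 16));  // 16-byte-aligned slot

  std::memset(slot.data(), 0x90, code_size);          // pretend code (nop sled)
  // Fill unused slots with 0xCC, mirroring the diff:
  std::memset(slot.data() + code_size, 0xCC, RoundUp(code_size, 16) - code_size);

  std::printf("code=%zu padded=%zu pad_bytes=%zu\n", code_size, slot.size(),
              slot.size() - code_size);
  return 0;
}
```
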
57 changes: 25 additions & 32 deletions src/xenia/cpu/backend/x64/x64_emitter.cc
@@ -168,7 +168,7 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) {
*out_stack_size = stack_size;
stack_size_ = stack_size;
sub(rsp, (uint32_t)stack_size);
mov(qword[rsp + StackLayout::GUEST_CTX_HOME], rcx);
mov(qword[rsp + StackLayout::GUEST_CTX_HOME], GetContextReg());
mov(qword[rsp + StackLayout::GUEST_RET_ADDR], rdx);
mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], 0);

@@ -201,7 +201,8 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) {
}

// Load membase.
mov(rdx, qword[rcx + offsetof(ppc::PPCContext, virtual_membase)]);
mov(GetMembaseReg(),
qword[GetContextReg() + offsetof(ppc::PPCContext, virtual_membase)]);

// Body.
auto block = builder->first_block();
@@ -233,7 +234,7 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) {
L(epilog_label);
epilog_label_ = nullptr;
EmitTraceUserCallReturn();
mov(rcx, qword[rsp + StackLayout::GUEST_CTX_HOME]);
mov(GetContextReg(), qword[rsp + StackLayout::GUEST_CTX_HOME]);
add(rsp, (uint32_t)stack_size);
ret();

@@ -272,8 +273,8 @@ void X64Emitter::MarkSourceOffset(const Instr* i) {
}

void X64Emitter::EmitGetCurrentThreadId() {
// rcx must point to context. We could fetch from the stack if needed.
mov(ax, word[rcx + offsetof(ppc::PPCContext, thread_id)]);
// rsi must point to context. We could fetch from the stack if needed.
mov(ax, word[GetContextReg() + offsetof(ppc::PPCContext, thread_id)]);
}

void X64Emitter::EmitTraceUserCallReturn() {}
@@ -372,10 +373,9 @@ void X64Emitter::Call(const hir::Instr* instr, GuestFunction* function) {
// Not too important because indirection table is almost always available.
// TODO: Overwrite the call-site with a straight call.
mov(rax, reinterpret_cast<uint64_t>(ResolveFunction));
mov(rcx, GetContextReg());
mov(rdx, function->address());
call(rax);
ReloadECX();
ReloadEDX();
}

// Actually jump/call to rax.
@@ -417,9 +417,8 @@ void X64Emitter::CallIndirect(const hir::Instr* instr,
// Not too important because indirection table is almost always available.
mov(edx, reg.cvt32());
mov(rax, reinterpret_cast<uint64_t>(ResolveFunction));
mov(rcx, GetContextReg());
call(rax);
ReloadECX();
ReloadEDX();
}

// Actually jump/call to rax.
@@ -461,14 +460,13 @@ void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {
// rdx = target host function
// r8 = arg0
// r9 = arg1
mov(rcx, GetContextReg());
mov(rdx, reinterpret_cast<uint64_t>(builtin_function->handler()));
mov(r8, reinterpret_cast<uint64_t>(builtin_function->arg0()));
mov(r9, reinterpret_cast<uint64_t>(builtin_function->arg1()));
auto thunk = backend()->guest_to_host_thunk();
mov(rax, reinterpret_cast<uint64_t>(thunk));
call(rax);
ReloadECX();
ReloadEDX();
// rax = host return
}
} else if (function->behavior() == Function::Behavior::kExtern) {
@@ -477,13 +475,12 @@ void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {
undefined = false;
// rcx = context
// rdx = target host function
mov(rcx, GetContextReg());
mov(rdx, reinterpret_cast<uint64_t>(extern_function->extern_handler()));
mov(r8, qword[rcx + offsetof(ppc::PPCContext, kernel_state)]);
mov(r8, qword[GetContextReg() + offsetof(ppc::PPCContext, kernel_state)]);
auto thunk = backend()->guest_to_host_thunk();
mov(rax, reinterpret_cast<uint64_t>(thunk));
call(rax);
ReloadECX();
ReloadEDX();
// rax = host return
}
}
@@ -494,32 +491,28 @@ void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {

void X64Emitter::CallNative(void* fn) {
mov(rax, reinterpret_cast<uint64_t>(fn));
mov(rcx, GetContextReg());
call(rax);
ReloadECX();
ReloadEDX();
}

void X64Emitter::CallNative(uint64_t (*fn)(void* raw_context)) {
mov(rax, reinterpret_cast<uint64_t>(fn));
mov(rcx, GetContextReg());
call(rax);
ReloadECX();
ReloadEDX();
}

void X64Emitter::CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0)) {
mov(rax, reinterpret_cast<uint64_t>(fn));
mov(rcx, GetContextReg());
call(rax);
ReloadECX();
ReloadEDX();
}

void X64Emitter::CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0),
uint64_t arg0) {
mov(rdx, arg0);
mov(rax, reinterpret_cast<uint64_t>(fn));
mov(rcx, GetContextReg());
mov(rdx, arg0);
call(rax);
ReloadECX();
ReloadEDX();
}

void X64Emitter::CallNativeSafe(void* fn) {
@@ -528,12 +521,11 @@ void X64Emitter::CallNativeSafe(void* fn) {
// r8 = arg0
// r9 = arg1
// r10 = arg2
mov(rdx, reinterpret_cast<uint64_t>(fn));
auto thunk = backend()->guest_to_host_thunk();
mov(rax, reinterpret_cast<uint64_t>(thunk));
mov(rcx, GetContextReg());
mov(rdx, reinterpret_cast<uint64_t>(fn));
call(rax);
ReloadECX();
ReloadEDX();
// rax = host return
}

@@ -542,15 +534,16 @@ void X64Emitter::SetReturnAddress(uint64_t value) {
mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], rax);
}

Xbyak::Reg64 X64Emitter::GetContextReg() { return rcx; }
Xbyak::Reg64 X64Emitter::GetMembaseReg() { return rdx; }
// Important: If you change these, you must update the thunks in x64_backend.cc!
Xbyak::Reg64 X64Emitter::GetContextReg() { return rsi; }
Xbyak::Reg64 X64Emitter::GetMembaseReg() { return rdi; }

void X64Emitter::ReloadECX() {
mov(rcx, qword[rsp + StackLayout::GUEST_CTX_HOME]);
void X64Emitter::ReloadContext() {
mov(GetContextReg(), qword[rsp + StackLayout::GUEST_CTX_HOME]);
}

void X64Emitter::ReloadEDX() {
mov(rdx, qword[rcx + 8]); // membase
void X64Emitter::ReloadMembase() {
mov(GetMembaseReg(), qword[GetContextReg() + 8]); // membase
}

// Len Assembly Byte Sequence
4 changes: 2 additions & 2 deletions src/xenia/cpu/backend/x64/x64_emitter.h
@@ -173,8 +173,8 @@ class X64Emitter : public Xbyak::CodeGenerator {

Xbyak::Reg64 GetContextReg();
Xbyak::Reg64 GetMembaseReg();
void ReloadECX();
void ReloadEDX();
void ReloadContext();
void ReloadMembase();

void nop(size_t length = 1);

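
Tying the header changes together: ReloadECX/ReloadEDX become ReloadContext/ReloadMembase, and because rsi and rdi must be preserved by callees under the Windows x64 ABI, the ReloadECX()/ReloadEDX() calls that used to follow host calls in x64_emitter.cc can go away. Below is a rough, self-contained Xbyak sketch of the pinned-register convention; FakeContext, its layout, and all names are assumptions for illustration rather than Xenia code, and for simplicity it takes the context as its first argument (rcx), whereas the real host-to-guest thunk receives it as the second (rdx).

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include "xbyak/xbyak.h"

// Hypothetical context layout standing in for ppc::PPCContext.
struct FakeContext {
  uint32_t thread_id;
  uint8_t* virtual_membase;
};

struct PinnedRegSketch : Xbyak::CodeGenerator {
  Xbyak::Reg64 ctx() { return rsi; }      // like X64Emitter::GetContextReg()
  Xbyak::Reg64 membase() { return rdi; }  // like X64Emitter::GetMembaseReg()

  PinnedRegSketch() {
    // A real thunk saves far more state; preserving rsi/rdi is the minimum
    // needed to keep this fragment ABI-correct on Windows x64.
    push(rsi);
    push(rdi);
    // On entry here: rcx = context pointer. Pin it, then cache membase.
    mov(ctx(), rcx);
    mov(membase(), qword[ctx() + offsetof(FakeContext, virtual_membase)]);
    // ...emitted guest code would address guest memory as [membase() + addr]
    // and guest state as [ctx() + offset], with no reload needed after host
    // calls because both registers are nonvolatile...
    pop(rdi);
    pop(rsi);
    ret();
  }
};

int main() {
  PinnedRegSketch sketch;  // emits the fragment into an executable buffer
  std::printf("emitted %zu bytes\n", sketch.getSize());
  return 0;
}
```
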
