Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
target-sh4: implement gUSA algorithm
The SH kernel implements gUSA ("g" User Space Atomicity) support
for user-space atomicity. A gUSA sequence is signalled by a negative
value in the stack pointer (R15).

We try to identify the atomicity macros provided by the glibc and
replace them with a real atomic operation.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
  • Loading branch information
vivier committed Dec 6, 2016
1 parent fd2e837 commit 25013c1
Show file tree
Hide file tree
Showing 3 changed files with 289 additions and 5 deletions.
1 change: 1 addition & 0 deletions Makefile.objs
Expand Up @@ -159,6 +159,7 @@ trace-events-y += target-arm/trace-events
trace-events-y += target-i386/trace-events
trace-events-y += target-sparc/trace-events
trace-events-y += target-s390x/trace-events
trace-events-y += target-sh4/trace-events
trace-events-y += target-ppc/trace-events
trace-events-y += qom/trace-events
trace-events-y += linux-user/trace-events
Expand Down
10 changes: 10 additions & 0 deletions target-sh4/trace-events
@@ -0,0 +1,10 @@
# See docs/tracing.txt for syntax documentation.

# target-sh4/translate.c
# gUSA (user-space atomicity) events emitted by gUSA_hack(): one event per
# recognized atomic idiom, plus gUSA_atomic_unknown_op for rejected sequences.
gUSA_atomic_xchg(uint32_t pc, int addr_reg, int work_reg, int update_reg, int memop) "%08x: address: r%d result: r%d update with: r%d memop: %x"
gUSA_atomic_unknown_op(uint32_t pc, uint16_t opc) "%08x: unknown operation %04x"
gUSA_atomic_cmpxchg(uint32_t pc, int addr_reg, int work_reg, int cmp_reg, int update_reg, int memop) "%08x: address: r%d work: r%d cmp: r%d update with: r%d memop: %x"
gUSA_atomic_fetch_or(uint32_t pc, int addr_reg, int work_reg, int result_reg, int update_reg, int memop) "%08x: address: r%d work: r%d result: r%d update with: r%d memop: %x"
gUSA_atomic_or_fetch(uint32_t pc, int addr_reg, int work_reg, int op_reg, int memop) "%08x: address: r%d work: r%d op: r%d memop: %x"
gUSA_atomic_add_fetch(uint32_t pc, int addr_reg, int work_reg, int op_reg, int memop) "%08x: address: r%d work: r%d op: r%d memop: %x"
gUSA_atomic_addi_fetch(uint32_t pc, int addr_reg, int work_reg, int imm, int memop) "%08x: address: r%d work: r%d imm: %08x memop: %x"
283 changes: 278 additions & 5 deletions target-sh4/translate.c
Expand Up @@ -31,6 +31,7 @@

#include "trace-tcg.h"
#include "exec/log.h"
#include "trace.h"


typedef struct DisasContext {
Expand Down Expand Up @@ -363,7 +364,272 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg)
return; \
}

static void _decode_opc(DisasContext * ctx)
#if defined(CONFIG_LINUX_USER)
static void gUSA_hack(CPUSH4State * env, DisasContext * ctx,
int8_t section_size)
{
uint16_t opc;
uint32_t pc, section_end;
int work_reg, addr_reg, cmp_reg, update_reg;
TCGMemOp memop;
int opsize;

/*
* gUSA sequence is:
*
* mova 1f,r0
* .align 2
* mov r15,r1
* mov #(0f-1f),r15
* 0: ATOMIC OPERATION
* 1: mov r1,r15
*
* The negative value in r15 triggers the hack (as for the kernel)
* ABI:
* r15: -(size of atomic instruction sequence) < 0
* r0: end point
* r1: saved stack pointer
*/

if (section_size < 4) {
/* we must fit at least a load and a store */
return;
}

section_end = ctx->pc + section_size;
pc = ctx->pc + 2;

/* the memory read operation */

opc = cpu_lduw_code(env, pc);
pc += 2;

/* extract memory address and size */

switch (opc & 0xf00f) {
case 0x6000: /* mov.b @%1,%0 */
memop = MO_SB;
break;
case 0x6001: /* mov.w @%1,%0 */
memop = MO_TESW;
break;
case 0x6002: /* mov.l @%1,%0 */
memop = MO_TESL;
break;
default:
goto error;
}
opsize = opc & 0xf;
addr_reg = (opc >> 4) & 0xf;
work_reg = (opc >> 8) & 0xf;

/* check we write back the value to the memory
* using the same size and the same address register
* we can't check the address as it is dynamic
*/

opc = cpu_lduw_code(env, section_end);
if ((opc & 0xf000) != 0x2000 ||
opsize != (opc & 0xf) ||
addr_reg != ((opc >> 8) & 0xf)) {
goto error;
}
update_reg = (opc >> 4) & 0xf;

/* now decode the atomic operation */

if (section_size == 4) { /* atomic_xchg */
/* mov.l @r[addr],r[work]
* mov.l r[update],@r[addr]
*/
trace_gUSA_atomic_xchg(pc, addr_reg, work_reg, update_reg, memop);
/* just enough space for the load and store */
tcg_gen_atomic_xchg_i32(REG(work_reg), REG(addr_reg), REG(update_reg),
ctx->memidx, memop);
ctx->pc = section_end;
return;
}

opc = cpu_lduw_code(env, pc);
pc += 2;
if ((opc & 0xf00f) == 0x3000) { /* atomic_cmpxchg */
/* mov.l @r[addr],r[work]
* cmp/eq r[work],r[cmp]
* bf 1f
* mov.l r[update],@r[addr]
* 1:
*/
TCGv temp;

/* check "cmp/eq r[work],r[cmp]" */

if (section_size != 8) {
goto error;
}
cmp_reg = ((opc >> 8) & 0xf);
/* check we compare the reg we have just read */
if (work_reg != ((opc >> 4) & 0xf)) {
goto error;
}
/* check "bf 1f" */
opc = cpu_lduw_code(env, pc);
pc += 2;
if ((opc & 0xff00) != 0x8b00) {
goto error;
}
if (pc + ((int32_t) (int8_t) (opc & 0xff)) * 2 != section_end) {
goto error;
}
/* we have found a cmpxchg */
trace_gUSA_atomic_cmpxchg(pc - 2, addr_reg, work_reg, cmp_reg,
update_reg, memop);

temp = tcg_temp_new();
tcg_gen_mov_i32(temp, REG(cmp_reg));
tcg_gen_atomic_cmpxchg_i32(REG(work_reg), REG(addr_reg),
REG(cmp_reg), REG(update_reg),
ctx->memidx, memop);
tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, temp, REG(work_reg));
tcg_temp_free(temp);
} else if ((opc & 0xf00f) == 0x2008) { /* compare exchange with 0*/
/* mov.l @r[addr],r[work]
* tst r[work],r[work]
* bf 0xf665c91c
* mov.l r[update],@r[addr]
* 1:
*/
TCGv cmp_val;

if (section_size != 8) {
goto error;
}

/* "tst r[work],r[work]" */
if (((opc >> 8) & 0xf) != ((opc >> 4) & 0xf)) {
goto error;
}

/* check we compare the reg we have just read */
if (work_reg != ((opc >> 4) & 0xf)) {
goto error;
}
/* check "bf 1f" */
opc = cpu_lduw_code(env, pc);
pc += 2;
if ((opc & 0xff00) != 0x8b00) {
goto error;
}
if (pc + ((int32_t) (int8_t) (opc & 0xff)) * 2 != section_end) {
goto error;
}
/* we have found a cmpxchg with 0*/
trace_gUSA_atomic_cmpxchg(pc - 2, addr_reg, work_reg, 0, update_reg,
memop);

cmp_val = tcg_const_i32(0);
tcg_gen_atomic_cmpxchg_i32(REG(work_reg), REG(addr_reg),
cmp_val, REG(update_reg),
ctx->memidx, memop);
tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, REG(work_reg), cmp_val);
tcg_temp_free(cmp_val);
} else if ((opc & 0xf00f) == 0x6003) { /* atomic_fetch_or */
/* mov.l @r[addr],r[work]
* mov r[work],r[result]
* or r[work],r[update]
* mov.l r[update],@r[addr]
*/
int result_reg;
TCGv temp;
/* "mov r[work],r[result]" */

if (section_size != 8) {
goto error;
}

/* check we move from the work_reg */

if (work_reg != ((opc >> 4) & 0xf)) {
goto error;
}
result_reg = (opc >> 8) & 0xf;

/* "or r[work],r[update]" */
opc = cpu_lduw_code(env, pc);
pc += 2;

if (work_reg != ((opc >> 4) & 0xf) ||
update_reg != ((opc >> 8) & 0xf)) {
goto error;
}

/* we have found a fetch or */
trace_gUSA_atomic_fetch_or(pc - 2, addr_reg, work_reg, result_reg,
update_reg, memop);

temp = tcg_temp_new();
tcg_gen_mov_i32(temp, REG(update_reg));
tcg_gen_atomic_fetch_or_i32(REG(work_reg), REG(addr_reg),
REG(update_reg),
ctx->memidx, memop);
tcg_gen_mov_i32(REG(result_reg), REG(work_reg));
tcg_gen_or_i32(REG(update_reg), REG(work_reg), temp);
tcg_temp_free(temp);
} else if ((opc & 0xf000) == 0x7000 ||
(opc & 0xf00f) == 0x300c || /* atomic_add_fetch */
(opc & 0xf00f) == 0x200b) /* atomic_or_fetch */ {
/* mov.l @r[addr],r[work]
* add r[op],r[work]
* mov.l r[work],@r[addr]
*/
TCGv imm;
int op_reg;

/* "add #imm,r[work]" or "add r[op],r[work]" or "or r[op],r[work]" */

if (section_size != 6) {
goto error;
}
/* work_reg and update_reg are used for the add */
if (work_reg != update_reg ||
work_reg != ((opc >> 8) & 0xf)) {
goto error;
}

switch (opc & 0xf000) {
case 0x2000:
op_reg = (opc >> 4) & 0xf;
trace_gUSA_atomic_or_fetch(pc - 2, addr_reg, work_reg, op_reg,
memop);
tcg_gen_atomic_or_fetch_i32(REG(work_reg), REG(addr_reg),
REG(op_reg), ctx->memidx, memop);
break;
case 0x3000:
op_reg = (opc >> 4) & 0xf;
trace_gUSA_atomic_add_fetch(pc - 2, addr_reg, work_reg, op_reg,
memop);
tcg_gen_atomic_add_fetch_i32(REG(work_reg), REG(addr_reg),
REG(op_reg), ctx->memidx, memop);
break;
case 0x7000:
imm = tcg_const_i32((int32_t) (int8_t) (opc & 0xff));
trace_gUSA_atomic_addi_fetch(pc - 2, addr_reg, work_reg,
(int32_t) (int8_t) (opc & 0xff),
memop);
tcg_gen_atomic_add_fetch_i32(REG(work_reg), REG(addr_reg),
imm, ctx->memidx, memop);
tcg_temp_free(imm);
break;
}
} else {
error:
trace_gUSA_atomic_unknown_op(pc - 2, opc);
return;
}
ctx->pc = section_end;
}
#endif

static void _decode_opc(CPUSH4State * env, DisasContext * ctx)
{
/* This code tries to make movcal emulation sufficiently
accurate for Linux purposes. This instruction writes
Expand Down Expand Up @@ -475,7 +741,14 @@ static void _decode_opc(DisasContext * ctx)
}
return;
case 0xe000: /* mov #imm,Rn */
tcg_gen_movi_i32(REG(B11_8), B7_0s);
#if defined(CONFIG_LINUX_USER)
if (B7_0s < 0 && B11_8 == 15) {
gUSA_hack(env, ctx, -B7_0s);
} else
#endif
{
tcg_gen_movi_i32(REG(B11_8), B7_0s);
}
return;
case 0x9000: /* mov.w @(disp,PC),Rn */
{
Expand Down Expand Up @@ -1797,11 +2070,11 @@ static void _decode_opc(DisasContext * ctx)
ctx->bstate = BS_BRANCH;
}

static void decode_opc(DisasContext * ctx)
static void decode_opc(CPUSH4State * env, DisasContext * ctx)
{
uint32_t old_flags = ctx->flags;

_decode_opc(ctx);
_decode_opc(env, ctx);

if (old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
if (ctx->flags & DELAY_SLOT_CLEARME) {
Expand Down Expand Up @@ -1881,7 +2154,7 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
}

ctx.opcode = cpu_lduw_code(env, ctx.pc);
decode_opc(&ctx);
decode_opc(env, &ctx);
ctx.pc += 2;
if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0)
break;
Expand Down

0 comments on commit 25013c1

Please sign in to comment.