Skip to content

Commit

Permalink
x86/cpu: Clean up SRSO return thunk mess
Browse files Browse the repository at this point in the history
commit d43490d upstream.

Use the existing configurable return thunk. There is absolute no
justification for having created this __x86_return_thunk alternative.

To clarify, the whole thing looks like:

Zen3/4 does:

  srso_alias_untrain_ret:
	  nop2
	  lfence
	  jmp srso_alias_return_thunk
	  int3

  srso_alias_safe_ret: // aliasses srso_alias_untrain_ret just so
	  add $8, %rsp
	  ret
	  int3

  srso_alias_return_thunk:
	  call srso_alias_safe_ret
	  ud2

While Zen1/2 does:

  srso_untrain_ret:
	  movabs $foo, %rax
	  lfence
	  call srso_safe_ret           (jmp srso_return_thunk ?)
	  int3

  srso_safe_ret: // embedded in movabs instruction
	  add $8,%rsp
          ret
          int3

  srso_return_thunk:
	  call srso_safe_ret
	  ud2

While retbleed does:

  zen_untrain_ret:
	  test $0xcc, %bl
	  lfence
	  jmp zen_return_thunk
          int3

  zen_return_thunk: // embedded in the test instruction
	  ret
          int3

Where Zen1/2 flush the BTB entry using the instruction decoder trick
(test,movabs) Zen3/4 use BTB aliasing. SRSO adds a return sequence
(srso_safe_ret()) which forces the function return instruction to
speculate into a trap (UD2).  This RET will then mispredict and
execution will continue at the return site read from the top of the
stack.

Pick one of three options at boot (evey function can only ever return
once).

  [ bp: Fixup commit message uarch details and add them in a comment in
    the code too. Add a comment about the srso_select_mitigation()
    dependency on retbleed_select_mitigation(). Add moar ifdeffery for
    32-bit builds. Add a dummy srso_untrain_ret_alias() definition for
    32-bit alternatives needing the symbol. ]

Fixes: fb3bd91 ("x86/srso: Add a Speculative RAS Overflow mitigation")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230814121148.842775684@infradead.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  • Loading branch information
Peter Zijlstra authored and gregkh committed Aug 23, 2023
1 parent 06bcb3d commit 4f0d18c
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 22 deletions.
5 changes: 5 additions & 0 deletions arch/x86/include/asm/nospec-branch.h
Expand Up @@ -343,9 +343,14 @@ extern void __x86_return_thunk(void);
static inline void __x86_return_thunk(void) {}
#endif

extern void zen_return_thunk(void);
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);

extern void zen_untrain_ret(void);
extern void srso_untrain_ret(void);
extern void srso_untrain_ret_alias(void);

extern void entry_ibpb(void);

extern void (*x86_return_thunk)(void);
Expand Down
17 changes: 14 additions & 3 deletions arch/x86/kernel/cpu/bugs.c
Expand Up @@ -167,8 +167,13 @@ void __init cpu_select_mitigations(void)
md_clear_select_mitigation();
srbds_select_mitigation();
l1d_flush_select_mitigation();
gds_select_mitigation();

/*
* srso_select_mitigation() depends and must run after
* retbleed_select_mitigation().
*/
srso_select_mitigation();
gds_select_mitigation();
}

/*
Expand Down Expand Up @@ -1037,6 +1042,9 @@ static void __init retbleed_select_mitigation(void)
setup_force_cpu_cap(X86_FEATURE_RETHUNK);
setup_force_cpu_cap(X86_FEATURE_UNRET);

if (IS_ENABLED(CONFIG_RETHUNK))
x86_return_thunk = zen_return_thunk;

if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
pr_err(RETBLEED_UNTRAIN_MSG);
Expand Down Expand Up @@ -2451,10 +2459,13 @@ static void __init srso_select_mitigation(void)
*/
setup_force_cpu_cap(X86_FEATURE_RETHUNK);

if (boot_cpu_data.x86 == 0x19)
if (boot_cpu_data.x86 == 0x19) {
setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS);
else
x86_return_thunk = srso_alias_return_thunk;
} else {
setup_force_cpu_cap(X86_FEATURE_SRSO);
x86_return_thunk = srso_return_thunk;
}
srso_mitigation = SRSO_MITIGATION_SAFE_RET;
} else {
pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
Expand Down
4 changes: 2 additions & 2 deletions arch/x86/kernel/vmlinux.lds.S
Expand Up @@ -522,8 +522,8 @@ INIT_PER_CPU(irq_stack_backing_store);
"fixed_percpu_data is not at start of per-cpu area");
#endif

#ifdef CONFIG_RETHUNK
. = ASSERT((__ret & 0x3f) == 0, "__ret not cacheline-aligned");
#ifdef CONFIG_RETHUNK
. = ASSERT((zen_return_thunk & 0x3f) == 0, "zen_return_thunk not cacheline-aligned");
. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
#endif

Expand Down
58 changes: 42 additions & 16 deletions arch/x86/lib/retpoline.S
Expand Up @@ -151,57 +151,69 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
.section .text.__x86.rethunk_untrain

SYM_START(srso_untrain_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
ASM_NOP2
lfence
jmp __x86_return_thunk
jmp srso_alias_return_thunk
SYM_FUNC_END(srso_untrain_ret_alias)
__EXPORT_THUNK(srso_untrain_ret_alias)

.section .text.__x86.rethunk_safe
#else
/* dummy definition for alternatives */
SYM_START(srso_untrain_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
ANNOTATE_UNRET_SAFE
ret
int3
SYM_FUNC_END(srso_untrain_ret_alias)
#endif

/* Needs a definition for the __x86_return_thunk alternative below. */
SYM_START(srso_safe_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
#ifdef CONFIG_CPU_SRSO
add $8, %_ASM_SP
UNWIND_HINT_FUNC
#endif
ANNOTATE_UNRET_SAFE
ret
int3
SYM_FUNC_END(srso_safe_ret_alias)

.section .text.__x86.return_thunk

SYM_CODE_START(srso_alias_return_thunk)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
call srso_safe_ret_alias
ud2
SYM_CODE_END(srso_alias_return_thunk)

/*
* Safety details here pertain to the AMD Zen{1,2} microarchitecture:
* 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
* 1) The RET at zen_return_thunk must be on a 64 byte boundary, for
* alignment within the BTB.
* 2) The instruction at zen_untrain_ret must contain, and not
* end with, the 0xc3 byte of the RET.
* 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
* from re-poisioning the BTB prediction.
*/
.align 64
.skip 64 - (__ret - zen_untrain_ret), 0xcc
.skip 64 - (zen_return_thunk - zen_untrain_ret), 0xcc
SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
ANNOTATE_NOENDBR
/*
* As executed from zen_untrain_ret, this is:
*
* TEST $0xcc, %bl
* LFENCE
* JMP __x86_return_thunk
* JMP zen_return_thunk
*
* Executing the TEST instruction has a side effect of evicting any BTB
* prediction (potentially attacker controlled) attached to the RET, as
* __x86_return_thunk + 1 isn't an instruction boundary at the moment.
* zen_return_thunk + 1 isn't an instruction boundary at the moment.
*/
.byte 0xf6

/*
* As executed from __x86_return_thunk, this is a plain RET.
* As executed from zen_return_thunk, this is a plain RET.
*
* As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
*
Expand All @@ -213,13 +225,13 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
* With SMT enabled and STIBP active, a sibling thread cannot poison
* RET's prediction to a type of its choice, but can evict the
* prediction due to competitive sharing. If the prediction is
* evicted, __x86_return_thunk will suffer Straight Line Speculation
* evicted, zen_return_thunk will suffer Straight Line Speculation
* which will be contained safely by the INT3.
*/
SYM_INNER_LABEL(__ret, SYM_L_GLOBAL)
SYM_INNER_LABEL(zen_return_thunk, SYM_L_GLOBAL)
ret
int3
SYM_CODE_END(__ret)
SYM_CODE_END(zen_return_thunk)

/*
* Ensure the TEST decoding / BTB invalidation is complete.
Expand All @@ -230,7 +242,7 @@ SYM_CODE_END(__ret)
* Jump back and execute the RET in the middle of the TEST instruction.
* INT3 is for SLS protection.
*/
jmp __ret
jmp zen_return_thunk
int3
SYM_FUNC_END(zen_untrain_ret)
__EXPORT_THUNK(zen_untrain_ret)
Expand All @@ -251,25 +263,39 @@ SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
ANNOTATE_NOENDBR
.byte 0x48, 0xb8

/*
* This forces the function return instruction to speculate into a trap
* (UD2 in srso_return_thunk() below). This RET will then mispredict
* and execution will continue at the return site read from the top of
* the stack.
*/
SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
add $8, %_ASM_SP
ret
int3
int3
int3
/* end of movabs */
lfence
call srso_safe_ret
ud2
SYM_CODE_END(srso_safe_ret)
SYM_FUNC_END(srso_untrain_ret)
__EXPORT_THUNK(srso_untrain_ret)

SYM_CODE_START(__x86_return_thunk)
SYM_CODE_START(srso_return_thunk)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
ALTERNATIVE_2 "jmp __ret", "call srso_safe_ret", X86_FEATURE_SRSO, \
"call srso_safe_ret_alias", X86_FEATURE_SRSO_ALIAS
call srso_safe_ret
ud2
SYM_CODE_END(srso_return_thunk)

SYM_CODE_START(__x86_return_thunk)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
ANNOTATE_UNRET_SAFE
ret
int3
SYM_CODE_END(__x86_return_thunk)
EXPORT_SYMBOL(__x86_return_thunk)

Expand Down
2 changes: 1 addition & 1 deletion tools/objtool/arch/x86/decode.c
Expand Up @@ -829,6 +829,6 @@ bool arch_is_rethunk(struct symbol *sym)

bool arch_is_embedded_insn(struct symbol *sym)
{
return !strcmp(sym->name, "__ret") ||
return !strcmp(sym->name, "zen_return_thunk") ||
!strcmp(sym->name, "srso_safe_ret");
}

0 comments on commit 4f0d18c

Please sign in to comment.