x86/mce: Make mce_rdmsrl() panic on an inaccessible MSR
If an exception needs to be handled while reading an MSR - which is, in
most cases, caused by a #GP fault on a non-existent MSR - then this is
most likely the manifestation of a BIOS or a hardware bug. Such a bug
violates the architectural guarantee that MCA banks are present along
with all the MSRs belonging to them.

The proper fix belongs in the hardware/firmware - not in the kernel.

Handling an exception raised in #MC context - and the #MC may itself
have been raised while an NMI was being handled - would cause the nasty
NMI-nesting issue because of IRET's shortcoming of re-enabling NMIs when
executed. And the machine is in an #MC context already, so <Deity> be at
its side.

Tracing MSR accesses while in #MC is another no-no due to tracing being
inherently a bad idea in atomic context:

  vmlinux.o: warning: objtool: do_machine_check()+0x4a: call to mce_rdmsrl() leaves .noinstr.text section

so remove all that "additional" functionality from mce_rdmsrl() and
provide it with a special exception handler which panics the machine
when that MSR is not accessible.

The exception handler prints a human-readable message explaining the
panic reason and, what is more, it panics while still in the #GP
handler, so the latter never executes an IRET and the NMI-nesting
window is therefore not opened in the case where the #MC happened
while an NMI was being handled. (#MC delivery itself won't be
re-enabled until MCG_STATUS has been cleared.)
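
For reference, the custom handlers are invoked straight from the #GP
path via the exception table machinery. Below is a simplified sketch of
that dispatch, modeled on fixup_exception() in arch/x86/mm/extable.c
with the pre-5.17 handler-based exception table; treat it as an
illustration, not the literal implementation:

  /* Simplified sketch: how the #GP handler finds and calls the handler. */
  int fixup_exception(struct pt_regs *regs, int trapnr,
                      unsigned long error_code, unsigned long fault_addr)
  {
          const struct exception_table_entry *e;
          ex_handler_t handler;

          /* Look up the extable entry covering the faulting RDMSR/WRMSR. */
          e = search_exception_tables(regs->ip);
          if (!e)
                  return 0;

          /*
           * Call the per-entry handler. ex_handler_rdmsr_fault() panics
           * here, so the #GP handler never reaches its IRET.
           */
          handler = ex_fixup_handler(e);
          return handler(e, regs, trapnr, error_code, fault_addr);
  }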

Suggested-by: Andy Lutomirski <luto@kernel.org>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
[ Add missing prototypes for ex_handler_* ]
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/20200906212130.GA28456@zn.tnic
Borislav Petkov committed Sep 11, 2020
1 parent a0bc32b commit e2def7d
Showing 2 changed files with 70 additions and 12 deletions.
72 changes: 60 additions & 12 deletions arch/x86/kernel/cpu/mce/core.c
@@ -373,10 +373,28 @@ static int msr_to_offset(u32 msr)
 	return -1;
 }
 
+__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
+                                      struct pt_regs *regs, int trapnr,
+                                      unsigned long error_code,
+                                      unsigned long fault_addr)
+{
+	pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
+	         (unsigned int)regs->cx, regs->ip, (void *)regs->ip);
+
+	show_stack_regs(regs);
+
+	panic("MCA architectural violation!\n");
+
+	while (true)
+		cpu_relax();
+
+	return true;
+}
+
 /* MSR access wrappers used for error injection */
 static u64 mce_rdmsrl(u32 msr)
 {
-	u64 v;
+	DECLARE_ARGS(val, low, high);
 
 	if (__this_cpu_read(injectm.finished)) {
 		int offset = msr_to_offset(msr);
@@ -386,29 +404,59 @@ static u64 mce_rdmsrl(u32 msr)
 		return *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
 	}
 
-	if (rdmsrl_safe(msr, &v)) {
-		WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr);
-		/*
-		 * Return zero in case the access faulted. This should
-		 * not happen normally but can happen if the CPU does
-		 * something weird, or if the code is buggy.
-		 */
-		v = 0;
-	}
+	/*
+	 * RDMSR on MCA MSRs should not fault. If they do, this is very much an
+	 * architectural violation and needs to be reported to hw vendor. Panic
+	 * the box to not allow any further progress.
+	 */
+	asm volatile("1: rdmsr\n"
+	             "2:\n"
+	             _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_fault)
+	             : EAX_EDX_RET(val, low, high) : "c" (msr));
 
-	return v;
+
+	return EAX_EDX_VAL(val, low, high);
 }
 
+__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
+                                      struct pt_regs *regs, int trapnr,
+                                      unsigned long error_code,
+                                      unsigned long fault_addr)
+{
+	pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
+	         (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax,
+	         regs->ip, (void *)regs->ip);
+
+	show_stack_regs(regs);
+
+	panic("MCA architectural violation!\n");
+
+	while (true)
+		cpu_relax();
+
+	return true;
+}
+
 static void mce_wrmsrl(u32 msr, u64 v)
 {
+	u32 low, high;
+
 	if (__this_cpu_read(injectm.finished)) {
 		int offset = msr_to_offset(msr);
 
 		if (offset >= 0)
 			*(u64 *)((char *)this_cpu_ptr(&injectm) + offset) = v;
 		return;
 	}
-	wrmsrl(msr, v);
+
+	low = (u32)v;
+	high = (u32)(v >> 32);
+
+	/* See comment in mce_rdmsrl() */
+	asm volatile("1: wrmsr\n"
+	             "2:\n"
+	             _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_fault)
+	             : : "c" (msr), "a"(low), "d" (high) : "memory");
 }
 
 /*
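
The open-coded RDMSR above relies on the DECLARE_ARGS()/EAX_EDX_RET()/
EAX_EDX_VAL() helpers from <asm/msr.h>, which are not part of this diff.
As a rough orientation - this is a simplified sketch rather than the
literal header contents - their 64-bit variants look approximately like
this (the 32-bit build uses a single u64 output operand instead):

  /* Sketch of the <asm/msr.h> helpers used by the open-coded rdmsr above. */
  #define DECLARE_ARGS(val, low, high)    unsigned long low, high
  #define EAX_EDX_VAL(val, low, high)     ((low) | (high) << 32)
  #define EAX_EDX_RET(val, low, high)     "=a" (low), "=d" (high)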
10 changes: 10 additions & 0 deletions arch/x86/kernel/cpu/mce/internal.h
@@ -185,4 +185,14 @@ extern bool amd_filter_mce(struct mce *m);
 static inline bool amd_filter_mce(struct mce *m) { return false; };
 #endif
 
+__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
+                                      struct pt_regs *regs, int trapnr,
+                                      unsigned long error_code,
+                                      unsigned long fault_addr);
+
+__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
+                                      struct pt_regs *regs, int trapnr,
+                                      unsigned long error_code,
+                                      unsigned long fault_addr);
+
 #endif /* __X86_MCE_INTERNAL_H__ */
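
_ASM_EXTABLE_HANDLE(1b, 2b, handler) is what ties the faulting rdmsr/wrmsr
instruction to these handlers: it emits an entry into the __ex_table
section recording the instruction, the resume point and the handler as
relative offsets. As a rough sketch of the entry layout - an assumption
based on the pre-5.17 handler-based exception table, not part of this
patch:

  /* Sketch: one x86 exception table entry, stored as relative offsets. */
  struct exception_table_entry {
          int insn;       /* offset to the faulting instruction (label 1:) */
          int fixup;      /* offset to the resume point (label 2:)         */
          int handler;    /* offset to ex_handler_rdmsr_fault() etc.       */
  };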
