kmap: Add stray access protection for device pages
Device managed pages may have additional protections.  These protections
need to be removed prior to valid use by kernel users.

Check for special treatment of device managed pages in kmap and take
action if needed.  Use kmap as the interface for generic kernel code
because, under normal circumstances, it would be a bug for general
kernel code not to use kmap prior to accessing kernel memory.
Therefore, this should allow any valid kernel user to use these pages
seamlessly and without issue.
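
For illustration only (not part of the patch), the expected pattern in
generic kernel code is unchanged; on a protected device page the
kmap()/kunmap() pair transparently drops and restores the protection
(the buffer name here is hypothetical):

	/* Hypothetical caller: 'page' may be a protected device (e.g. pmem) page. */
	void *addr = kmap(page);

	memcpy(buffer, addr, PAGE_SIZE);	/* access is valid between kmap() and kunmap() */

	kunmap(page);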

Some users of kmap() have incorrectly used the mapped address outside of
the thread which performed the mapping.  It is not anticipated that
such 'global' mappings will be required to protect pmem, as the two
filesystems which support DAX (one of the main uses of pmem) are ext4
and xfs, and neither performs such global mappings.

To handle other kmap() users, mark mappings performed through kmap() and
handle faults on those pages in one of three 'kmap modes' (strict,
relaxed, or silent):

Strict -- fails the mapping and will cause a kernel crash.

Relaxed -- will print a warning but update the current running thread to
have the required pkey.

Silent -- will update the current thread without the warning.  Relaxed
is the default.

The mappings protected by PKS are those originally configured when zone
device pages are added to the direct map with PGMAP_PROT_ENABLED set.
Highmem systems create a new mapping for each kmap(), and that mapping
is inherently open to whoever created it, so there is no need for PKS to
be involved.  Therefore, only the kmap() calls in the HIGHMEM=n case are
modified.

Because of the performance-critical nature of mapping pages, the
implementation is careful to be as fast as possible when 'mapping' pages
of regular DRAM.

Furthermore, it should be noted that the MSR write required to unprotect
device pages is less expensive than a typical MSR write.  Specifically,
WRMSR(MSR_IA32_PKRS) is not serializing but still maintains ordering
properties similar to WRPKRU.  The current SDM section on PKRS has not
yet been updated, but the semantics should match those of WRPKRU.  To
quote from the WRPKRU text:

	WRPKRU will never execute speculatively. Memory accesses
	affected by PKRU register will not execute (even speculatively)
	until all prior executions of WRPKRU have completed execution
	and updated the PKRU register.

Still, this will make accessing pmem from the kernel more expensive, but
the overhead is minimized, and most pmem users access this memory through
user page mappings, which are not affected at all.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---
INTERNAL NOTE: Would this be better squashed with the Device Access
Protection patch?  It seems separate but at the same time intertwined
enough to be 1 patch.
weiny2 committed Mar 4, 2021
1 parent 068bb26 commit dcb55ac
Showing 6 changed files with 103 additions and 2 deletions.
51 changes: 51 additions & 0 deletions arch/x86/mm/fault.c
@@ -37,6 +37,26 @@
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>

#define KMAP_MODE_STRICT 0
#define KMAP_MODE_RELAXED 1
#define KMAP_MODE_SILENT 2

int kmap_mode = KMAP_MODE_RELAXED;

static int __init set_kmap_mode(char *str)
{
if (!strcmp(str, "relaxed"))
kmap_mode = KMAP_MODE_RELAXED;
else if (!strcmp(str, "silent"))
kmap_mode = KMAP_MODE_SILENT;
else if (!strcmp(str, "strict"))
kmap_mode = KMAP_MODE_STRICT;

return 1;
}
__setup("kmap_mode=", set_kmap_mode);


/*
* Returns 0 if mmiotrace is disabled, or if the fault is not
* handled by mmiotrace:
@@ -1168,6 +1188,35 @@ static bool handle_pks_test(unsigned long hw_error_code, struct pt_regs *regs)
}
#endif

static bool handle_pks(struct pt_regs *regs, unsigned long error_code,
unsigned long address)
{
if (error_code & X86_PF_PK) {
struct extended_pt_regs *ept_regs;
struct page *page;
int pkey;

page = virt_to_page(address);

if (!page || !page_is_globally_mapped(page))
return false;

if (kmap_mode == KMAP_MODE_STRICT)
return false;

WARN_ONCE(kmap_mode == KMAP_MODE_RELAXED,
"PKS fault on globally mapped device page 0x%lx pfn %lu\n",
address, page_to_pfn(page));

pkey = dev_get_dev_pkey();
ept_regs = extended_pt_regs(regs);
/* Clear the disable bits for the device pkey in the saved PKRS value */
ept_regs->thread_pkrs = update_pkey_val(ept_regs->thread_pkrs, pkey, 0);
return true;
}

return false;
}

/*
* Called for all faults where 'address' is part of the kernel address
* space. Might get called for faults that originate from *code* that
@@ -1184,6 +1233,8 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
if (!cpu_feature_enabled(X86_FEATURE_PKS))
WARN_ON_ONCE(hw_error_code & X86_PF_PK);

if (handle_pks(regs, hw_error_code, address))
return;

if (handle_pks_test(hw_error_code, regs))
return;
24 changes: 24 additions & 0 deletions include/linux/highmem-internal.h
@@ -2,6 +2,24 @@
#ifndef _LINUX_HIGHMEM_INTERNAL_H
#define _LINUX_HIGHMEM_INTERNAL_H

static inline void dev_page_mk_readwrite(struct page *page, bool global)
{
if (!page_is_access_protected(page))
return;
if (global)
page->pgmap->flags |= PGMAP_KMAP_GLOBAL;
dev_mk_readwrite();
}

static inline void dev_page_mk_noaccess(struct page *page, bool global)
{
if (!page_is_access_protected(page))
return;
if (global)
page->pgmap->flags &= ~PGMAP_KMAP_GLOBAL;
dev_mk_noaccess();
}

/*
* Outside of CONFIG_HIGHMEM to support X86 32bit iomap_atomic() cruft.
*/
@@ -142,6 +160,7 @@ static inline struct page *kmap_to_page(void *addr)
static inline void *kmap(struct page *page)
{
might_sleep();
dev_page_mk_readwrite(page, true);
return page_address(page);
}

@@ -150,13 +169,15 @@ static inline void kmap_flush_unused(void) { }

static inline void kunmap(struct page *page)
{
dev_page_mk_noaccess(page, true);
#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
kunmap_flush_on_unmap(page_address(page));
#endif
}

static inline void *kmap_local_page(struct page *page)
{
dev_page_mk_readwrite(page, false);
return page_address(page);
}

@@ -175,12 +196,14 @@ static inline void __kunmap_local(void *addr)
#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
kunmap_flush_on_unmap(addr);
#endif
dev_page_mk_noaccess(kmap_to_page(addr), false);
}

static inline void *kmap_atomic(struct page *page)
{
preempt_disable();
pagefault_disable();
dev_page_mk_readwrite(page, false);
return page_address(page);
}

@@ -199,6 +222,7 @@ static inline void __kunmap_atomic(void *addr)
#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
kunmap_flush_on_unmap(addr);
#endif
dev_page_mk_noaccess(kmap_to_page(addr), false);
pagefault_enable();
preempt_enable();
}
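
As an editorial sketch (not part of the diff) of the thread-local
variants added above: kmap_local_page() grants access only to the
current thread, and kunmap_local() re-protects the page; the destination
buffer here is hypothetical.

	/* Sketch: thread-local access to a protected device page. */
	void *addr = kmap_local_page(page);	/* dev_page_mk_readwrite(page, false) */

	memcpy(buffer, addr, PAGE_SIZE);	/* no global kmap marking needed */

	kunmap_local(addr);			/* dev_page_mk_noaccess(..., false) */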
1 change: 1 addition & 0 deletions include/linux/memremap.h
@@ -91,6 +91,7 @@ struct dev_pagemap_ops {

#define PGMAP_ALTMAP_VALID (1 << 0)
#define PGMAP_PROT_ENABLED (1 << 1)
#define PGMAP_KMAP_GLOBAL (1 << 2)

/**
* struct dev_pagemap - metadata for ZONE_DEVICE mappings
20 changes: 20 additions & 0 deletions include/linux/mm.h
@@ -1171,6 +1171,7 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
}

#include <linux/pkeys.h>
#ifdef CONFIG_ZONE_DEVICE_ACCESS_PROTECTION
DECLARE_STATIC_KEY_FALSE(dev_protection_static_key);

Expand All @@ -1193,6 +1194,12 @@ static inline bool page_is_access_protected(struct page *page)
return false;
}

static inline bool page_is_globally_mapped(struct page *page)
{
return page_is_access_protected(page) &&
(page->pgmap->flags & PGMAP_KMAP_GLOBAL);
}

void __dev_mk_readwrite(void);
void __dev_mk_noaccess(void);
static __always_inline void dev_mk_readwrite(void)
@@ -1205,13 +1212,26 @@ static __always_inline void dev_mk_noaccess(void)
if (static_branch_unlikely(&dev_protection_static_key))
__dev_mk_noaccess();
}

int dev_get_dev_pkey(void);
#else
static inline bool page_is_access_protected(struct page *page)
{
return false;
}

static inline void dev_mk_readwrite(void) { }
static inline void dev_mk_noaccess(void) { }

static inline bool page_is_globally_mapped(struct page *page)
{
return false;
}

static inline int dev_get_dev_pkey(void)
{
return PKEY_INVALID;
}
#endif /* CONFIG_ZONE_DEVICE_ACCESS_PROTECTION */

/* 127: arbitrary random number, small enough to assemble well */
1 change: 1 addition & 0 deletions include/linux/pkeys.h
@@ -4,6 +4,7 @@

#include <linux/mm.h>

#define PKEY_INVALID (INT_MIN)
enum pks_alloc_flags
{
PKS_FLAG_EXCLUSIVE = 0,
8 changes: 6 additions & 2 deletions mm/memremap.c
@@ -15,8 +15,6 @@
#include <linux/xarray.h>
#include <uapi/asm-generic/mman-common.h>

#define PKEY_INVALID (INT_MIN)

static DEFINE_XARRAY(pgmap_array);

/*
Expand Down Expand Up @@ -131,6 +129,12 @@ static int __init __dev_access_protection_init(void)
return 0;
}
subsys_initcall(__dev_access_protection_init);

int dev_get_dev_pkey(void)
{
return dev_page_pkey;
}
EXPORT_SYMBOL_GPL(dev_get_dev_pkey);
#else
static pgprot_t dev_pgprot_get(struct dev_pagemap *pgmap, pgprot_t prot)
{
