x86: generate page tables at runtime

Replaces the very complex build-time generation of page tables
with a much simpler runtime generation of them at boot.

For those x86 boards that enable the MMU in the defconfig,
set the number of page pool pages appropriately.

The MMU_RUNTIME_* flags have been removed. They were an
artifact of the old page table generation and did not
correspond to any hardware state.

Signed-off-by: Andrew Boie <andrew.p.boie@intel.com>
andrewboie committed Jul 31, 2019
1 parent c1864b4 commit c3b3aafaec02256cb00a7fff7b7ece819ccbe111
@@ -315,7 +315,6 @@
/arch/x86/gen_gdt.py @andrewboie
/arch/x86/gen_idt.py @andrewboie
/scripts/gen_kobject_list.py @andrewboie
/arch/x86/gen_mmu_x86.py @andrewboie
/scripts/gen_priv_stacks.py @andrewboie @agross-oss @ioannisg
/scripts/gen_syscall_header.py @andrewboie
/scripts/gen_syscalls.py @andrewboie
@@ -129,9 +129,18 @@ config X86_MMU
select MEMORY_PROTECTION
help
This option enables the memory management unit present in x86
and creates a set of page tables at build time. Requires an MMU
and creates a set of page tables at boot time. Requires an MMU
which supports PAE page tables.

config X86_MMU_PAGE_POOL_PAGES
int "Number of pages to reserve for building page tables"
default 16
depends on X86_MMU
help
Building page tables at boot requires a pool of free memory pages
to construct them. The number of pages needed cannot be derived at
build time; tune this value to your SoC's specific memory map.
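A rough way to bound this value for a given memory map is sketched below. This is an editorial sketch, not part of the commit; the helper name pool_pages_upper_bound and struct map_region are hypothetical. It assumes the PAE layout used by this port: one page directory per 1 GiB granule a region touches, one page table per 2 MiB granule, and roughly a second full set when CONFIG_X86_KPTI also builds the user copy of the tables.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct map_region {
	uintptr_t start;
	size_t size;
};

/* Hypothetical sizing helper: upper bound on pool pages consumed for
 * 'n' page-aligned regions.  Adjacent regions often share page
 * directories and page tables, so the boot-time report printed by
 * z_x86_paging_init() may show a smaller number.
 */
static size_t pool_pages_upper_bound(const struct map_region *r, size_t n,
				     bool kpti)
{
	size_t pages = 0;

	for (size_t i = 0; i < n; i++) {
		uintptr_t first = r[i].start;
		uintptr_t last = r[i].start + r[i].size - 1;

		/* one page directory per 1 GiB granule touched */
		pages += (size_t)((last >> 30) - (first >> 30) + 1);
		/* one page table per 2 MiB granule touched */
		pages += (size_t)((last >> 21) - (first >> 21) + 1);
	}

	/* CONFIG_X86_KPTI builds a second, user-mode set of tables */
	return kpti ? 2 * pages : pages;
}

The printk in z_x86_paging_init() further down remains the authoritative tuning aid.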

config X86_NO_MELTDOWN
bool
help
@@ -20,6 +20,7 @@
/* exports (private APIs) */

GTEXT(__start)
GTEXT(z_x86_enable_paging)

/* externs */
GTEXT(z_cstart)
@@ -273,30 +274,6 @@ __csSet:
lgdt %ds:_gdt
#endif

#ifdef CONFIG_X86_MMU

/* Load the address of the kernel PDPT into CR3 */
movl $z_x86_kernel_pdpt, %eax
movl %eax, %cr3

/* Enable PAE */
movl %cr4, %eax
orl $CR4_PAE_ENABLE, %eax
movl %eax, %cr4

/* IA32_EFER NXE bit set */
movl $0xC0000080, %ecx
rdmsr
orl $0x800, %eax
wrmsr

/* Enable paging (CR0.PG, bit 31) / write protect (CR0.WP, bit 16) */
movl %cr0, %eax
orl $CR0_PG_WP_ENABLE, %eax
movl %eax, %cr0

#endif /* CONFIG_X86_MMU */

#if defined(CONFIG_X86_ENABLE_TSS)
mov $MAIN_TSS, %ax
ltr %ax
@@ -399,6 +376,31 @@ dataWords:
ret
#endif /* CONFIG_XIP */

#ifdef CONFIG_X86_MMU
z_x86_enable_paging:
/* Load the address of the kernel PDPT into CR3 */
movl $z_x86_kernel_pdpt, %eax
movl %eax, %cr3

/* Enable PAE */
movl %cr4, %eax
orl $CR4_PAE_ENABLE, %eax
movl %eax, %cr4

/* IA32_EFER NXE bit set */
movl $0xC0000080, %ecx
rdmsr
orl $0x800, %eax
wrmsr

/* Enable paging (CR0.PG, bit 31) / write protect (CR0.WP, bit 16) */
movl %cr0, %eax
orl $CR0_PG_WP_ENABLE, %eax
movl %eax, %cr0

ret
#endif /* CONFIG_X86_MMU */

#if defined(CONFIG_SSE)

/* SSE control & status register initial value */
@@ -24,13 +24,11 @@ MMU_BOOT_REGION((u32_t)&_image_text_start, (u32_t)&_image_text_size,
MMU_ENTRY_READ | MMU_ENTRY_USER);

MMU_BOOT_REGION((u32_t)&_image_rodata_start, (u32_t)&_image_rodata_size,
MMU_ENTRY_READ | MMU_ENTRY_USER |
MMU_ENTRY_EXECUTE_DISABLE);
MMU_ENTRY_READ | MMU_ENTRY_USER | MMU_ENTRY_EXECUTE_DISABLE);

#ifdef CONFIG_USERSPACE
MMU_BOOT_REGION((u32_t)&_app_smem_start, (u32_t)&_app_smem_size,
MMU_ENTRY_WRITE | MMU_ENTRY_RUNTIME_USER |
MMU_ENTRY_EXECUTE_DISABLE);
MMU_ENTRY_WRITE | MMU_ENTRY_EXECUTE_DISABLE);
#endif

#ifdef CONFIG_COVERAGE_GCOV
@@ -43,9 +41,7 @@ MMU_BOOT_REGION((u32_t)&__gcov_bss_start, (u32_t)&__gcov_bss_size,
* automatically for stacks.
*/
MMU_BOOT_REGION((u32_t)&__kernel_ram_start, (u32_t)&__kernel_ram_size,
MMU_ENTRY_WRITE |
MMU_ENTRY_RUNTIME_USER |
MMU_ENTRY_EXECUTE_DISABLE);
MMU_ENTRY_WRITE | MMU_ENTRY_EXECUTE_DISABLE);

/* Works for PDPT, PD, PT entries, the bits we check here are all the same.
*
@@ -298,13 +294,19 @@ static inline void tlb_flush_page(void *addr)
__asm__ ("invlpg %0" :: "m" (*page));
}

#define PDPTE_FLAGS_MASK MMU_ENTRY_PRESENT

#define PDE_FLAGS_MASK (MMU_ENTRY_WRITE | MMU_ENTRY_USER | \
PDPTE_FLAGS_MASK)

#define PTE_FLAGS_MASK (PDE_FLAGS_MASK | MMU_ENTRY_EXECUTE_DISABLE | \
MMU_ENTRY_WRITE_THROUGH | \
MMU_ENTRY_CACHING_DISABLE)
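
As a worked example (editorial, derived only from the definitions above), here is how these masks split one region's flags across the three paging levels:

/* Region mapped with MMU_ENTRY_PRESENT | MMU_ENTRY_WRITE |
 * MMU_ENTRY_USER | MMU_ENTRY_EXECUTE_DISABLE:
 *
 *   PDPTE_FLAGS_MASK keeps: PRESENT
 *   PDE_FLAGS_MASK keeps:   PRESENT | WRITE | USER
 *   PTE_FLAGS_MASK keeps:   PRESENT | WRITE | USER | EXECUTE_DISABLE
 */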

void z_x86_mmu_set_flags(struct x86_mmu_pdpt *pdpt, void *ptr, size_t size,
x86_page_entry_data_t flags,
x86_page_entry_data_t mask, bool flush)
{
union x86_mmu_pte *pte;

u32_t addr = (u32_t)ptr;

__ASSERT((addr & MMU_PAGE_MASK) == 0U, "unaligned address provided");
@@ -319,12 +321,26 @@ void z_x86_mmu_set_flags(struct x86_mmu_pdpt *pdpt, void *ptr, size_t size,
}

while (size != 0) {
union x86_mmu_pte *pte;
union x86_mmu_pde_pt *pde;
union x86_mmu_pdpte *pdpte;
x86_page_entry_data_t cur_flags = flags;

/* TODO we're not generating 2MB entries at the moment */
__ASSERT(X86_MMU_GET_PDE(pdpt, addr)->ps != 1, "2MB PDE found");
pte = X86_MMU_GET_PTE(pdpt, addr);
pdpte = X86_MMU_GET_PDPTE(pdpt, addr);
__ASSERT(pdpte->p == 1, "set flags on non-present PDPTE");
pdpte->value |= (flags & PDPTE_FLAGS_MASK);

pde = X86_MMU_GET_PDE(pdpt, addr);
__ASSERT(pde->p == 1, "set flags on non-present PDE");
pde->value |= (flags & PDE_FLAGS_MASK);
/* If any flags enable execution, clear execute disable at the
* page directory level
*/
if ((flags & MMU_ENTRY_EXECUTE_DISABLE) == 0) {
pde->value &= ~MMU_ENTRY_EXECUTE_DISABLE;
}

pte = X86_MMU_GET_PTE(pdpt, addr);
/* If we're setting the present bit, restore the address
* field. If we're clearing it, then the address field
* will be zeroed instead, mapping the PTE to the NULL page.
@@ -344,6 +360,169 @@ void z_x86_mmu_set_flags(struct x86_mmu_pdpt *pdpt, void *ptr, size_t size,
}
}

/* Pool of memory pages used to construct the page tables at boot.
 * get_page() below hands pages out from the top of the pool downward;
 * pages are never returned.
 */
static char __aligned(MMU_PAGE_SIZE)
page_pool[MMU_PAGE_SIZE * CONFIG_X86_MMU_PAGE_POOL_PAGES];

static char *page_pos = page_pool + sizeof(page_pool);

static void *get_page(void)
{
page_pos -= MMU_PAGE_SIZE;

__ASSERT(page_pos >= page_pool, "out of MMU pages\n");

return page_pos;
}

__aligned(0x20) struct x86_mmu_pdpt z_x86_kernel_pdpt;
#ifdef CONFIG_X86_KPTI
__aligned(0x20) struct x86_mmu_pdpt z_x86_user_pdpt;
#endif

extern char z_shared_kernel_page_start[];

static inline bool is_within_system_ram(uintptr_t addr)
{
return (addr >= DT_PHYS_RAM_ADDR) &&
(addr < (DT_PHYS_RAM_ADDR + (DT_RAM_SIZE * 1024U)));
}

static void add_mmu_region_page(struct x86_mmu_pdpt *pdpt, uintptr_t addr,
u64_t flags, bool user_table)
{
union x86_mmu_pdpte *pdpte;
struct x86_mmu_pd *pd;
union x86_mmu_pde_pt *pde;
struct x86_mmu_pt *pt;
union x86_mmu_pte *pte;

#ifdef CONFIG_X86_KPTI
/* If we are generating a page table for user mode, and this address
* does not have the user flag set, and this address falls outside
* of system RAM, then don't bother generating any tables for it,
* we will never need them later as memory domains are limited to
* regions within system RAM.
*/
if (user_table && (flags & MMU_ENTRY_USER) == 0 &&
!is_within_system_ram(addr)) {
return;
}
#endif

/* Setup the PDPTE entry for the address, creating a page directory
* if one didn't exist
*/
pdpte = &pdpt->entry[MMU_PDPTE_NUM(addr)];
if (pdpte->p == 0) {
pd = get_page();
pdpte->pd = ((uintptr_t)pd) >> MMU_PAGE_SHIFT;
} else {
pd = (struct x86_mmu_pd *)(pdpte->pd << MMU_PAGE_SHIFT);
}
pdpte->value |= (flags & PDPTE_FLAGS_MASK);

/* Setup the PDE entry for the address, creating a page table
* if necessary
*/
pde = &pd->entry[MMU_PDE_NUM(addr)].pt;
if (pde->p == 0) {
pt = get_page();
pde->pt = ((uintptr_t)pt) >> MMU_PAGE_SHIFT;
} else {
pt = (struct x86_mmu_pt *)(pde->pt << MMU_PAGE_SHIFT);
}
pde->value |= (flags & PDE_FLAGS_MASK);

/* Execute disable bit needs special handling, we should only set it
* at the page directory level if ALL pages have XD set (instead of
* just one).
*
* Use the 'ignored2' field to store a marker on whether any
* configured region allows execution, the CPU never looks at
* or modifies it.
*/
if ((flags & MMU_ENTRY_EXECUTE_DISABLE) == 0) {
pde->ignored2 = 1;
pde->value &= ~MMU_ENTRY_EXECUTE_DISABLE;
} else if (pde->ignored2 == 0) {
pde->value |= MMU_ENTRY_EXECUTE_DISABLE;
}

#ifdef CONFIG_X86_KPTI
if (user_table && (flags & MMU_ENTRY_USER) == 0 &&
addr != (uintptr_t)(&z_shared_kernel_page_start)) {
/* All non-user accessible pages except the shared page
* are marked non-present in the page table.
*/
return;
}
#else
ARG_UNUSED(user_table);
#endif

/* Finally set up the page table entry */
pte = &pt->entry[MMU_PAGE_NUM(addr)];
pte->page = addr >> MMU_PAGE_SHIFT;
pte->value |= (flags & PTE_FLAGS_MASK);
}
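
To make the execute-disable handling above concrete, here is a short editorial sketch (hypothetical addresses; it assumes placement in this same file so the static helper and z_x86_kernel_pdpt are visible). Two pages share one 2 MiB PDE slot, one executable and one not: the shared PDE ends up allowing execution, and only the second page's PTE carries XD.

/* Editorial illustration, not part of the commit */
static void example_xd_merge(void)
{
	/* Executable page: ignored2 is set and XD stays clear at the PDE */
	add_mmu_region_page(&z_x86_kernel_pdpt, 0x00100000,
			    MMU_ENTRY_PRESENT | MMU_ENTRY_READ, false);

	/* Non-executable page: XD lands only in its PTE, because ignored2
	 * records that another page under this PDE must execute
	 */
	add_mmu_region_page(&z_x86_kernel_pdpt, 0x00101000,
			    MMU_ENTRY_PRESENT | MMU_ENTRY_READ |
			    MMU_ENTRY_EXECUTE_DISABLE, false);
}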

static void add_mmu_region(struct x86_mmu_pdpt *pdpt, struct mmu_region *rgn,
bool user_table)
{
size_t size;
u64_t flags;
uintptr_t addr;

__ASSERT((rgn->address & MMU_PAGE_MASK) == 0U,
"unaligned address provided");
__ASSERT((rgn->size & MMU_PAGE_MASK) == 0U,
"unaligned size provided");

addr = rgn->address;

/* Add the present flag; the old MMU_ENTRY_RUNTIME_* flags no longer
 * exist, so the region flags can be used as-is
 */
flags = rgn->flags | MMU_ENTRY_PRESENT;

/* Iterate through the region a page at a time, creating entries as
* necessary.
*/
size = rgn->size;
while (size > 0) {
add_mmu_region_page(pdpt, addr, flags, user_table);

size -= MMU_PAGE_SIZE;
addr += MMU_PAGE_SIZE;
}
}

extern struct mmu_region z_x86_mmulist_start[];
extern struct mmu_region z_x86_mmulist_end[];

/* Called from x86's kernel_arch_init() */
void z_x86_paging_init(void)
{
size_t pages_free;

for (struct mmu_region *rgn = z_x86_mmulist_start;
rgn < z_x86_mmulist_end; rgn++) {
add_mmu_region(&z_x86_kernel_pdpt, rgn, false);
#ifdef CONFIG_X86_KPTI
add_mmu_region(&z_x86_user_pdpt, rgn, true);
#endif
}

pages_free = (page_pos - page_pool) / MMU_PAGE_SIZE;

if (pages_free != 0) {
printk("Optimal CONFIG_X86_MMU_PAGE_POOL_PAGES %zu\n",
CONFIG_X86_MMU_PAGE_POOL_PAGES - pages_free);
}

z_x86_enable_paging();
}
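
As a worked example of the report above (hypothetical numbers): with CONFIG_X86_MMU_PAGE_POOL_PAGES=16, if 7 pool pages remain unused after all regions are mapped, pages_free is 7 and the message suggests 9 as the optimal setting, so the board's defconfig could be trimmed to that value.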

#ifdef CONFIG_X86_USERSPACE
int z_arch_buffer_validate(void *addr, size_t size, int write)
{
