Skip to content

Commit

Permalink
feat: Provide VDSO image and __kernel_vsyscall interface
Browse files Browse the repository at this point in the history
This patch provides the capability to automatically generate a VDSO
(Virtual Dynamic Shared Object) image and pass it to the application.
The VDSO contains only symbols; the actual implementations remain in
the kernel, and the mapping from each VDSO symbol to its kernel
implementation is recorded in `vdso_mapping.conf`.

Additionally, this introduces the `__kernel_vsyscall` interface,
which allows libc to access system calls without relying on slow
binary syscalls. The difference between `__kernel_vsyscall` and
`uk_syscall6_r` lies in considering TLS (Thread Local Storage)
switching, enabling both the kernel and user applications to use
thread-local storage seamlessly. To utilize this feature, the libc
needs to be modified accordingly; please refer to `fork-musl` [1]
and `fork-glibc` [2] for further information.

To use the VDSO, please select
`Application Options -> Provide VDSO Image`.

[1] https://github.com/unikraft/fork-musl
[2] https://github.com/unikraft/fork-glibc

Signed-off-by: Tianyi Liu <i.pear@outlook.com>
  • Loading branch information
i-Pear committed Jul 28, 2023
1 parent a7f183b commit ed0b10b
Show file tree
Hide file tree
Showing 10 changed files with 251 additions and 0 deletions.
6 changes: 6 additions & 0 deletions Config.uk
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,9 @@ config APPELFLOADER_STACK_NBPAGES
config APPELFLOADER_DEBUG
bool "Enable debug messages"
default n

# Opt-in switch for vDSO support. CONFIG_VDSO gates the vDSO sources added in
# Makefile.uk and the AT_SYSINFO_EHDR auxiliary-vector entry in elf_ctx.c.
config VDSO
bool "Provide VDSO Image"
default n
help
Provide a VDSO image in the kernel, and pass it to the application
9 changes: 9 additions & 0 deletions Makefile.rules
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Build rules for the vDSO image that is embedded into the ELF loader.
#
# Pipeline:
#   vdso.c --gcc--> libvdso.o --add_symbol.sh--> libvdso.o (with placeholders)
#          --ld + vdso.lds--> libvdso.so --bin2c.py--> vdso-image.c
#
# Every file that influences a step is listed as a prerequisite so that
# editing vdso_mapping.conf, the linker script or one of the helper scripts
# triggers a rebuild of the generated image.

# Compile the vDSO object, then add one placeholder symbol per line of
# vdso_mapping.conf. The upper 32 bits of VDSO_MAGIC_NUMBER must match
# MAGIC_NUMBER in bin2c.py, which searches the linked image for them.
$(APPELFLOADER_BASE)/vdso/libvdso.o: $(APPELFLOADER_BASE)/vdso/vdso.c $(APPELFLOADER_BASE)/vdso/vdso_mapping.conf $(APPELFLOADER_BASE)/vdso/add_symbol.sh
	gcc $(APPELFLOADER_BASE)/vdso/vdso.c -c -o $(APPELFLOADER_BASE)/vdso/libvdso.o -fPIC -O2 -nostdlib
	@APPELFLOADER_BASE=$(APPELFLOADER_BASE) VDSO_MAGIC_NUMBER=0x369C217100000000 $(APPELFLOADER_BASE)/vdso/add_symbol.sh

# Link the shared object using the layout and symbol versions from vdso.lds.
$(APPELFLOADER_BASE)/vdso/libvdso.so: $(APPELFLOADER_BASE)/vdso/libvdso.o $(APPELFLOADER_BASE)/vdso/vdso.lds
	ld $(APPELFLOADER_BASE)/vdso/libvdso.o -o $(APPELFLOADER_BASE)/vdso/libvdso.so --hash-style=both -soname unikraft-vdso.so.1 -shared -T $(APPELFLOADER_BASE)/vdso/vdso.lds

# Convert the shared object into a C source file that embeds it.
$(APPELFLOADER_BASE)/vdso/vdso-image.c: $(APPELFLOADER_BASE)/vdso/libvdso.so $(APPELFLOADER_BASE)/vdso/vdso_mapping.conf $(APPELFLOADER_BASE)/vdso/bin2c.py
	python3 $(APPELFLOADER_BASE)/vdso/bin2c.py $(APPELFLOADER_BASE)/vdso/vdso_mapping.conf $(APPELFLOADER_BASE)/vdso/libvdso.so $(APPELFLOADER_BASE)/vdso/vdso-image.c
5 changes: 5 additions & 0 deletions Makefile.uk
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,8 @@ UK_PROVIDED_SYSCALLS-$(CONFIG_APPELFLOADER_BRK) += brk-1

APPELFLOADER_SRCS-$(CONFIG_APPELFLOADER_ARCH_PRCTL) += $(APPELFLOADER_BASE)/arch_prctl.c
UK_PROVIDED_SYSCALLS-$(CONFIG_APPELFLOADER_ARCH_PRCTL) += arch_prctl-3

# Pull in the rules that generate vdso/vdso-image.c from libvdso.so
include $(APPELFLOADER_BASE)/Makefile.rules

# vDSO support (only with CONFIG_VDSO): the generated image and the
# __kernel_vsyscall entry point
APPELFLOADER_SRCS-$(CONFIG_VDSO) += $(APPELFLOADER_BASE)/vdso/vdso-image.c
APPELFLOADER_SRCS-$(CONFIG_VDSO) += $(APPELFLOADER_BASE)/vdso/vsyscall.c
7 changes: 7 additions & 0 deletions elf_ctx.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ struct auxv_entry {
long val;
};

#if CONFIG_VDSO
/* Address of the embedded vDSO image. The variable is defined in the
 * generated vdso-image.c and is handed to the application through the
 * AT_SYSINFO_EHDR auxiliary vector entry.
 */
extern char* vdso_image_addr;
#endif

#if CONFIG_ARCH_X86_64
static const char *auxv_platform = "x86_64";
#else
Expand Down Expand Up @@ -159,6 +163,9 @@ void elf_ctx_init(struct ukarch_ctx *ctx, struct elf_prog *prog,
{ AT_PHENT, prog->phdr.entsize },
{ AT_PHNUM, prog->phdr.num },
{ AT_PHDR, prog->start + prog->phdr.off },
#if CONFIG_VDSO
{ AT_SYSINFO_EHDR, (long)vdso_image_addr},
#endif
{ AT_IGNORE, 0x0 }
};
struct auxv_entry auxv_null = { AT_NULL, 0x0 };
Expand Down
4 changes: 4 additions & 0 deletions vdso/add_symbol.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/sh
# Adds one global function symbol to libvdso.o for every line of
# vdso_mapping.conf. The symbol is named after the first field of the line.
#
# Each symbol gets a distinct placeholder value, starting at VDSO_MAGIC_NUMBER
# and counting up, so that bin2c.py can later find the symbol values inside
# the linked image.
#
# Environment:
#   APPELFLOADER_BASE  - directory that contains the vdso/ sub-directory
#   VDSO_MAGIC_NUMBER  - placeholder value given to the first symbol
#
# Only POSIX shell constructs are used, so the script does not depend on
# /bin/sh being bash.

: "${APPELFLOADER_BASE:?APPELFLOADER_BASE must be set}"
: "${VDSO_MAGIC_NUMBER:?VDSO_MAGIC_NUMBER must be set}"

# "|| [ -n ... ]" also processes a last line that has no trailing newline.
while IFS=' ' read -r vdso_symbol _rest || [ -n "$vdso_symbol" ]; do
	# Blank lines carry no symbol; skip them instead of passing an empty
	# name to objcopy.
	[ -n "$vdso_symbol" ] || continue

	# Stop at the first failure: an object with missing placeholders would
	# only be rejected later, with a less helpful message.
	objcopy --add-symbol "$vdso_symbol=.text:$VDSO_MAGIC_NUMBER,global,function" \
		"$APPELFLOADER_BASE/vdso/libvdso.o" || exit 1

	VDSO_MAGIC_NUMBER=$((${VDSO_MAGIC_NUMBER} + 1))
done < "$APPELFLOADER_BASE/vdso/vdso_mapping.conf"
82 changes: 82 additions & 0 deletions vdso/bin2c.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
"""Generate vdso-image.c, a C source file that embeds libvdso.so.

The shared object is searched for the placeholder symbol values planted by
add_symbol.sh: 64-bit little-endian values whose upper 32 bits equal
MAGIC_NUMBER and whose lower 32 bits count up from zero. The image is emitted
as one C struct in which the bytes between placeholders become
``unsigned char`` arrays and every placeholder becomes a ``void *`` member.
A constructor in the generated file fills each pointer member with the offset
of the mapped kernel function relative to the image start.

Usage: bin2c.py /path/to/vdso_mapping.conf /path/to/libvdso.so /path/to/vdso-image.c
"""
import sys

# Upper 32 bits of every placeholder value (see VDSO_MAGIC_NUMBER in
# Makefile.rules).
MAGIC_NUMBER = 0x369C2171

# Size in bytes of one placeholder slot. The generated struct stores a
# `void *` in its place, so slots must also be aligned to this size;
# otherwise the C compiler would insert padding and shift the image.
SLOT_SIZE = 8

# Skeleton of the generated file. The four fields are: struct member
# declarations, struct initializer, extern declarations, pointer fix-ups.
IMAGE_TEMPLATE = """
/* AUTOMATICALLY GENERATED -- DO NOT EDIT */
struct {{
{}
}} vdso_image = {{
{}
}};
char* vdso_image_addr;
void uk_init_vdso(void) __attribute__((constructor));
{}
void uk_init_vdso(void) {{
\tvdso_image_addr = (char*)&vdso_image;
{}
}}
"""


def read_mappings(path):
    """Return the (vdso_symbol, kernel_symbol) pairs listed in `path`.

    Each non-blank line holds two whitespace-separated names; additional
    fields are ignored. Raises ValueError for a line with a single field.
    """
    mappings = []
    with open(path) as conf:
        for lineno, line in enumerate(conf, 1):
            fields = line.split()
            if not fields:
                continue
            if len(fields) < 2:
                raise ValueError(
                    "{}:{}: expected '<vdso symbol> <kernel symbol>'".format(path, lineno))
            mappings.append((fields[0], fields[1]))
    return mappings


def find_symbol_slots(image, expected_count):
    """Locate the placeholder slots in `image`.

    Returns a list of [file_offset, placeholder_index, mapping_index] entries
    ordered by file offset. `mapping_index` is the rank of the placeholder
    index among all slots, i.e. the line of the mapping file the slot
    belongs to.

    Raises ValueError if the number of slots does not match `expected_count`
    or if a slot is not aligned to SLOT_SIZE.
    """
    hits = []
    # The magic word is the upper half of a slot, so a match at `i` means the
    # slot starts at `i - 4`; start at 4 so that this offset is never negative.
    for i in range(4, len(image) - 8, 4):
        if int.from_bytes(image[i:i + 4], 'little') == MAGIC_NUMBER:
            hits.append([i - 4, int.from_bytes(image[i - 4:i], 'little'), None])

    # Every placeholder is expected to appear twice in the file (two symbol
    # tables); only the first occurrence of each is patched.
    if len(hits) % 2 != 0:
        raise ValueError(
            "Found an odd number ({}) of magic numbers.".format(len(hits)))
    slots = hits[:len(hits) // 2]
    if len(slots) != expected_count:
        raise ValueError(
            "Found {} magic numbers, and {} ones to map, count mismatch.".format(
                len(slots), expected_count))

    # Rank the slots by placeholder index to recover the mapping line, then
    # restore file order for emission.
    slots.sort(key=lambda slot: slot[1])
    for rank, slot in enumerate(slots):
        slot[2] = rank
    slots.sort(key=lambda slot: slot[0])

    for offset, _index, _rank in slots:
        if offset % SLOT_SIZE != 0:
            raise ValueError(
                "Placeholder at offset {} is not {}-byte aligned.".format(offset, SLOT_SIZE))
    return slots


def _format_bytes(data):
    """Render `data` as C initializer rows of up to ten hex bytes each."""
    return '\n'.join(
        '\t\t' + ' '.join("0x{:02X},".format(byte) for byte in data[i:i + 10])
        for i in range(0, len(data), 10))


def generate_source(image, slots, mappings):
    """Return the text of vdso-image.c for `image`.

    `slots` is the result of find_symbol_slots() and `mappings` the result of
    read_mappings().
    """
    declarations = []
    initializers = []
    last_offset = 0
    for idx, (offset, _index, func_id) in enumerate(slots):
        declarations.append(
            "\tconst unsigned char vdso_data_{}[{}];\n".format(idx, offset - last_offset))
        initializers.append(
            "\t{{\n{}\n\t}},\n".format(_format_bytes(image[last_offset:offset])))
        declarations.append(
            "\tvoid *__vdso_addr_{};\n".format(mappings[func_id][0]))
        # Zero placeholder; the real value is assigned by uk_init_vdso().
        initializers.append("\t\t0,\n")
        last_offset = offset + SLOT_SIZE

    # Remainder of the image after the last slot.
    declarations.append(
        "\tconst unsigned char vdso_data_{}[{}];\n".format(len(slots), len(image) - last_offset))
    initializers.append("\t{{\n{}\n\t}}".format(_format_bytes(image[last_offset:])))

    externs = '\n'.join('extern void {}();'.format(kernel_sym) for _, kernel_sym in mappings)
    fixups = '\n'.join(
        '\tvdso_image.__vdso_addr_{} = (void*)((char*)&{} - vdso_image_addr);'.format(
            vdso_sym, kernel_sym)
        for vdso_sym, kernel_sym in mappings)
    return IMAGE_TEMPLATE.format(
        ''.join(declarations), ''.join(initializers), externs, fixups)


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 4:
        print('Usage: bin2c.py /path/to/vdso_mapping.conf /path/to/libvdso.so /path/to/vdso-image.c')
        return 1
    vdso_mapping_path, libvdso_path, vdso_image_path = argv[1:4]

    try:
        mappings = read_mappings(vdso_mapping_path)
        with open(libvdso_path, 'rb') as lib:
            image = lib.read()
        slots = find_symbol_slots(image, len(mappings))
        source = generate_source(image, slots, mappings)
    except ValueError as err:
        print("Error: {}".format(err), file=sys.stderr)
        return 1

    # The output is written only once generation succeeded, so a failed run
    # never leaves a truncated vdso-image.c behind.
    with open(vdso_image_path, 'w') as out:
        out.write(source)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Empty file added vdso/vdso.c
Empty file.
90 changes: 90 additions & 0 deletions vdso/vdso.lds
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Modified from Linux kernel */

/*
* Linker script for vDSO. This is an ELF shared object prelinked to
* its virtual address, and with only one read-only segment.
* This script controls its layout.
*/

/*
 * Section layout of the vDSO. Every output section is assigned to the
 * "text" program header, so the whole object ends up in one loadable
 * segment.
 */
SECTIONS
{
/* Start placing sections right after the ELF and program headers */
. = SIZEOF_HEADERS;

.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }

.dynamic : { *(.dynamic) } :text :dynamic

/*
 * Data-like input sections (.data, .bss, GOT, ...) are collected into
 * .rodata as well, since there is no separate writable segment.
 */
.rodata : {
*(.rodata*)
*(.data*)
*(.sdata*)
*(.got.plt) *(.got)
*(.gnu.linkonce.d.*)
*(.bss*)
*(.dynbss*)
*(.gnu.linkonce.b.*)
} :text

/*
* Discard .note.gnu.property sections which are unused and have
* different alignment requirement from vDSO note sections.
*/
/DISCARD/ : {
*(.note.gnu.property)
}
.note : { *(.note.*) } :text :note

.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
.eh_frame : { KEEP (*(.eh_frame)) } :text


/*
* Text is well-separated from actual data: there's plenty of
* stuff that isn't used at runtime in between.
*/

.text : {
*(.text*)
} :text


.altinstructions : { *(.altinstructions) } :text
.altinstr_replacement : { *(.altinstr_replacement) } :text

/DISCARD/ : {
*(.discard)
*(.discard.*)
}
}


/*
* We must supply the ELF program headers explicitly to get just one
* PT_LOAD segment, and set the flags explicitly to make segments read-only.
*/
/*
 * "text" is the only PT_LOAD header and also covers the ELF file header and
 * the program headers (FILEHDR PHDRS). The remaining headers are attached to
 * sections that are placed in "text" as well.
 */
PHDRS
{
text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
note PT_NOTE FLAGS(4); /* PF_R */
eh_frame_hdr PT_GNU_EH_FRAME;
}

/*
 * Symbols exported by the vDSO under version LINUX_2.6; everything else is
 * local. The list mirrors the first column of vdso_mapping.conf -- keep both
 * in sync when adding or removing an entry.
 */
VERSION {
LINUX_2.6 {
global:
__kernel_vsyscall;
__vdso_clock_gettime;
__vdso_gettimeofday;
__vdso_time;
__vdso_clock_getres;
local: *;
};
}
5 changes: 5 additions & 0 deletions vdso/vdso_mapping.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
__kernel_vsyscall __kernel_vsyscall
__vdso_clock_gettime uk_syscall_r_clock_gettime
__vdso_gettimeofday uk_syscall_r_gettimeofday
__vdso_time uk_syscall_r_time
__vdso_clock_getres uk_syscall_r_clock_getres
43 changes: 43 additions & 0 deletions vdso/vsyscall.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#include <uk/syscall.h>
#include <uk/plat/syscall.h>
#if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
#include <uk/plat/tls.h>
#include <uk/thread.h>
#endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */
#include <uk/assert.h>
#include <uk/essentials.h>

/**
 * System call entry point that the vDSO exposes to the application.
 *
 * Forwards the request to uk_syscall6_r(). With
 * CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS enabled, the TLS pointer is switched
 * to the current thread's Unikraft TLS (self->uktlsp) for the duration of the
 * call and the caller's TLS pointer is published in _uk_syscall_ultlsp.
 * Afterwards the caller's TLS pointer is restored, unless the system call
 * handler itself installed a different one.
 *
 * @param syscall_nr
 *   Number of the system call to execute
 * @param arg0..arg5
 *   System call arguments, passed through unchanged
 * @return
 *   The value returned by uk_syscall6_r()
 */
long __kernel_vsyscall(long syscall_nr, long arg0, long arg1, long arg2,
		       long arg3, long arg4, long arg5)
{
	long ret;
#if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
	struct uk_thread *self;
	__uptr orig_tlsp;

	/* Activate Unikraft TLS */
	orig_tlsp = ukplat_tlsp_get();
	self = uk_thread_current();
	UK_ASSERT(self);
	ukplat_tlsp_set(self->uktlsp);
	/* Assigned only after the switch above, while self->uktlsp is the
	 * active TLS pointer.
	 */
	_uk_syscall_ultlsp = orig_tlsp;
#endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */

	ret = uk_syscall6_r(syscall_nr,
			    arg0, arg1, arg2,
			    arg3, arg4, arg5);

#if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
	/* Reset through the thread handle: the handler may have changed the
	 * active TLS pointer, so a plain access could hit a different TLS.
	 */
	uk_thread_uktls_var(self, _uk_syscall_ultlsp) = 0x0;

	/* Restore original TLS only if it was _NOT_
	 * changed by the system call handler
	 */
	if (likely(ukplat_tlsp_get() == self->uktlsp)) {
		ukplat_tlsp_set(orig_tlsp);
	} else {
		/* The register now holds the new pointer; orig_tlsp is the
		 * value from before the call.
		 */
		uk_pr_debug("System call updated userland TLS pointer register to %p (before: %p)\n",
			    (void *) ukplat_tlsp_get(), (void *) orig_tlsp);
	}
#endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */

	return ret;
}

0 comments on commit ed0b10b

Please sign in to comment.