-
Notifications
You must be signed in to change notification settings - Fork 31
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Provide VDSO image and __kernel_vsyscall interface
This patch provides the capability to automatically generate a VDSO (Virtual Dynamic Shared Object) image and pass it to the application. The VDSO contains only symbols, while its actual implementation is retained in the kernel, with their mapping relationship recorded in `vdso_mapping.conf`. Additionally, this introduces the `__kernel_vsyscall` interface, which allows libc to access system calls without relying on slow binary syscalls. The difference between `__kernel_vsyscall` and `uk_syscall6_r` lies in considering TLS (Thread Local Storage) switching, enabling both the kernel and user applications to use thread-local storage seamlessly. To utilize this feature, the libc needs to be modified accordingly; please refer to `fork-musl` [1] and `fork-glibc` [2] for further information. To use the VDSO, please select `Application Options -> Provide VDSO Image`. [1] https://github.com/unikraft/fork-musl [2] https://github.com/unikraft/fork-glibc Signed-off-by: Tianyi Liu <i.pear@outlook.com> Reviewed-by: Ioan-Teodor Teugea <ioan_teodor.teugea@stud.acs.upb.ro> Approved-by: Razvan Deaconescu <razvand@unikraft.io> Tested-by: Unikraft CI <monkey@unikraft.io> GitHub-Closes: #23
- Loading branch information
1 parent
bbb92f8
commit 6d9ad43
Showing
10 changed files
with
251 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Compile the vDSO stub into a position-independent object, then run
# add_symbol.sh on it with VDSO_MAGIC_NUMBER as the first placeholder value.
# The upper 32 bits of that value (0x369C2171) equal MAGIC_NUMBER in bin2c.py.
# All inputs of the recipe are listed as prerequisites so that editing any of
# them rebuilds the object.
$(APPELFLOADER_BASE)/vdso/libvdso.o: $(APPELFLOADER_BASE)/vdso/vdso.c \
    $(APPELFLOADER_BASE)/vdso/add_symbol.sh \
    $(APPELFLOADER_BASE)/vdso/vdso_mapping.conf
	gcc $(APPELFLOADER_BASE)/vdso/vdso.c -c -o $(APPELFLOADER_BASE)/vdso/libvdso.o -fPIC -O2 -nostdlib
	@APPELFLOADER_BASE=$(APPELFLOADER_BASE) VDSO_MAGIC_NUMBER=0x369C217100000000 $(APPELFLOADER_BASE)/vdso/add_symbol.sh

# Link the object into a shared object laid out by vdso.lds.
$(APPELFLOADER_BASE)/vdso/libvdso.so: $(APPELFLOADER_BASE)/vdso/libvdso.o \
    $(APPELFLOADER_BASE)/vdso/vdso.lds
	ld $(APPELFLOADER_BASE)/vdso/libvdso.o -o $(APPELFLOADER_BASE)/vdso/libvdso.so --hash-style=both -soname unikraft-vdso.so.1 -shared -T $(APPELFLOADER_BASE)/vdso/vdso.lds

# Convert the shared object into a C source file via bin2c.py, using the
# symbol mapping from vdso_mapping.conf.
$(APPELFLOADER_BASE)/vdso/vdso-image.c: $(APPELFLOADER_BASE)/vdso/libvdso.so \
    $(APPELFLOADER_BASE)/vdso/bin2c.py \
    $(APPELFLOADER_BASE)/vdso/vdso_mapping.conf
	python3 $(APPELFLOADER_BASE)/vdso/bin2c.py $(APPELFLOADER_BASE)/vdso/vdso_mapping.conf $(APPELFLOADER_BASE)/vdso/libvdso.so $(APPELFLOADER_BASE)/vdso/vdso-image.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash
#
# Add one global function symbol to libvdso.o for every entry of
# vdso_mapping.conf. The symbol name is the first field of the line; its
# value is VDSO_MAGIC_NUMBER for the first entry and increases by one for
# each following entry.
#
# Environment:
#   APPELFLOADER_BASE  directory containing the vdso/ subdirectory
#   VDSO_MAGIC_NUMBER  value assigned to the first symbol
#
# The explicit bash shebang matters: without it the script may be run by a
# POSIX /bin/sh that does not implement the arithmetic used here.

# Abort on the first failing objcopy and on unset variables instead of
# silently producing an object with missing symbols.
set -eu

: "${APPELFLOADER_BASE:?APPELFLOADER_BASE must be set}"
: "${VDSO_MAGIC_NUMBER:?VDSO_MAGIC_NUMBER must be set}"

vdso_dir="$APPELFLOADER_BASE/vdso"

# Default IFS so that tab-separated fields are split as well.
# "|| [ -n ... ]" keeps a final line that lacks a trailing newline, which
# read reports as failure even though it filled the variable.
while read -r vdso_symbol _ || [ -n "$vdso_symbol" ]; do
	# Skip blank lines; they do not describe a symbol.
	[ -n "$vdso_symbol" ] || continue
	objcopy --add-symbol "$vdso_symbol=.text:$VDSO_MAGIC_NUMBER,global,function" "$vdso_dir/libvdso.o"
	# Plain assignment rather than ((var++)), whose exit status depends on
	# the value and would interact badly with "set -e".
	VDSO_MAGIC_NUMBER=$((VDSO_MAGIC_NUMBER + 1))
done < "$vdso_dir/vdso_mapping.conf"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
import sys | ||
import functools | ||
|
||
# Upper 32 bits of every placeholder value searched for in libvdso.so.
MAGIC_NUMBER = 0x369C2171

# Number of byte literals emitted per line of the generated C initializer.
BYTES_PER_ROW = 10

USAGE = 'Usage: bin2c.py /path/to/vdso_mapping.conf /path/to/libvdso.so /path/to/vdso-image.c'

# Skeleton of the generated C file. Slots, in order: struct member
# declarations, struct initializer, extern declarations of the target
# functions, and the assignments performed by the constructor.
C_TEMPLATE = """
/* AUTOMATICALLY GENERATED -- DO NOT EDIT */
struct {{
{}
}} vdso_image = {{
{}
}};
char* vdso_image_addr;
void uk_init_vdso(void) __attribute__((constructor));
{}
void uk_init_vdso(void) {{
\tvdso_image_addr = (char*)&vdso_image;
{}
}}
"""


def _read_mappings(path):
    """Return the (vdso_symbol, target_function) pairs listed in `path`.

    Each non-blank line must contain at least two whitespace-separated
    fields; further fields are ignored. Raises ValueError for a line with
    only one field.
    """
    mappings = []
    with open(path) as f:
        for lineno, line in enumerate(f, 1):
            fields = line.split()
            if not fields:
                continue
            if len(fields) < 2:
                raise ValueError("{}:{}: expected '<vdso symbol> <target function>', got {!r}"
                                 .format(path, lineno, line.strip()))
            mappings.append((fields[0], fields[1]))
    return mappings


def _le32(data, offset):
    """Decode the little-endian 32-bit word stored at `offset` in `data`."""
    return int.from_bytes(data[offset:offset + 4], 'little')


def _find_placeholders(image):
    """Locate 8-byte values whose upper 32-bit word equals MAGIC_NUMBER.

    Only 4-byte-aligned positions are inspected. Returns a list of
    [offset_of_value, lower_32_bits, None] entries in file order; the third
    slot is filled in later with the mapping index.
    """
    hits = []
    # Start at 4: the lower word sits in the four bytes before the magic word,
    # so a match at offset 0 would have no lower word to read.
    for i in range(4, len(image) - 8, 4):
        if _le32(image, i) == MAGIC_NUMBER:
            hits.append([i - 4, _le32(image, i - 4), None])
    return hits


def _format_bytes(data):
    """Render `data` as C byte literals, BYTES_PER_ROW per line."""
    return '\n'.join(
        '\t\t' + ' '.join("0x{:02X},".format(b) for b in data[i:i + BYTES_PER_ROW])
        for i in range(0, len(data), BYTES_PER_ROW))


def _render(image, placeholders, mappings):
    """Build the C source text for `image`.

    The image is cut at every placeholder: the bytes between placeholders
    become `vdso_data_N` arrays and each 8-byte placeholder becomes a
    `void *__vdso_addr_<symbol>` member initialised to 0 and assigned in the
    generated constructor.
    """
    declarations = []
    initializers = []
    last_offset = 0
    for idx, (pos, _, func_id) in enumerate(placeholders):
        declarations.append("\tconst unsigned char vdso_data_{}[{}];\n".format(idx, pos - last_offset))
        initializers.append("\t{{\n{}\n\t}},\n".format(_format_bytes(image[last_offset:pos])))
        declarations.append("\tvoid *__vdso_addr_{};\n".format(mappings[func_id][0]))
        initializers.append("\t\t0,\n")
        last_offset = pos + 8
    # Remainder of the image after the last placeholder.
    declarations.append("\tconst unsigned char vdso_data_{}[{}];\n".format(len(placeholders),
                                                                          len(image) - last_offset))
    initializers.append("\t{{\n{}\n\t}}".format(_format_bytes(image[last_offset:])))
    return C_TEMPLATE.format(
        ''.join(declarations),
        ''.join(initializers),
        '\n'.join('extern void {}();'.format(target) for _, target in mappings),
        '\n'.join('\tvdso_image.__vdso_addr_{} = (void*)((char*)&{} - vdso_image_addr);'.format(sym, target)
                  for sym, target in mappings),
    )


def main(argv=None):
    """Generate vdso-image.c; returns the process exit status (0 or 1).

    `argv` follows the sys.argv layout:
    [program, vdso_mapping.conf, libvdso.so, vdso-image.c].
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        print(USAGE, file=sys.stderr)
        return 1
    vdso_mapping_path, libvdso_path, vdso_image_path = argv[1:4]

    try:
        vdso_mappings = _read_mappings(vdso_mapping_path)
    except ValueError as e:
        print("Error: {}".format(e), file=sys.stderr)
        return 1
    with open(libvdso_path, 'rb') as f:
        libvdso_content = f.read()

    symbol_pos = _find_placeholders(libvdso_content)
    # Every placeholder is expected to occur an even number of times and only
    # the first half is rewritten -- presumably one copy per symbol table
    # (TODO confirm). Checked explicitly so it also holds under "python -O".
    if len(symbol_pos) % 2 != 0:
        print("Error: Found an odd number ({}) of magic numbers.".format(len(symbol_pos)),
              file=sys.stderr)
        return 1
    symbol_pos = symbol_pos[:len(symbol_pos) // 2]
    if len(symbol_pos) != len(vdso_mappings):
        print("Error: Found {} magic numbers, and {} ones to map, count mismatch."
              .format(len(symbol_pos), len(vdso_mappings)), file=sys.stderr)
        return 1

    # The rank of a placeholder's lower word selects its mapping entry.
    symbol_pos.sort(key=lambda x: x[1])
    for rank, entry in enumerate(symbol_pos):
        entry[2] = rank
    # Back to file order for emitting the image.
    symbol_pos.sort(key=lambda x: x[0])

    # Render first so that a failure does not leave a truncated output file.
    generated = _render(libvdso_content, symbol_pos, vdso_mappings)
    with open(vdso_image_path, 'w') as w:
        w.write(generated)
    return 0


if __name__ == '__main__':
    sys.exit(main())
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
/* SPDX-License-Identifier: GPL-2.0 */ | ||
/* Modified from Linux kernel */ | ||
|
||
/* | ||
* Linker script for vDSO. This is an ELF shared object prelinked to | ||
* its virtual address, and with only one read-only segment. | ||
* This script controls its layout. | ||
*/ | ||
|
||
SECTIONS
{
	/* Section contents start right after the ELF and program headers. */
	. = SIZEOF_HEADERS;

	/* Symbol lookup and versioning tables. */
	.hash			: { *(.hash) }			:text
	.gnu.hash		: { *(.gnu.hash) }
	.dynsym			: { *(.dynsym) }
	.dynstr			: { *(.dynstr) }
	.gnu.version		: { *(.gnu.version) }
	.gnu.version_d		: { *(.gnu.version_d) }
	.gnu.version_r		: { *(.gnu.version_r) }

	/* Listed in both the loadable segment and PT_DYNAMIC. */
	.dynamic		: { *(.dynamic) }		:text	:dynamic

	/* All data-like input sections are gathered into .rodata. */
	.rodata			: {
		*(.rodata*)
		*(.data*)
		*(.sdata*)
		*(.got.plt)
		*(.got)
		*(.gnu.linkonce.d.*)
		*(.bss*)
		*(.dynbss*)
		*(.gnu.linkonce.b.*)
	}							:text

	/*
	 * .note.gnu.property is unused and its alignment requirement
	 * differs from that of the vDSO note sections, so drop it.
	 */
	/DISCARD/		: {
		*(.note.gnu.property)
	}
	.note			: { *(.note.*) }		:text	:note

	.eh_frame_hdr		: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
	.eh_frame		: { KEEP (*(.eh_frame)) }	:text

	/*
	 * Code comes after all of the above, keeping it well apart from
	 * the actual data; much of what lies in between is not used at
	 * runtime.
	 */
	.text			: {
		*(.text*)
	}							:text

	.altinstructions	: { *(.altinstructions) }	:text
	.altinstr_replacement	: { *(.altinstr_replacement) }	:text

	/DISCARD/		: {
		*(.discard)
		*(.discard.*)
	}
}
|
||
|
||
/*
 * The program headers are spelled out by hand so that the output has
 * exactly one PT_LOAD segment, and the flags are given explicitly so
 * that the segments are read-only.
 */
PHDRS
{
	text		PT_LOAD		FLAGS(5) FILEHDR PHDRS;	/* PF_R|PF_X */
	dynamic		PT_DYNAMIC	FLAGS(4);		/* PF_R */
	note		PT_NOTE		FLAGS(4);		/* PF_R */
	eh_frame_hdr	PT_GNU_EH_FRAME;
}
|
||
/*
 * Symbol versioning: the listed entry points are exported under the
 * LINUX_2.6 version node; every other symbol is made local.
 */
VERSION {
	LINUX_2.6 {
	global:
		__kernel_vsyscall;
		__vdso_clock_gettime;
		__vdso_gettimeofday;
		__vdso_time;
		__vdso_clock_getres;
	local:
		*;
	};
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
__kernel_vsyscall __kernel_vsyscall | ||
__vdso_clock_gettime uk_syscall_r_clock_gettime | ||
__vdso_gettimeofday uk_syscall_r_gettimeofday | ||
__vdso_time uk_syscall_r_time | ||
__vdso_clock_getres uk_syscall_r_clock_getres |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#include <uk/syscall.h> | ||
#include <uk/plat/syscall.h> | ||
#if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS | ||
#include <uk/plat/tls.h> | ||
#include <uk/thread.h> | ||
#endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */ | ||
#include <uk/assert.h> | ||
#include <uk/essentials.h> | ||
|
||
/**
 * Function-call entry point for system calls.
 *
 * Forwards the request to uk_syscall6_r(). When
 * CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS is enabled, the TLS pointer is switched
 * to the current thread's Unikraft TLS (self->uktlsp) for the duration of the
 * call, and the caller's TLS pointer is restored afterwards unless the handler
 * changed the TLS pointer register itself.
 *
 * @param syscall_nr System call number passed to uk_syscall6_r()
 * @param arg0..arg5 System call arguments, passed through unchanged
 * @return The value returned by uk_syscall6_r()
 */
long __kernel_vsyscall(long syscall_nr, long arg0, long arg1, long arg2,
		       long arg3, long arg4, long arg5)
{
	long ret;
#if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
	struct uk_thread *self;
	__uptr orig_tlsp;

	/* Activate Unikraft TLS */
	orig_tlsp = ukplat_tlsp_get();
	self = uk_thread_current();
	UK_ASSERT(self);
	ukplat_tlsp_set(self->uktlsp);
	/* Record the caller's TLS pointer; done only after the switch above
	 * (presumably because the variable lives in the Unikraft TLS --
	 * TODO confirm).
	 */
	_uk_syscall_ultlsp = orig_tlsp;
#endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */

	ret = uk_syscall6_r(syscall_nr,
			    arg0, arg1, arg2,
			    arg3, arg4, arg5);

#if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
	uk_thread_uktls_var(self, _uk_syscall_ultlsp) = 0x0;

	/* Restore original TLS only if it was _NOT_
	 * changed by the system call handler
	 */
	if (likely(ukplat_tlsp_get() == self->uktlsp)) {
		ukplat_tlsp_set(orig_tlsp);
	} else {
		/* New value first, previous value second, matching the
		 * "to ... (before: ...)" wording of the message.
		 */
		uk_pr_debug("System call updated userland TLS pointer register to %p (before: %p)\n",
			    (void *) ukplat_tlsp_get(), (void *) orig_tlsp);
	}
#endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */

	return ret;
}