From 0180aec9b832267462e7d6bfc1659ac5bd32350a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Fri, 5 Feb 2021 10:34:01 -0300 Subject: [PATCH] futex2: Implement requeue operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement requeue interface similary to FUTEX_CMP_REQUEUE operation. This is the syscall implemented by this patch: futex_requeue(struct futex_requeue *uaddr1, struct futex_requeue *uaddr2, unsigned int nr_wake, unsigned int nr_requeue, unsigned int cmpval, unsigned int flags) struct futex_requeue { void *uaddr; unsigned int flags; }; If (uaddr1->uaddr == cmpval), wake at uaddr1->uaddr a nr_wake number of waiters and then, remove a number of nr_requeue waiters at uaddr1->uaddr and add them to uaddr2->uaddr list. Each uaddr has its own set of flags, that must be defined at struct futex_requeue (such as size, shared, NUMA). The flags argument of the syscall is there just for the sake of extensibility, and right now it needs to be zero. Return the number of the woken futexes + the number of requeued ones on success, error code otherwise. Signed-off-by: André Almeida Rebased-by: Joshua Ashton --- arch/arm/tools/syscall.tbl | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + include/linux/compat.h | 12 ++ include/linux/syscalls.h | 5 + include/uapi/asm-generic/unistd.h | 5 +- include/uapi/linux/futex.h | 10 ++ kernel/futex2.c | 215 +++++++++++++++++++++++++ kernel/sys_ni.c | 1 + 10 files changed, 251 insertions(+), 2 deletions(-) diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 6e476c34bd0037..25f175ada125da 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -463,3 +463,4 @@ 447 common futex_wait sys_futex_wait 448 common futex_wake sys_futex_wake 449 common futex_waitv sys_futex_waitv +450 common futex_requeue sys_futex_requeue diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 6bdb5f5db438c4..4e65da3445c7aa 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 450 +#define __NR_compat_syscalls 451 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index b991991a434a64..1c3ca8b5024767 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -454,3 +454,4 @@ 447 i386 futex_wait sys_futex_wait 448 i386 futex_wake sys_futex_wake 449 i386 futex_waitv sys_futex_waitv compat_sys_futex_waitv +450 i386 futex_requeue sys_futex_requeue compat_sys_futex_requeue diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index bad4aca3e9ba7d..a1a39ed156e866 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -371,6 +371,7 @@ 447 common futex_wait sys_futex_wait 448 common futex_wake sys_futex_wake 449 common futex_waitv sys_futex_waitv +450 common futex_requeue sys_futex_requeue # # Due to a historical design error, certain syscalls are numbered differently diff --git a/include/linux/compat.h b/include/linux/compat.h index 06a40776d8a5d8..34ad63bac18de3 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -374,6 +374,11 @@ struct compat_futex_waitv { compat_uint_t flags; }; +struct compat_futex_requeue { + compat_uptr_t uaddr; + compat_uint_t flags; +}; + #ifdef CONFIG_COMPAT_OLD_SIGACTION struct compat_old_sigaction { compat_uptr_t sa_handler; @@ -703,6 +708,13 @@ asmlinkage long compat_sys_futex_waitv(struct compat_futex_waitv *waiters, compat_uint_t nr_futexes, compat_uint_t flags, struct __kernel_timespec __user *timo); +asmlinkage long compat_sys_futex_requeue(struct compat_futex_requeue *uaddr1, + struct compat_futex_requeue *uaddr2, + compat_uint_t nr_wake, + compat_uint_t nr_requeue, + compat_uint_t cmpval, + compat_uint_t flags); + /* kernel/itimer.c */ asmlinkage long compat_sys_getitimer(int which, struct old_itimerval32 __user *it); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7d166f7304ae3d..aca64b5126a7b3 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -72,6 +72,7 @@ struct mount_attr; struct landlock_ruleset_attr; enum landlock_rule_type; struct futex_waitv; +struct futex_requeue; #include #include @@ -633,6 +634,10 @@ asmlinkage long sys_futex_wake(void __user *uaddr, unsigned int nr_wake, asmlinkage long sys_futex_waitv(struct futex_waitv __user *waiters, unsigned int nr_futexes, unsigned int flags, struct __kernel_timespec __user *timo); +asmlinkage long sys_futex_requeue(struct futex_requeue __user *uaddr1, + struct futex_requeue __user *uaddr2, + unsigned int nr_wake, unsigned int nr_requeue, + unsigned int cmpval, unsigned int flags); /* kernel/hrtimer.c */ asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index de966a1016d6be..c0d552f660a765 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -881,8 +881,11 @@ __SYSCALL(__NR_futex_wake, sys_futex_wake) #define __NR_futex_waitv 445 __SC_COMP(__NR_futex_waitv, sys_futex_waitv, compat_sys_futex_waitv) +#define __NR_futex_requeue 446 +__SC_COMP(__NR_futex_requeue, sys_futex_requeue, compat_sys_futex_requeue) + #undef __NR_syscalls -#define __NR_syscalls 450 +#define __NR_syscalls 451 /* * 32 bit systems traditionally used different diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index 3216aee015d212..c15bfddcf1e208 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -62,6 +62,16 @@ struct futex_waitv { unsigned int flags; }; +/** + * struct futex_requeue - Define an address and its flags for requeue operation + * @uaddr: User address of one of the requeue arguments + * @flags: Flags for this address + */ +struct futex_requeue { + void __user *uaddr; + unsigned int flags; +}; + /* * Support for robust futexes: the kernel cleans up held futexes at * thread exit time. diff --git a/kernel/futex2.c b/kernel/futex2.c index 6b2855ef681cd1..8bf7ad9642a861 100644 --- a/kernel/futex2.c +++ b/kernel/futex2.c @@ -975,6 +975,221 @@ SYSCALL_DEFINE3(futex_wake, void __user *, uaddr, unsigned int, nr_wake, return ret; } +static void futex_double_unlock(struct futex_bucket *b1, struct futex_bucket *b2) +{ + spin_unlock(&b1->lock); + if (b1 != b2) + spin_unlock(&b2->lock); +} + +static inline int __futex_requeue(struct futex_requeue rq1, + struct futex_requeue rq2, unsigned int nr_wake, + unsigned int nr_requeue, unsigned int cmpval, + bool shared1, bool shared2) +{ + struct futex_waiter w1, w2, *aux, *tmp; + bool retry = false; + struct futex_bucket *b1, *b2; + DEFINE_WAKE_Q(wake_q); + u32 uval; + int ret; + + b1 = futex_get_bucket(rq1.uaddr, &w1.key, shared1); + if (IS_ERR(b1)) + return PTR_ERR(b1); + + b2 = futex_get_bucket(rq2.uaddr, &w2.key, shared2); + if (IS_ERR(b2)) + return PTR_ERR(b2); + +retry: + if (shared1 && retry) { + b1 = futex_get_bucket(rq1.uaddr, &w1.key, shared1); + if (IS_ERR(b1)) + return PTR_ERR(b1); + } + + if (shared2 && retry) { + b2 = futex_get_bucket(rq2.uaddr, &w2.key, shared2); + if (IS_ERR(b2)) + return PTR_ERR(b2); + } + + bucket_inc_waiters(b2); + /* + * To ensure the locks are taken in the same order for all threads (and + * thus avoiding deadlocks), take the "smaller" one first + */ + if (b1 <= b2) { + spin_lock(&b1->lock); + if (b1 < b2) + spin_lock_nested(&b2->lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock(&b2->lock); + spin_lock_nested(&b1->lock, SINGLE_DEPTH_NESTING); + } + + ret = futex_get_user(&uval, rq1.uaddr); + + if (unlikely(ret)) { + futex_double_unlock(b1, b2); + if (__get_user(uval, (u32 __user *)rq1.uaddr)) + return -EFAULT; + + bucket_dec_waiters(b2); + retry = true; + goto retry; + } + + if (uval != cmpval) { + futex_double_unlock(b1, b2); + + bucket_dec_waiters(b2); + return -EAGAIN; + } + + list_for_each_entry_safe(aux, tmp, &b1->list, list) { + if (futex_match(w1.key, aux->key)) { + if (ret < nr_wake) { + futex_mark_wake(aux, b1, &wake_q); + ret++; + continue; + } + + if (ret >= nr_wake + nr_requeue) + break; + + aux->key.pointer = w2.key.pointer; + aux->key.index = w2.key.index; + aux->key.offset = w2.key.offset; + + if (b1 != b2) { + list_del_init_careful(&aux->list); + bucket_dec_waiters(b1); + + list_add_tail(&aux->list, &b2->list); + bucket_inc_waiters(b2); + } + ret++; + } + } + + futex_double_unlock(b1, b2); + wake_up_q(&wake_q); + bucket_dec_waiters(b2); + + return ret; +} + +#ifdef CONFIG_COMPAT +static int compat_futex_parse_requeue(struct futex_requeue *rq, + struct compat_futex_requeue __user *uaddr, + bool *shared) +{ + struct compat_futex_requeue tmp; + + if (copy_from_user(&tmp, uaddr, sizeof(tmp))) + return -EFAULT; + + if (tmp.flags & ~FUTEXV_WAITER_MASK || + (tmp.flags & FUTEX_SIZE_MASK) != FUTEX_32) + return -EINVAL; + + *shared = (tmp.flags & FUTEX_SHARED_FLAG) ? true : false; + + rq->uaddr = compat_ptr(tmp.uaddr); + rq->flags = tmp.flags; + + return 0; +} + +COMPAT_SYSCALL_DEFINE6(futex_requeue, struct compat_futex_requeue __user *, uaddr1, + struct compat_futex_requeue __user *, uaddr2, + unsigned int, nr_wake, unsigned int, nr_requeue, + unsigned int, cmpval, unsigned int, flags) +{ + struct futex_requeue rq1, rq2; + bool shared1, shared2; + int ret; + + if (flags) + return -EINVAL; + + ret = compat_futex_parse_requeue(&rq1, uaddr1, &shared1); + if (ret) + return ret; + + ret = compat_futex_parse_requeue(&rq2, uaddr2, &shared2); + if (ret) + return ret; + + return __futex_requeue(rq1, rq2, nr_wake, nr_requeue, cmpval, shared1, shared2); +} +#endif + +/** + * futex_parse_requeue - Copy a user struct futex_requeue and check it's flags + * @rq: Kernel struct + * @uaddr: Address of user struct + * @shared: Out parameter, defines if this is a shared futex + * + * Return: 0 on success, error code otherwise + */ +static int futex_parse_requeue(struct futex_requeue *rq, + struct futex_requeue __user *uaddr, bool *shared) +{ + if (copy_from_user(rq, uaddr, sizeof(*rq))) + return -EFAULT; + + if (rq->flags & ~FUTEXV_WAITER_MASK || + (rq->flags & FUTEX_SIZE_MASK) != FUTEX_32) + return -EINVAL; + + *shared = (rq->flags & FUTEX_SHARED_FLAG) ? true : false; + + return 0; +} + +/** + * sys_futex_requeue - Wake futexes at uaddr1 and requeue from uaddr1 to uaddr2 + * @uaddr1: Address of futexes to be waken/dequeued + * @uaddr2: Address for the futexes to be enqueued + * @nr_wake: Number of futexes waiting in uaddr1 to be woken up + * @nr_requeue: Number of futexes to be requeued from uaddr1 to uaddr2 + * @cmpval: Expected value at uaddr1 + * @flags: Reserved flags arg for requeue operation expansion. Must be 0. + * + * If (uaddr1->uaddr == cmpval), wake at uaddr1->uaddr a nr_wake number of + * waiters and then, remove a number of nr_requeue waiters at uaddr1->uaddr + * and add then to uaddr2->uaddr list. Each uaddr has its own set of flags, + * that must be defined at struct futex_requeue (such as size, shared, NUMA). + * + * Return the number of the woken futexes + the number of requeued ones on + * success, error code otherwise. + */ +SYSCALL_DEFINE6(futex_requeue, struct futex_requeue __user *, uaddr1, + struct futex_requeue __user *, uaddr2, + unsigned int, nr_wake, unsigned int, nr_requeue, + unsigned int, cmpval, unsigned int, flags) +{ + struct futex_requeue rq1, rq2; + bool shared1, shared2; + int ret; + + if (flags) + return -EINVAL; + + ret = futex_parse_requeue(&rq1, uaddr1, &shared1); + if (ret) + return ret; + + ret = futex_parse_requeue(&rq2, uaddr2, &shared2); + if (ret) + return ret; + + return __futex_requeue(rq1, rq2, nr_wake, nr_requeue, cmpval, shared1, shared2); +} + static int __init futex2_init(void) { int i; diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index d70bb8cb884f1b..af0b1ef09d93a8 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -155,6 +155,7 @@ COND_SYSCALL_COMPAT(get_robust_list); COND_SYSCALL(futex_wait); COND_SYSCALL(futex_wake); COND_SYSCALL(futex_waitv); +COND_SYSCALL(futex_requeue); /* kernel/hrtimer.c */