Skip to content
Permalink
Browse files

misc: Implement math_extras.h functions with GCC builtins.

When available, use the GCC / Clang builtins to implement the
math_extras functions. Otherwise, use the portable versions.

Signed-off-by: Jakob Olesen <jolesen@fb.com>
  • Loading branch information...
stoklund authored and galak committed May 7, 2019
1 parent f4f09dd commit b2945d304c081a5ba4a86d8f91354f23f5ab42e0
Showing with 119 additions and 0 deletions.
  1. +119 −0 include/misc/math_extras_impl.h
@@ -13,6 +13,37 @@
#error "please include <misc/math_extras.h> instead of this file"
#endif

#include <toolchain.h>

/*
 * Force the use of portable C code (no builtins) by defining
 * PORTABLE_MISC_MATH_EXTRAS before including <misc/math_extras.h>.
 * This is primarily for use by tests.
 *
 * use_builtin(x) is intended for use in #if conditions below: it is
 * always 0 when PORTABLE_MISC_MATH_EXTRAS is defined, and otherwise
 * expands to HAS_BUILTIN(x) (presumably provided by <toolchain.h>;
 * confirm against that header).
 *
 * We'll #undef use_builtin again at the end of the file.
 */
#ifdef PORTABLE_MISC_MATH_EXTRAS
#define use_builtin(x) 0
#else
#define use_builtin(x) HAS_BUILTIN(x)
#endif

#if use_builtin(__builtin_add_overflow)
/*
 * Add two 32-bit unsigned values via the compiler's checked-add builtin.
 *
 * The sum, truncated to the width of *result, is stored through 'result'.
 * Returns true when the exact sum did not fit (i.e. the addition
 * overflowed), false otherwise.
 */
static inline bool u32_add_overflow(u32_t a, u32_t b, u32_t *result)
{
	bool overflowed = __builtin_add_overflow(a, b, result);

	return overflowed;
}

/*
 * Add two 64-bit unsigned values via the compiler's checked-add builtin.
 *
 * The sum, truncated to the width of *result, is stored through 'result'.
 * Returns true when the exact sum did not fit (i.e. the addition
 * overflowed), false otherwise.
 */
static inline bool u64_add_overflow(u64_t a, u64_t b, u64_t *result)
{
	bool overflowed = __builtin_add_overflow(a, b, result);

	return overflowed;
}

/*
 * Add two size_t values via the compiler's checked-add builtin.
 *
 * The sum, truncated to the width of *result, is stored through 'result'.
 * Returns true when the exact sum did not fit (i.e. the addition
 * overflowed), false otherwise.
 */
static inline bool size_add_overflow(size_t a, size_t b, size_t *result)
{
	bool overflowed = __builtin_add_overflow(a, b, result);

	return overflowed;
}
#else /* !use_builtin(__builtin_add_overflow) */
static inline bool u32_add_overflow(u32_t a, u32_t b, u32_t *result)
{
u32_t c = a + b;
@@ -39,7 +70,24 @@ static inline bool size_add_overflow(size_t a, size_t b, size_t *result)

return c < a;
}
#endif /* use_builtin(__builtin_add_overflow) */

#if use_builtin(__builtin_mul_overflow)
/*
 * Multiply two 32-bit unsigned values via the compiler's checked-multiply
 * builtin.
 *
 * The product, truncated to the width of *result, is stored through
 * 'result'. Returns true when the exact product did not fit (i.e. the
 * multiplication overflowed), false otherwise.
 */
static inline bool u32_mul_overflow(u32_t a, u32_t b, u32_t *result)
{
	bool overflowed = __builtin_mul_overflow(a, b, result);

	return overflowed;
}

/*
 * Multiply two 64-bit unsigned values via the compiler's checked-multiply
 * builtin.
 *
 * The product, truncated to the width of *result, is stored through
 * 'result'. Returns true when the exact product did not fit (i.e. the
 * multiplication overflowed), false otherwise.
 */
static inline bool u64_mul_overflow(u64_t a, u64_t b, u64_t *result)
{
	bool overflowed = __builtin_mul_overflow(a, b, result);

	return overflowed;
}

/*
 * Multiply two size_t values via the compiler's checked-multiply builtin.
 *
 * The product, truncated to the width of *result, is stored through
 * 'result'. Returns true when the exact product did not fit (i.e. the
 * multiplication overflowed), false otherwise.
 */
static inline bool size_mul_overflow(size_t a, size_t b, size_t *result)
{
	bool overflowed = __builtin_mul_overflow(a, b, result);

	return overflowed;
}
#else /* !use_builtin(__builtin_mul_overflow) */
static inline bool u32_mul_overflow(u32_t a, u32_t b, u32_t *result)
{
u32_t c = a * b;
@@ -66,7 +114,54 @@ static inline bool size_mul_overflow(size_t a, size_t b, size_t *result)

return a != 0 && (c / a) != b;
}
#endif /* use_builtin(__builtin_mul_overflow) */


/*
* The GCC builtins __builtin_clz() and __builtin_ctz(), and their
* 64-bit variants, are described by the GCC documentation as having
* undefined behavior when the argument is zero. See
* https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
*
* The undefined behavior applies to all architectures, regardless of
* the behavior of the instruction used to implement the builtin.
*
* We don't want to expose users of this API to the undefined behavior,
* so we use a conditional to explicitly provide the correct result when
* x=0.
*
* Most instruction set architectures have a CLZ instruction or similar
* that already computes the correct result for x=0. Both GCC and Clang
* know this and simply generate a CLZ instruction, optimizing away the
* conditional.
*
* For x86, and for compilers that fail to eliminate the conditional,
* there is often another opportunity for optimization since code using
* these functions tends to contain a zero check already. For example,
* from kernel/sched.c:
*
* struct k_thread *z_priq_mq_best(struct _priq_mq *pq)
* {
* if (!pq->bitmask) {
* return NULL;
* }
*
* struct k_thread *t = NULL;
* sys_dlist_t *l =
* &pq->queues[u32_count_trailing_zeros(pq->bitmask)];
*
* ...
*
* The compiler will often be able to eliminate the redundant x == 0
* check after inlining the call to u32_count_trailing_zeros().
*/

#if use_builtin(__builtin_clz)
/*
 * Count the leading zero bits of a 32-bit value.
 *
 * Returns 32 for x == 0; the builtin is never called with zero because
 * GCC documents its result as undefined in that case.
 */
static inline int u32_count_leading_zeros(u32_t x)
{
	if (x == 0) {
		return 32;
	}

	return __builtin_clz(x);
}
#else /* !use_builtin(__builtin_clz) */
static inline int u32_count_leading_zeros(u32_t x)
{
int b;
@@ -77,7 +172,14 @@ static inline int u32_count_leading_zeros(u32_t x)

return b;
}
#endif /* use_builtin(__builtin_clz) */

#if use_builtin(__builtin_clzll)
/*
 * Count the leading zero bits of a 64-bit value.
 *
 * Returns 64 for x == 0; the builtin is never called with zero because
 * GCC documents its result as undefined in that case.
 */
static inline int u64_count_leading_zeros(u64_t x)
{
	if (x == 0) {
		return 64;
	}

	return __builtin_clzll(x);
}
#else /* !use_builtin(__builtin_clzll) */
static inline int u64_count_leading_zeros(u64_t x)
{
if (x == (u32_t)x) {
@@ -86,7 +188,14 @@ static inline int u64_count_leading_zeros(u64_t x)
return u32_count_leading_zeros(x >> 32);
}
}
#endif /* use_builtin(__builtin_clzll) */

#if use_builtin(__builtin_ctz)
/*
 * Count the trailing zero bits of a 32-bit value.
 *
 * Returns 32 for x == 0; the builtin is never called with zero because
 * GCC documents its result as undefined in that case.
 */
static inline int u32_count_trailing_zeros(u32_t x)
{
	if (x == 0) {
		return 32;
	}

	return __builtin_ctz(x);
}
#else /* !use_builtin(__builtin_ctz) */
static inline int u32_count_trailing_zeros(u32_t x)
{
int b;
@@ -97,7 +206,14 @@ static inline int u32_count_trailing_zeros(u32_t x)

return b;
}
#endif /* use_builtin(__builtin_ctz) */

#if use_builtin(__builtin_ctzll)
/*
 * Count the trailing zero bits of a 64-bit value.
 *
 * Returns 64 for x == 0; the builtin is never called with zero because
 * GCC documents its result as undefined in that case.
 */
static inline int u64_count_trailing_zeros(u64_t x)
{
	if (x == 0) {
		return 64;
	}

	return __builtin_ctzll(x);
}
#else /* !use_builtin(__builtin_ctzll) */
static inline int u64_count_trailing_zeros(u64_t x)
{
if ((u32_t)x) {
@@ -106,3 +222,6 @@ static inline int u64_count_trailing_zeros(u64_t x)
return 32 + u32_count_trailing_zeros(x >> 32);
}
}
#endif /* use_builtin(__builtin_ctzll) */

/* use_builtin() is a helper local to this header; remove it again here. */
#undef use_builtin

0 comments on commit b2945d3

Please sign in to comment.
You can’t perform that action at this time.