Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/source/api/basic_functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,14 @@ Basic functions
.. doxygenfunction:: fdim(const batch<T, N>&, const batch<T, N>&)
:project: xsimd

.. _sadd-function-reference:
.. doxygenfunction:: sadd(const simd_base<B>&, const simd_base<B>&)
:project: xsimd

.. _ssub-function-reference:
.. doxygenfunction:: ssub(const simd_base<B>&, const simd_base<B>&)
:project: xsimd

.. _clip-function-reference:
.. doxygenfunction:: clip(const simd_base<B>&, const simd_base<B>&, const simd_base<B>&)
:project: xsimd
Expand Down
4 changes: 4 additions & 0 deletions docs/source/api/math_index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ Mathematical functions
+---------------------------------------+----------------------------------------------------+
| :ref:`fdim <fdim-function-reference>` | positive difference |
+---------------------------------------+----------------------------------------------------+
| :ref:`sadd <sadd-function-reference>` | saturated addition |
+---------------------------------------+----------------------------------------------------+
| :ref:`ssub <ssub-function-reference>` | saturated subtraction |
+---------------------------------------+----------------------------------------------------+
| :ref:`clip <clip-function-reference>` | clipping operation |
+---------------------------------------+----------------------------------------------------+

Expand Down
2 changes: 1 addition & 1 deletion include/xsimd/math/xsimd_math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#include "xsimd_logarithm.hpp"
#include "xsimd_power.hpp"
#include "xsimd_rounding.hpp"
#include "xsimd_scalar.hpp"
#include "xsimd_trigonometric.hpp"
#include "xsimd/types/xsimd_scalar.hpp"

#endif
25 changes: 1 addition & 24 deletions include/xsimd/math/xsimd_power.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "xsimd_horner.hpp"
#include "xsimd_logarithm.hpp"
#include "xsimd_numerical_constant.hpp"
#include "xsimd/types/xsimd_common_math.hpp"

namespace xsimd
{
Expand Down Expand Up @@ -85,30 +86,6 @@ namespace xsimd
}
};

/**
 * Raises \c t0 to the integral power \c t1 by repeated squaring.
 *
 * The exponent type must be integral (enforced at compile time). When the
 * exponent is negative, the reciprocal of the accumulated product is returned.
 */
template <class T0, class T1>
inline T0
ipow(const T0& t0, const T1& t1)
{
    static_assert(std::is_integral<T1>::value, "second argument must be an integer");
    T0 base = t0;
    T1 exponent = t1;
    const bool negative_exponent = exponent < 0;
    T0 acc{static_cast<T0>(1)};
    for (;;)
    {
        // Multiply in the current power of the base when the low bit is set.
        if (exponent & 1)
        {
            acc *= base;
        }
        exponent /= 2;
        if (exponent == 0)
        {
            break;
        }
        // Only square while exponent bits remain, so no extra squaring is done at the end.
        base *= base;
    }
    if (negative_exponent)
    {
        return 1 / acc;
    }
    return acc;
}
}

template <class B>
Expand Down
10 changes: 10 additions & 0 deletions include/xsimd/types/xsimd_avx512_double.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,16 @@ namespace xsimd
return _mm512_sub_pd(lhs, rhs);
}

static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
    // Saturated addition of double lanes is delegated to the regular add().
    // NOTE(review): whether infinities need dedicated handling is still open.
    batch_type sum = add(lhs, rhs);
    return sum;
}

static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
    // Saturated subtraction of double lanes is delegated to the regular sub().
    // NOTE(review): whether infinities need dedicated handling is still open.
    batch_type difference = sub(lhs, rhs);
    return difference;
}

static batch_type mul(const batch_type& lhs, const batch_type& rhs)
{
return _mm512_mul_pd(lhs, rhs);
Expand Down
10 changes: 10 additions & 0 deletions include/xsimd/types/xsimd_avx512_float.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,16 @@ namespace xsimd
return _mm512_sub_ps(lhs, rhs);
}

static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
    // Saturated addition of float lanes is delegated to the regular add().
    // NOTE(review): whether infinities need dedicated handling is still open.
    batch_type sum = add(lhs, rhs);
    return sum;
}

static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
    // Saturated subtraction of float lanes is delegated to the regular sub().
    // NOTE(review): whether infinities need dedicated handling is still open.
    batch_type difference = sub(lhs, rhs);
    return difference;
}

static batch_type mul(const batch_type& lhs, const batch_type& rhs)
{
return _mm512_mul_ps(lhs, rhs);
Expand Down
36 changes: 36 additions & 0 deletions include/xsimd/types/xsimd_avx512_int16.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,24 @@ namespace xsimd
#endif
}

// Saturated addition of signed 16-bit lanes.
// Without AVX512BW the work is handed to the XSIMD_APPLY_AVX2_FUNCTION_INT16
// helper macro (presumably it produces the return value itself - TODO confirm).
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
#if !defined(XSIMD_AVX512BW_AVAILABLE)
    XSIMD_APPLY_AVX2_FUNCTION_INT16(sadd, lhs, rhs);
#else
    return _mm512_adds_epi16(lhs, rhs);
#endif
}

// Saturated subtraction of signed 16-bit lanes.
// Without AVX512BW the work is handed to the XSIMD_APPLY_AVX2_FUNCTION_INT16
// helper macro (presumably it produces the return value itself - TODO confirm).
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
#if !defined(XSIMD_AVX512BW_AVAILABLE)
    XSIMD_APPLY_AVX2_FUNCTION_INT16(ssub, lhs, rhs);
#else
    return _mm512_subs_epi16(lhs, rhs);
#endif
}

static batch_type mul(const batch_type& lhs, const batch_type& rhs)
{
#if defined(XSIMD_AVX512BW_AVAILABLE)
Expand Down Expand Up @@ -466,6 +484,24 @@ namespace xsimd
XSIMD_APPLY_AVX2_FUNCTION_INT16(lte, lhs, rhs);
#endif
}

// Saturated addition of unsigned 16-bit lanes.
// Without AVX512BW the work is handed to the XSIMD_APPLY_AVX2_FUNCTION_UINT16
// helper macro (presumably it produces the return value itself - TODO confirm).
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
#if !defined(XSIMD_AVX512BW_AVAILABLE)
    XSIMD_APPLY_AVX2_FUNCTION_UINT16(sadd, lhs, rhs);
#else
    return _mm512_adds_epu16(lhs, rhs);
#endif
}

// Saturated subtraction of unsigned 16-bit lanes.
// Without AVX512BW the work is handed to the XSIMD_APPLY_AVX2_FUNCTION_UINT16
// helper macro (presumably it produces the return value itself - TODO confirm).
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
#if !defined(XSIMD_AVX512BW_AVAILABLE)
    XSIMD_APPLY_AVX2_FUNCTION_UINT16(ssub, lhs, rhs);
#else
    return _mm512_subs_epu16(lhs, rhs);
#endif
}
};
}

Expand Down
26 changes: 26 additions & 0 deletions include/xsimd/types/xsimd_avx512_int32.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,19 @@ namespace xsimd
return _mm512_sub_epi32(lhs, rhs);
}

// Saturated addition of signed 32-bit lanes.
//
// Review notes from the pull request (serge-sans-paille, Apr 14, 2021): this
// approach is to be benchmarked against one based on a comparison + a blend;
// a min/max based solution was found for the unsigned version.
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
    // Lanes where rhs is negative can only overflow downwards, the others only
    // upwards. The sign test uses an AVX512F compare rather than
    // _mm512_movepi32_mask, which additionally requires AVX512DQ.
    batch_bool_type rhs_is_neg = _mm512_cmplt_epi32_mask(rhs, _mm512_setzero_si512());
    // rhs >= 0: clamp lhs to at most max - rhs so that lhs + rhs <= max.
    batch_type lhs_pos_branch = min(std::numeric_limits<value_type>::max() - rhs, lhs);
    // rhs < 0: clamp lhs to at least min - rhs so that lhs + rhs >= min.
    batch_type lhs_neg_branch = max(std::numeric_limits<value_type>::min() - rhs, lhs);
    return rhs + select(rhs_is_neg, lhs_neg_branch, lhs_pos_branch);
}

// Saturated subtraction of signed 32-bit lanes.
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
    // Computed directly instead of as sadd(lhs, neg(rhs)): negating the most
    // negative value wraps back to itself, so the negation-based form gives
    // wrong results for lanes where rhs == numeric_limits<value_type>::min().
    batch_bool_type rhs_is_neg = _mm512_cmplt_epi32_mask(rhs, _mm512_setzero_si512());
    // rhs < 0: clamp lhs to at most max + rhs so that lhs - rhs <= max.
    batch_type lhs_neg_branch = min(std::numeric_limits<value_type>::max() + rhs, lhs);
    // rhs >= 0: clamp lhs to at least min + rhs so that lhs - rhs >= min.
    batch_type lhs_pos_branch = max(std::numeric_limits<value_type>::min() + rhs, lhs);
    return select(rhs_is_neg, lhs_neg_branch, lhs_pos_branch) - rhs;
}

static batch_type mul(const batch_type& lhs, const batch_type& rhs)
{
return _mm512_mullo_epi32(lhs, rhs);
Expand Down Expand Up @@ -350,6 +363,19 @@ namespace xsimd
{
return rhs;
}

// Saturated addition of unsigned 32-bit lanes.
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
    // Headroom left in each lane before lhs would exceed the maximum value.
    const auto headroom = batch_type(std::numeric_limits<value_type>::max()) - lhs;
    // Never add more than the headroom, so the sum cannot wrap around.
    return lhs + min(headroom, rhs);
}

// Saturated subtraction of unsigned 32-bit lanes.
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
    // Never subtract more than lhs itself, so the result bottoms out at zero.
    const auto clamped_rhs = min(lhs, rhs);
    return lhs - clamped_rhs;
}
};
}

Expand Down
26 changes: 26 additions & 0 deletions include/xsimd/types/xsimd_avx512_int64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,19 @@ namespace xsimd
return _mm512_sub_epi64(lhs, rhs);
}

// Saturated addition of signed 64-bit lanes.
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
    // Lanes where rhs is negative can only overflow downwards, the others only
    // upwards. The sign test uses an AVX512F compare rather than
    // _mm512_movepi64_mask, which additionally requires AVX512DQ.
    batch_bool_type rhs_is_neg = _mm512_cmplt_epi64_mask(rhs, _mm512_setzero_si512());
    // rhs >= 0: clamp lhs to at most max - rhs so that lhs + rhs <= max.
    batch_type lhs_pos_branch = min(std::numeric_limits<value_type>::max() - rhs, lhs);
    // rhs < 0: clamp lhs to at least min - rhs so that lhs + rhs >= min.
    batch_type lhs_neg_branch = max(std::numeric_limits<value_type>::min() - rhs, lhs);
    return rhs + select(rhs_is_neg, lhs_neg_branch, lhs_pos_branch);
}

// Saturated subtraction of signed 64-bit lanes.
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
    // Computed directly instead of as sadd(lhs, neg(rhs)): negating the most
    // negative value wraps back to itself, so the negation-based form gives
    // wrong results for lanes where rhs == numeric_limits<value_type>::min().
    batch_bool_type rhs_is_neg = _mm512_cmplt_epi64_mask(rhs, _mm512_setzero_si512());
    // rhs < 0: clamp lhs to at most max + rhs so that lhs - rhs <= max.
    batch_type lhs_neg_branch = min(std::numeric_limits<value_type>::max() + rhs, lhs);
    // rhs >= 0: clamp lhs to at least min + rhs so that lhs - rhs >= min.
    batch_type lhs_pos_branch = max(std::numeric_limits<value_type>::min() + rhs, lhs);
    return select(rhs_is_neg, lhs_neg_branch, lhs_pos_branch) - rhs;
}

static batch_type mul(const batch_type& lhs, const batch_type& rhs)
{
return _mm512_mullo_epi64(lhs, rhs);
Expand Down Expand Up @@ -419,6 +432,19 @@ namespace xsimd
{
return rhs;
}

// Saturated addition of unsigned 64-bit lanes.
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
    // Headroom left in each lane before lhs would exceed the maximum value.
    const auto headroom = batch_type(std::numeric_limits<value_type>::max()) - lhs;
    // Never add more than the headroom, so the sum cannot wrap around.
    return lhs + min(headroom, rhs);
}

// Saturated subtraction of unsigned 64-bit lanes.
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
    // Never subtract more than lhs itself, so the result bottoms out at zero.
    const auto clamped_rhs = min(lhs, rhs);
    return lhs - clamped_rhs;
}
};
}

Expand Down
36 changes: 36 additions & 0 deletions include/xsimd/types/xsimd_avx512_int8.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,24 @@ namespace xsimd
#endif
}

// Saturated addition of signed 8-bit lanes.
// Without AVX512BW the work is handed to the XSIMD_APPLY_AVX2_FUNCTION_INT8
// helper macro (presumably it produces the return value itself - TODO confirm).
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
#if !defined(XSIMD_AVX512BW_AVAILABLE)
    XSIMD_APPLY_AVX2_FUNCTION_INT8(sadd, lhs, rhs);
#else
    return _mm512_adds_epi8(lhs, rhs);
#endif
}

// Saturated subtraction of signed 8-bit lanes.
// Without AVX512BW the work is handed to the XSIMD_APPLY_AVX2_FUNCTION_INT8
// helper macro (presumably it produces the return value itself - TODO confirm).
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
#if !defined(XSIMD_AVX512BW_AVAILABLE)
    XSIMD_APPLY_AVX2_FUNCTION_INT8(ssub, lhs, rhs);
#else
    return _mm512_subs_epi8(lhs, rhs);
#endif
}

static batch_type mul(const batch_type& lhs, const batch_type& rhs)
{
#if defined(XSIMD_AVX512BW_AVAILABLE)
Expand Down Expand Up @@ -470,6 +488,24 @@ namespace xsimd
XSIMD_APPLY_AVX2_FUNCTION_INT8(lte, lhs, rhs);
#endif
}

// Saturated addition of unsigned 8-bit lanes.
// Without AVX512BW the work is handed to the XSIMD_APPLY_AVX2_FUNCTION_UINT8
// helper macro (presumably it produces the return value itself - TODO confirm).
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
#if !defined(XSIMD_AVX512BW_AVAILABLE)
    XSIMD_APPLY_AVX2_FUNCTION_UINT8(sadd, lhs, rhs);
#else
    return _mm512_adds_epu8(lhs, rhs);
#endif
}

// Saturated subtraction of unsigned 8-bit lanes.
// Without AVX512BW the work is handed to the XSIMD_APPLY_AVX2_FUNCTION_UINT8
// helper macro (presumably it produces the return value itself - TODO confirm).
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
#if !defined(XSIMD_AVX512BW_AVAILABLE)
    XSIMD_APPLY_AVX2_FUNCTION_UINT8(ssub, lhs, rhs);
#else
    return _mm512_subs_epu8(lhs, rhs);
#endif
}
};
}

Expand Down
10 changes: 10 additions & 0 deletions include/xsimd/types/xsimd_avx_double.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,16 @@ namespace xsimd
return _mm256_sub_pd(lhs, rhs);
}

static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
    // Saturated addition of double lanes is delegated to the regular add().
    // FIXME: decide whether infinities need special treatment.
    batch_type sum = add(lhs, rhs);
    return sum;
}

static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
    // Saturated subtraction of double lanes is delegated to the regular sub().
    // FIXME: decide whether infinities need special treatment.
    batch_type difference = sub(lhs, rhs);
    return difference;
}

static batch_type mul(const batch_type& lhs, const batch_type& rhs)
{
return _mm256_mul_pd(lhs, rhs);
Expand Down
10 changes: 10 additions & 0 deletions include/xsimd/types/xsimd_avx_float.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,16 @@ namespace xsimd
return _mm256_sub_ps(lhs, rhs);
}

static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
    // Saturated addition of float lanes is delegated to the regular add().
    // FIXME: decide whether infinities need special treatment.
    batch_type sum = add(lhs, rhs);
    return sum;
}

static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
    // Saturated subtraction of float lanes is delegated to the regular sub().
    // FIXME: decide whether infinities need special treatment.
    batch_type difference = sub(lhs, rhs);
    return difference;
}

static batch_type mul(const batch_type& lhs, const batch_type& rhs)
{
return _mm256_mul_ps(lhs, rhs);
Expand Down
36 changes: 36 additions & 0 deletions include/xsimd/types/xsimd_avx_int16.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,24 @@ namespace xsimd
XSIMD_APPLY_SSE_FUNCTION(_mm_sub_epi16, lhs, rhs);
#endif
}

// Saturated addition of signed 16-bit lanes.
// Below AVX2 the SSE intrinsic is applied through the XSIMD_APPLY_SSE_FUNCTION
// helper macro (presumably it produces the return value itself - TODO confirm).
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    XSIMD_APPLY_SSE_FUNCTION(_mm_adds_epi16, lhs, rhs);
#else
    return _mm256_adds_epi16(lhs, rhs);
#endif
}

// Saturated subtraction of signed 16-bit lanes.
// Below AVX2 the SSE intrinsic is applied through the XSIMD_APPLY_SSE_FUNCTION
// helper macro (presumably it produces the return value itself - TODO confirm).
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    XSIMD_APPLY_SSE_FUNCTION(_mm_subs_epi16, lhs, rhs);
#else
    return _mm256_subs_epi16(lhs, rhs);
#endif
}

static batch_type mul(const batch_type& lhs, const batch_type& rhs)
{
Expand Down Expand Up @@ -331,6 +349,24 @@ namespace xsimd
{
return rhs;
}

// Saturated addition of unsigned 16-bit lanes.
// Below AVX2 the SSE intrinsic is applied through the XSIMD_APPLY_SSE_FUNCTION
// helper macro (presumably it produces the return value itself - TODO confirm).
static batch_type sadd(const batch_type& lhs, const batch_type& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    XSIMD_APPLY_SSE_FUNCTION(_mm_adds_epu16, lhs, rhs);
#else
    return _mm256_adds_epu16(lhs, rhs);
#endif
}

// Saturated subtraction of unsigned 16-bit lanes.
// Below AVX2 the SSE intrinsic is applied through the XSIMD_APPLY_SSE_FUNCTION
// helper macro (presumably it produces the return value itself - TODO confirm).
static batch_type ssub(const batch_type& lhs, const batch_type& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    XSIMD_APPLY_SSE_FUNCTION(_mm_subs_epu16, lhs, rhs);
#else
    return _mm256_subs_epu16(lhs, rhs);
#endif
}
};
}

Expand Down
Loading