diff --git a/include/xsimd/arch/common/xsimd_common_arithmetic.hpp b/include/xsimd/arch/common/xsimd_common_arithmetic.hpp
index c160670d4..457c78286 100644
--- a/include/xsimd/arch/common/xsimd_common_arithmetic.hpp
+++ b/include/xsimd/arch/common/xsimd_common_arithmetic.hpp
@@ -16,6 +16,7 @@
 #include <limits>
 #include <type_traits>
 
+#include "../xsimd_common_fwd.hpp"
 #include "./xsimd_common_details.hpp"
 
 namespace xsimd
@@ -26,7 +27,7 @@ namespace xsimd
 
         using namespace types;
 
-        // bitwise_lshift
+        // bitwise_lshift multiple (dynamic)
         template <class A, class T, class /*= typename std::enable_if<std::is_integral<T>::value>::type*/>
         XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept
         {
@@ -34,6 +35,15 @@ namespace xsimd
                                  { return x << y; },
                                  self, other);
         }
+
+        // bitwise_lshift multiple (constant)
+        template <class A, class T, T... Vs, typename std::enable_if<std::is_integral<T>::value, int>::type /*= 0*/>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& lhs, batch_constant<T, A, Vs...> const& rhs, requires_arch<common> req) noexcept
+        {
+            return bitwise_lshift(lhs, rhs.as_batch(), req);
+        }
+
+        // bitwise_lshift single (constant)
         template <int shift, class A, class T, class /*= typename std::enable_if<std::is_integral<T>::value>::type*/>
         XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<common>) noexcept
         {
diff --git a/include/xsimd/arch/utils/shifts.hpp b/include/xsimd/arch/utils/shifts.hpp
new file mode 100644
index 000000000..8ddb5edb8
--- /dev/null
+++ b/include/xsimd/arch/utils/shifts.hpp
@@ -0,0 +1,63 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and        *
+ * Martin Renou                                                            *
+ * Copyright (c) QuantStack                                                *
+ * Copyright (c) Serge Guelton                                             *
+ * Copyright (c) Marco Barbone                                             *
+ *                                                                         *
+ * Distributed under the terms of the BSD 3-Clause License.                *
+ *                                                                         *
+ * The full license is in the file LICENSE, distributed with this software.*
+ ****************************************************************************/
+
+#ifndef XSIMD_UTILS_SHIFTS_HPP
+#define XSIMD_UTILS_SHIFTS_HPP
+
+#include "xsimd/config/xsimd_inline.hpp"
+#include "xsimd/types/xsimd_batch.hpp"
+
+namespace xsimd
+{
+    namespace kernel
+    {
+        namespace utils
+        {
+            template <std::size_t offset, std::size_t length, class I, I... Vs>
+            struct select_stride
+            {
+                static constexpr I values_array[] = { Vs... };
+
+                template <class K>
+                static constexpr K get(K i, K)
+                {
+                    return static_cast<K>(values_array[length * i + offset]);
+                }
+            };
+
+            template <class T2, class A, class T, T... Vs, class R>
+            XSIMD_INLINE batch<T, A> bitwise_lshift_as_twice_larger(
+                batch<T, A> const& self, batch_constant<T, A, Vs...>, R req) noexcept
+            {
+                static_assert(sizeof(T2) == 2 * sizeof(T), "One size must be twice the other");
+
+                const auto self2 = bitwise_cast<T2>(self);
+
+                // Lower byte: shift as twice the size and mask bits flowing to higher byte.
+                constexpr auto shifts_lo = make_batch_constant<T2, select_stride<0, 2, T, Vs...>, A>();
+                const auto shifted_lo = bitwise_lshift(self2, shifts_lo, req);
+                const batch<T2, A> mask_lo { T2 { 0x00FF } };
+                const auto masked_lo = bitwise_and(shifted_lo, mask_lo, req);
+
+                // Higher byte: mask bits that would flow from lower byte and shift as twice the size.
+                constexpr auto shifts_hi = make_batch_constant<T2, select_stride<1, 2, T, Vs...>, A>();
+                const batch<T2, A> mask_hi { T2 { 0xFF00 } };
+                const auto masked_hi = bitwise_and(self2, mask_hi, req);
+                const auto shifted_hi = bitwise_lshift(masked_hi, shifts_hi, req);
+
+                return bitwise_cast<T>(bitwise_or(masked_lo, shifted_hi, req));
+            }
+        }
+    }
+}
+
+#endif
diff --git a/include/xsimd/arch/xsimd_avx.hpp b/include/xsimd/arch/xsimd_avx.hpp
index f8545d11c..c5504ef32 100644
--- a/include/xsimd/arch/xsimd_avx.hpp
+++ b/include/xsimd/arch/xsimd_avx.hpp
@@ -249,7 +249,7 @@ namespace xsimd
                                       self, other);
         }
 
-        // bitwise_lshift
+        // bitwise_lshift single
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
         XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, int32_t other, requires_arch<avx>) noexcept
         {
@@ -258,6 +258,13 @@ namespace xsimd
                                       self, other);
         }
 
+        // bitwise_lshift multiple
+        template <class A, class T, T... Vs, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& lhs, batch_constant<T, A, Vs...> const& rhs, requires_arch<avx> req) noexcept
+        {
+            return bitwise_lshift(lhs, rhs.as_batch(), req);
+        }
+
         // bitwise_not
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
         XSIMD_INLINE batch<T, A> bitwise_not(batch<T, A> const& self, requires_arch<avx>) noexcept
diff --git a/include/xsimd/arch/xsimd_avx2.hpp b/include/xsimd/arch/xsimd_avx2.hpp
index 2c44df461..3b5d5c81c 100644
--- a/include/xsimd/arch/xsimd_avx2.hpp
+++ b/include/xsimd/arch/xsimd_avx2.hpp
@@ -13,11 +13,11 @@
 #define XSIMD_AVX2_HPP
 
 #include <complex>
+#include <limits>
 #include <type_traits>
 
 #include "../types/xsimd_avx2_register.hpp"
-
-#include <limits>
+#include "./utils/shifts.hpp"
 
 namespace xsimd
 {
@@ -152,7 +152,19 @@ namespace xsimd
             return _mm256_xor_si256(self, _mm256_set1_epi32(-1));
         }
 
-        // bitwise_lshift
+        // bitwise_or
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
+        XSIMD_INLINE batch<T, A> bitwise_or(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            return _mm256_or_si256(self, other);
+        }
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
+        XSIMD_INLINE batch_bool<T, A> bitwise_or(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx2>) noexcept
+        {
+            return _mm256_or_si256(self, other);
+        }
+
+        // bitwise_lshift single (dynamic)
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
         XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, int32_t other, requires_arch<avx2>) noexcept
         {
@@ -174,6 +186,7 @@ namespace xsimd
             }
         }
 
+        // bitwise_lshift single (constant)
         template <int shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
         XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<avx2>) noexcept
         {
@@ -197,6 +210,7 @@ namespace xsimd
             }
         }
 
+        // bitwise_lshift multiple (dynamic)
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
         XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
         {
@@ -214,16 +228,20 @@ namespace xsimd
             }
         }
 
-        // bitwise_or
-        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
-        XSIMD_INLINE batch<T, A> bitwise_or(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
+        // bitwise_lshift multiple (constant)
+        template <class A, class T, T... Vs, typename std::enable_if<std::is_integral<T>::value && sizeof(T) == 2, int>::type = 0>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(
+            batch<T, A> const& self, batch_constant<T, A, Vs...>, requires_arch<avx2>) noexcept
         {
-            return _mm256_or_si256(self, other);
+            constexpr auto mults = batch_constant<T, A, static_cast<T>(1u << Vs)...>();
+            return _mm256_mullo_epi16(self, mults.as_batch());
         }
-        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
-        XSIMD_INLINE batch_bool<T, A> bitwise_or(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx2>) noexcept
+
+        template <class A, class T, T... Vs, typename std::enable_if<std::is_integral<T>::value && sizeof(T) == 1, int>::type = 0>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(
+            batch<T, A> const& self, batch_constant<T, A, Vs...> shifts, requires_arch<avx2> req) noexcept
         {
-            return _mm256_or_si256(self, other);
+            return utils::bitwise_lshift_as_twice_larger<uint16_t>(self, shifts, req);
         }
 
         // bitwise_rshift
diff --git a/include/xsimd/arch/xsimd_common_fwd.hpp b/include/xsimd/arch/xsimd_common_fwd.hpp
index 2b401155b..8cc73da46 100644
--- a/include/xsimd/arch/xsimd_common_fwd.hpp
+++ b/include/xsimd/arch/xsimd_common_fwd.hpp
@@ -26,6 +26,8 @@ namespace xsimd
         XSIMD_INLINE batch<T, A> abs(batch<T, A> const& self, requires_arch<common>) noexcept;
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
         XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept;
+        template <class A, class T, T... Vs, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& lhs, batch_constant<T, A, Vs...> const& rhs, requires_arch<common> req) noexcept;
         template <int shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
         XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<common>) noexcept;
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
         XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept;
diff --git a/include/xsimd/arch/xsimd_sse2.hpp b/include/xsimd/arch/xsimd_sse2.hpp
index deb1af542..c7b21c745 100644
--- a/include/xsimd/arch/xsimd_sse2.hpp
+++ b/include/xsimd/arch/xsimd_sse2.hpp
@@ -17,6 +17,7 @@
 #include <type_traits>
 
 #include "../types/xsimd_sse2_register.hpp"
+#include "./utils/shifts.hpp"
 
 namespace xsimd
 {
@@ -266,63 +267,6 @@ namespace xsimd
             return _mm_andnot_pd(other, self);
         }
 
-        // bitwise_lshift
-        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
-        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, int32_t other, requires_arch<sse2>) noexcept
-        {
-            XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
-            {
-                return _mm_and_si128(_mm_set1_epi8(0xFF << other), _mm_slli_epi32(self, other));
-            }
-            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
-            {
-                return _mm_slli_epi16(self, other);
-            }
-            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
-            {
-                return _mm_slli_epi32(self, other);
-            }
-            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
-            {
-                return _mm_slli_epi64(self, other);
-            }
-            else
-            {
-                assert(false && "unsupported arch/op combination");
-                return {};
-            }
-        }
-        template <int shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
-        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<sse2>) noexcept
-        {
-            constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
-            static_assert(shift < bits, "Count must be less than the number of bits in T");
-            XSIMD_IF_CONSTEXPR(shift == 0)
-            {
-                return self;
-            }
-            else XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
-            {
-                // 8-bit left shift via 16-bit shift + mask
-                __m128i shifted = _mm_slli_epi16(self, static_cast<int>(shift));
-                __m128i mask = _mm_set1_epi8(static_cast<char>(0xFF << shift));
-                return _mm_and_si128(shifted, mask);
-            }
-            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
-            {
-                return _mm_slli_epi16(self, static_cast<int>(shift));
-            }
-            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
-            {
-                return _mm_slli_epi32(self, static_cast<int>(shift));
-            }
-            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
-            {
-                return _mm_slli_epi64(self, static_cast<int>(shift));
-            }
-            return bitwise_lshift<shift>(self, common {});
-        }
-
         // bitwise_not
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
         XSIMD_INLINE batch<T, A> bitwise_not(batch<T, A> const& self, requires_arch<sse2>) noexcept
@@ -577,6 +521,81 @@ namespace xsimd
             return _mm_castpd_si128(self);
         }
 
+        // bitwise_lshift single (dynamic)
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, int32_t other, requires_arch<sse2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+            {
+                return _mm_and_si128(_mm_set1_epi8(0xFF << other), _mm_slli_epi32(self, other));
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                return _mm_slli_epi16(self, other);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                return _mm_slli_epi32(self, other);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                return _mm_slli_epi64(self, other);
+            }
+            else
+            {
+                assert(false && "unsupported arch/op combination");
+                return {};
+            }
+        }
+
+        // bitwise_lshift single (constant)
+        template <int shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<sse2>) noexcept
+        {
+            constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
+            static_assert(shift < bits, "Count must be less than the number of bits in T");
+            XSIMD_IF_CONSTEXPR(shift == 0)
+            {
+                return self;
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
+            {
+                // 8-bit left shift via 16-bit shift + mask
+                __m128i shifted = _mm_slli_epi16(self, static_cast<int>(shift));
+                __m128i mask = _mm_set1_epi8(static_cast<char>(0xFF << shift));
+                return _mm_and_si128(shifted, mask);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
+            {
+                return _mm_slli_epi16(self, static_cast<int>(shift));
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                return _mm_slli_epi32(self, static_cast<int>(shift));
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                return _mm_slli_epi64(self, static_cast<int>(shift));
+            }
+            return bitwise_lshift<shift>(self, common {});
+        }
+
+        // bitwise_lshift multiple (constant)
+        template <class A, class T, T... Vs, typename std::enable_if<std::is_integral<T>::value && sizeof(T) == 2, int>::type = 0>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(
+            batch<T, A> const& self, batch_constant<T, A, Vs...>, requires_arch<sse2>) noexcept
+        {
+            constexpr auto mults = batch_constant<T, A, static_cast<T>(1u << Vs)...>();
+            return _mm_mullo_epi16(self, mults.as_batch());
+        }
+
+        template <class A, class T, T... Vs, typename std::enable_if<std::is_integral<T>::value && sizeof(T) == 1, int>::type = 0>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(
+            batch<T, A> const& self, batch_constant<T, A, Vs...> shifts, requires_arch<sse2> req) noexcept
+        {
+            return utils::bitwise_lshift_as_twice_larger<uint16_t>(self, shifts, req);
+        }
+
         // broadcast
         template <class A>
         batch<float, A> XSIMD_INLINE broadcast(float val, requires_arch<sse2>) noexcept
diff --git a/include/xsimd/arch/xsimd_sse4_1.hpp b/include/xsimd/arch/xsimd_sse4_1.hpp
index 1a64fc878..96b9186d0 100644
--- a/include/xsimd/arch/xsimd_sse4_1.hpp
+++ b/include/xsimd/arch/xsimd_sse4_1.hpp
@@ -41,6 +41,15 @@ namespace xsimd
             return _mm_ceil_pd(self);
         }
 
+        // bitwise_lshift multiple (constant)
+        template <class A, class T, T... Vs, typename std::enable_if<std::is_integral<T>::value && sizeof(T) == 4, int>::type = 0>
+        XSIMD_INLINE batch<T, A> bitwise_lshift(
+            batch<T, A> const& self, batch_constant<T, A, Vs...>, requires_arch<sse4_1>) noexcept
+        {
+            constexpr auto mults = batch_constant<T, A, static_cast<T>(1u << Vs)...>();
+            return _mm_mullo_epi32(self, mults.as_batch());
+        }
+
         // fast_cast
         namespace detail
         {
diff --git a/include/xsimd/types/xsimd_api.hpp b/include/xsimd/types/xsimd_api.hpp
index 12bd9d95e..03a5e36a2 100644
--- a/include/xsimd/types/xsimd_api.hpp
+++ b/include/xsimd/types/xsimd_api.hpp
@@ -379,6 +379,12 @@ namespace xsimd
        detail::static_check_supported_config<T, A>();
        return kernel::bitwise_lshift<shift>(x, A {});
    }
+    template <class T, class A, T... Vs>
+    XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x, batch_constant<T, A, Vs...> shift) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::bitwise_lshift(x, shift, A {});
+    }
 
    /**
     * @ingroup batch_bitwise
diff --git a/test/test_xsimd_api.cpp b/test/test_xsimd_api.cpp
index a61c0e6ad..96c6e87bf 100644
--- a/test/test_xsimd_api.cpp
+++ b/test/test_xsimd_api.cpp
@@ -351,7 +351,15 @@ struct xsimd_api_integral_types_functions
 {
     using value_type = typename scalar_type<T>::type;
 
-    void test_bitwise_lshift()
+    struct arrange
+    {
+        static constexpr value_type get(size_t index, size_t /*size*/)
+        {
+            return static_cast<value_type>(index);
+        }
+    };
+
+    void test_bitwise_lshift_single()
     {
         constexpr int shift = 3;
         value_type val0(12);
@@ -364,6 +372,25 @@ struct xsimd_api_integral_types_functions
         CHECK_EQ(extract(cr), r);
     }
 
+    void test_bitwise_lshift_multiple()
+    {
+        constexpr auto Max = static_cast<value_type>(std::numeric_limits<value_type>::digits);
+        constexpr auto max_batch = xsimd::make_batch_constant<value_type, Max>();
+        constexpr auto shifts = xsimd::make_batch_constant<value_type, arrange>() % max_batch;
+
+        auto shifted = xsimd::bitwise_lshift(T(1), shifts.as_batch());
+        for (std::size_t i = 0; i < shifts.size; ++i)
+        {
+            CHECK_EQ(shifted.get(i), 1 << shifts.get(i));
+        }
+
+        auto shifted_cst = xsimd::bitwise_lshift(T(1), shifts);
+        for (std::size_t i = 0; i < shifts.size; ++i)
+        {
+            CHECK_EQ(shifted_cst.get(i), 1 << shifts.get(i));
+        }
+    }
+
     void test_bitwise_rshift()
     {
         constexpr int shift = 3;
@@ -426,9 +453,17 @@ TEST_CASE_TEMPLATE("[xsimd api | integral types functions]", B, INTEGRAL_TYPES)
 {
     xsimd_api_integral_types_functions<B> Test;
 
-    SUBCASE("bitwise_lshift")
+    SUBCASE("bitwise_lshift_single")
     {
-        Test.test_bitwise_lshift();
+        Test.test_bitwise_lshift_single();
+    }
+
+    SUBCASE("bitwise_lshift_multiple")
+    {
+        XSIMD_IF_CONSTEXPR(xsimd::is_batch<B>::value)
+        {
+            Test.test_bitwise_lshift_multiple();
+        }
     }
 
     SUBCASE("bitwise_rshift")