Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion include/xsimd/arch/common/xsimd_common_arithmetic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <limits>
#include <type_traits>

#include "../xsimd_common_fwd.hpp"
#include "./xsimd_common_details.hpp"

namespace xsimd
Expand All @@ -26,14 +27,23 @@ namespace xsimd

using namespace types;

// bitwise_lshift
// bitwise_lshift multiple (dynamic)
// Common-architecture fallback for a left shift whose counts come from a
// second batch. A scalar `value << count` callable is handed to
// detail::apply together with both batches (detail::apply is defined
// elsewhere; presumably it evaluates the callable element by element).
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value>::type*/>
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept
{
    const auto shift_one = [](T value, T count) noexcept
    { return value << count; };
    return detail::apply(shift_one, self, other);
}

// bitwise_lshift multiple (constant)
// Common-architecture fallback for compile-time shift counts: the
// batch_constant is materialised as a regular batch and the call is
// forwarded to the batch/batch overload with the same architecture tag.
template <class A, class T, T... Vals, detail::enable_integral_t<T>>
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& lhs, batch_constant<T, A, Vals...> const& rhs, requires_arch<common> req) noexcept
{
    const auto counts = rhs.as_batch();
    return bitwise_lshift(lhs, counts, req);
}

// bitwise_lshift single (constant)
template <size_t shift, class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value>::type*/>
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<common>) noexcept
{
Expand Down
63 changes: 63 additions & 0 deletions include/xsimd/arch/utils/shifts.hpp
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I named this `utils` because `common` seems to be used mostly for implementing the common architecture.

Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/***************************************************************************
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
* Martin Renou *
* Copyright (c) QuantStack *
* Copyright (c) Serge Guelton *
* Copyright (c) Marco Barbone *
* *
* Distributed under the terms of the BSD 3-Clause License. *
* *
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/

#ifndef XSIMD_UTILS_SHIFTS_HPP
#define XSIMD_UTILS_SHIFTS_HPP

#include "xsimd/config/xsimd_inline.hpp"
#include "xsimd/types/xsimd_batch.hpp"

namespace xsimd
{
namespace kernel
{
namespace utils
{
// Index generator that picks every `stride`-th value of the pack `Vs...`,
// starting at position `offset`.
//
// `get(index, size)` returns Vs[offset + stride * index] converted to the
// index type K; the second argument is accepted but ignored.
template <typename I, I offset, I stride, I... Vs>
struct select_stride
{
    // The whole pack, stored so it can be indexed in a constant expression.
    static constexpr I values_array[] = { Vs... };

    template <typename K>
    static constexpr K get(K index, K /*size*/)
    {
        return static_cast<K>(values_array[offset + stride * index]);
    }
};

// Left-shifts every T element of `self` by its own compile-time count
// (`Vs...`), using only shifts on the twice-wider element type T2.
//
// The register is reinterpreted as T2 elements, each holding two T
// elements. The counts at even positions of `Vs...` are applied to the low
// half and the counts at odd positions to the high half, i.e. the even
// element is treated as the low half of the T2 element.
//
// Template parameters:
//   T   - element type of the input/output batch.
//   T2  - element type with exactly twice the size of T.
//   A   - architecture of the batches.
//   R   - type of the architecture tag forwarded to the inner kernels.
//   Vs  - one shift count per T element.
//
// NOTE(review): the PR author remarked that the `R req` parameter could
// probably be removed; it is kept so existing call sites stay valid.
template <class T, class T2, class A, class R, T... Vs>
XSIMD_INLINE batch<T, A> bitwise_lshift_as_twice_larger(
    batch<T, A> const& self, batch_constant<T, A, Vs...>, R req) noexcept
{
    static_assert(sizeof(T2) == 2 * sizeof(T), "One size must be twice the other");

    // Bit masks selecting the low and the high T-sized half of a T2 element.
    // They are derived from sizeof(T) so the helper stays correct for any
    // T/T2 pair accepted by the static_assert (0x00FF / 0xFF00 for
    // uint8_t/uint16_t).
    constexpr T2 low_half_bits = static_cast<T2>((T2 { 1 } << (8 * sizeof(T))) - 1);
    constexpr T2 high_half_bits = static_cast<T2>(low_half_bits << (8 * sizeof(T)));

    const auto self2 = bitwise_cast<T2>(self);

    // Low half: shift as twice the size and mask bits flowing to the high half.
    constexpr auto shifts_lo = make_batch_constant<T2, select_stride<T, 0, 2, Vs...>, A>();
    const auto shifted_lo = bitwise_lshift<A>(self2, shifts_lo, req);
    const batch<T2, A> mask_lo { low_half_bits };
    const auto masked_lo = bitwise_and<A>(shifted_lo, mask_lo, req);

    // High half: mask bits that would flow from the low half, then shift as
    // twice the size (bits leaving the T2 element are discarded by the shift).
    constexpr auto shifts_hi = make_batch_constant<T2, select_stride<T, 1, 2, Vs...>, A>();
    const batch<T2, A> mask_hi { high_half_bits };
    const auto masked_hi = bitwise_and<A>(self2, mask_hi, req);
    const auto shifted_hi = bitwise_lshift<A>(masked_hi, shifts_hi, req);

    return bitwise_cast<T>(bitwise_or(masked_lo, shifted_hi, req));
}
}
}
}

#endif
9 changes: 8 additions & 1 deletion include/xsimd/arch/xsimd_avx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ namespace xsimd
self, other);
}

// bitwise_lshift
// bitwise_lshift single
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, int32_t other, requires_arch<avx>) noexcept
{
Expand All @@ -258,6 +258,13 @@ namespace xsimd
self, other);
}

// bitwise_lshift multiple
// AVX overload for compile-time shift counts: turns the batch_constant into
// a regular batch and forwards to the batch/batch overload, keeping the
// architecture tag that was passed in.
template <class A, class T, T... Vals, detail::enable_integral_t<T> = 0>
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& lhs, batch_constant<T, A, Vals...> const& rhs, requires_arch<avx> req) noexcept
{
    const auto counts = rhs.as_batch();
    return bitwise_lshift(lhs, counts, req);
}

// bitwise_not
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
XSIMD_INLINE batch<T, A> bitwise_not(batch<T, A> const& self, requires_arch<avx>) noexcept
Expand Down
38 changes: 28 additions & 10 deletions include/xsimd/arch/xsimd_avx2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
#define XSIMD_AVX2_HPP

#include <complex>
#include <limits>
#include <type_traits>

#include "../types/xsimd_avx2_register.hpp"

#include <limits>
#include "./utils/shifts.hpp"

namespace xsimd
{
Expand Down Expand Up @@ -152,7 +152,19 @@ namespace xsimd
return _mm256_xor_si256(self, _mm256_set1_epi32(-1));
}

// bitwise_lshift
// bitwise_or
// Bitwise OR of two integral batches with a single 256-bit integer OR.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
XSIMD_INLINE batch<T, A> bitwise_or(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
{
    const auto combined = _mm256_or_si256(self, other);
    return combined;
}
// Bitwise OR of two integral batch_bool masks with a single 256-bit integer OR.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
XSIMD_INLINE batch_bool<T, A> bitwise_or(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx2>) noexcept
{
    const auto combined = _mm256_or_si256(self, other);
    return combined;
}

// bitwise_lshift single (dynamic)
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, int32_t other, requires_arch<avx2>) noexcept
{
Expand All @@ -174,6 +186,7 @@ namespace xsimd
}
}

// bitwise_lshift single (constant)
template <size_t shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<avx2>) noexcept
{
Expand All @@ -197,6 +210,7 @@ namespace xsimd
}
}

// bitwise_lshift multiple (dynamic)
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
{
Expand All @@ -214,16 +228,20 @@ namespace xsimd
}
}

// bitwise_lshift multiple (constant)
//
// 16-bit elements: a left shift by s equals a multiplication by 2^s modulo
// 2^16, so the per-element counts are turned into compile-time multipliers
// and applied with one low-half 16-bit multiply. The batch_constant argument
// only carries the counts in its type, hence it is unnamed.
template <class A, uint16_t... Vs>
XSIMD_INLINE batch<uint16_t, A> bitwise_lshift(
    batch<uint16_t, A> const& self, batch_constant<uint16_t, A, Vs...>, requires_arch<avx2>) noexcept
{
    constexpr auto mults = batch_constant<uint16_t, A, static_cast<uint16_t>(1u << Vs)...>();
    return _mm256_mullo_epi16(self, mults.as_batch());
}

// 8-bit elements: delegated to the helper that emulates the shift with
// 16-bit element operations.
template <class A, uint8_t... Vs>
XSIMD_INLINE batch<uint8_t, A> bitwise_lshift(
    batch<uint8_t, A> const& self, batch_constant<uint8_t, A, Vs...> shifts, requires_arch<avx2> req) noexcept
{
    return utils::bitwise_lshift_as_twice_larger<uint8_t, uint16_t>(self, shifts, req);
}

// bitwise_rshift
Expand Down
2 changes: 2 additions & 0 deletions include/xsimd/arch/xsimd_common_fwd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ namespace xsimd
XSIMD_INLINE batch<T, A> abs(batch<T, A> const& self, requires_arch<common>) noexcept;
// Forward declarations of the common-architecture left-shift kernels.
// The default template arguments are supplied on these declarations.

// Left shift with the shift counts taken from a second batch.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept;
// Left shift with compile-time shift counts carried by a batch_constant.
template <class A, class T, T... Vals, detail::enable_integral_t<T> = 0>
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& lhs, batch_constant<T, A, Vals...> const& rhs, requires_arch<common> req) noexcept;
// Left shift by the single compile-time count `shift`.
template <size_t shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<sse2>) noexcept;
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
Expand Down
133 changes: 76 additions & 57 deletions include/xsimd/arch/xsimd_sse2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <type_traits>

#include "../types/xsimd_sse2_register.hpp"
#include "./utils/shifts.hpp"

namespace xsimd
{
Expand Down Expand Up @@ -266,63 +267,6 @@ namespace xsimd
return _mm_andnot_pd(other, self);
}

// bitwise_lshift
// Left shift of every element of `self` by the same run-time count `other`,
// dispatched on the element size.
// NOTE(review): an identical definition appears further down in this text
// (after the bitwise_cast kernels); presumably this is the pre-move copy
// removed by the diff - confirm that only one of the two survives.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, int32_t other, requires_arch<sse2>) noexcept
{
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
{
// 8-bit elements: shift 32-bit lanes, then clear in every byte the low
// bits that were shifted in from the neighbouring byte.
return _mm_and_si128(_mm_set1_epi8(0xFF << other), _mm_slli_epi32(self, other));
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
{
return _mm_slli_epi16(self, other);
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
{
return _mm_slli_epi32(self, other);
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
{
return _mm_slli_epi64(self, other);
}
else
{
// No other element size is handled by this kernel.
assert(false && "unsupported arch/op combination");
return {};
}
}
// Left shift of every element of `self` by the compile-time count `shift`,
// dispatched on the element size.
// NOTE(review): an identical definition appears further down in this text
// (after the bitwise_cast kernels); presumably this is the pre-move copy
// removed by the diff - confirm that only one of the two survives.
template <size_t shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<sse2>) noexcept
{
// Number of bits in T (value bits plus the sign bit for signed types).
constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
static_assert(shift < bits, "Count must be less than the number of bits in T");
XSIMD_IF_CONSTEXPR(shift == 0)
{
// Shifting by zero leaves the batch unchanged.
return self;
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
{
// 8-bit left shift via 16-bit shift + mask
__m128i shifted = _mm_slli_epi16(self, static_cast<int>(shift));
__m128i mask = _mm_set1_epi8(static_cast<char>(0xFF << shift));
return _mm_and_si128(shifted, mask);
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
{
return _mm_slli_epi16(self, static_cast<int>(shift));
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
{
return _mm_slli_epi32(self, static_cast<int>(shift));
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
{
return _mm_slli_epi64(self, static_cast<int>(shift));
}
// Any other element size is delegated to the common implementation.
return bitwise_lshift<shift>(self, common {});
}

// bitwise_not
template <class A>
XSIMD_INLINE batch<float, A> bitwise_not(batch<float, A> const& self, requires_arch<sse2>) noexcept
Expand Down Expand Up @@ -577,6 +521,81 @@ namespace xsimd
return _mm_castpd_si128(self);
}

// bitwise_lshift single (dynamic)
// Shifts every element of `self` left by the same run-time count `other`.
// The intrinsic is chosen from the element size; element sizes other than
// 1, 2, 4 or 8 bytes trip the assertion.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, int32_t other, requires_arch<sse2>) noexcept
{
    XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
    {
        // 8-bit elements are shifted as 32-bit lanes; the per-byte mask then
        // clears the low bits that arrived from the neighbouring byte.
        const auto keep_bits = _mm_set1_epi8(0xFF << other);
        const auto wide_shift = _mm_slli_epi32(self, other);
        return _mm_and_si128(keep_bits, wide_shift);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
    {
        return _mm_slli_epi16(self, other);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
    {
        return _mm_slli_epi32(self, other);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
    {
        return _mm_slli_epi64(self, other);
    }
    else
    {
        assert(false && "unsupported arch/op combination");
        return {};
    }
}

// bitwise_lshift single (constant)
// Shifts every element of `self` left by the compile-time count `shift`.
// `shift` must be smaller than the bit width of T (checked at compile time).
template <size_t shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value>::type>
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<sse2>) noexcept
{
    // Bit width of T: value bits plus the sign bit for signed types.
    constexpr auto lane_bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
    static_assert(shift < lane_bits, "Count must be less than the number of bits in T");

    // The shift intrinsics take the count as an int.
    constexpr int count = static_cast<int>(shift);

    XSIMD_IF_CONSTEXPR(shift == 0)
    {
        return self;
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
    {
        // Emulate the 8-bit shift: shift 16-bit lanes, then clear in every
        // byte the low bits that crossed over from the neighbouring byte.
        const __m128i byte_mask = _mm_set1_epi8(static_cast<char>(0xFF << shift));
        const __m128i wide_shift = _mm_slli_epi16(self, count);
        return _mm_and_si128(wide_shift, byte_mask);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
    {
        return _mm_slli_epi16(self, count);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
    {
        return _mm_slli_epi32(self, count);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
    {
        return _mm_slli_epi64(self, count);
    }
    // Remaining element sizes go through the common implementation.
    return bitwise_lshift<shift>(self, common {});
}

// bitwise_lshift multiple (constant)
// Per-element left shift of 16-bit elements by compile-time counts.
// A shift by s is performed as a multiplication by 2^s, keeping the low
// 16 bits of each product.
template <class A, uint16_t... Vs>
XSIMD_INLINE batch<uint16_t, A> bitwise_lshift(
    batch<uint16_t, A> const& self, batch_constant<uint16_t, A, Vs...>, requires_arch<sse2>) noexcept
{
    using multipliers = batch_constant<uint16_t, A, static_cast<uint16_t>(1u << Vs)...>;
    return _mm_mullo_epi16(self, multipliers {}.as_batch());
}

// Per-element left shift of 8-bit elements by compile-time counts,
// delegated to the helper that works on 16-bit elements.
template <class A, uint8_t... Vs>
XSIMD_INLINE batch<uint8_t, A> bitwise_lshift(
    batch<uint8_t, A> const& self, batch_constant<uint8_t, A, Vs...> shifts, requires_arch<sse2> req) noexcept
{
    using narrow_type = uint8_t;
    using wide_type = uint16_t;
    return utils::bitwise_lshift_as_twice_larger<narrow_type, wide_type>(self, shifts, req);
}

// broadcast
template <class A>
batch<float, A> XSIMD_INLINE broadcast(float val, requires_arch<sse2>) noexcept
Expand Down
9 changes: 9 additions & 0 deletions include/xsimd/arch/xsimd_sse4_1.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,15 @@ namespace xsimd
return _mm_ceil_pd(self);
}

// bitwise_lshift multiple (constant)
// Per-element left shift of 32-bit elements by compile-time counts.
// A shift by s is performed as a multiplication by 2^s, keeping the low
// 32 bits of each product. The batch_constant argument only carries the
// counts in its type, so it is left unnamed (this also avoids an
// unused-parameter warning).
template <class A, uint32_t... Vs>
XSIMD_INLINE batch<uint32_t, A> bitwise_lshift(
    batch<uint32_t, A> const& self, batch_constant<uint32_t, A, Vs...>, requires_arch<sse4_1>) noexcept
{
    constexpr auto mults = batch_constant<uint32_t, A, static_cast<uint32_t>(1u << Vs)...>();
    return _mm_mullo_epi32(self, mults.as_batch());
}

// fast_cast
namespace detail
{
Expand Down
6 changes: 6 additions & 0 deletions include/xsimd/types/xsimd_api.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,12 @@ namespace xsimd
detail::static_check_supported_config<T, A>();
return kernel::bitwise_lshift<shift, A>(x, A {});
}
/**
 * Left shift of a batch by compile-time shift counts.
 *
 * Checks that the <T, A> configuration is supported and forwards to the
 * kernel selected for architecture \c A.
 * @param x batch of values to shift.
 * @param shift batch constant holding the shift counts.
 * @return the shifted batch.
 */
template <class T, class A, class Vt, Vt... Values>
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x, batch_constant<Vt, A, Values...> shift) noexcept
{
    using arch_type = A;
    detail::static_check_supported_config<T, arch_type>();
    return kernel::bitwise_lshift<arch_type>(x, shift, arch_type {});
}

/**
* @ingroup batch_bitwise
Expand Down
Loading