Skip to content

Commit 88e88a1

Browse files
committed
Avx2 constant swizzle 8/16 bits and optimizations
1 parent 37e5d9f commit 88e88a1

File tree

4 files changed

+233
-85
lines changed

4 files changed

+233
-85
lines changed

include/xsimd/arch/common/xsimd_common_swizzle.hpp

Lines changed: 56 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
#include <cstdint>
1717
#include <type_traits>
1818

19+
#include "../../config/xsimd_inline.hpp"
20+
1921
namespace xsimd
2022
{
2123
template <typename T, class A, T... Values>
@@ -39,7 +41,7 @@ namespace xsimd
3941
};
4042

4143
// ────────────────────────────────────────────────────────────────────────
42-
// 1) identity_impl
44+
// identity_impl
4345
// Recursion terminator for identity_impl: selected when no values remain in
// the pack, so there is nothing left to check and the result is true.
template <std::size_t /*I*/, typename T>
4446
XSIMD_INLINE constexpr bool identity_impl() noexcept { return true; }
4547
template <std::size_t I, typename T, T V0, T... Vs>
@@ -50,18 +52,7 @@ namespace xsimd
5052
}
5153

5254
// ────────────────────────────────────────────────────────────────────────
53-
// 2) bitmask_impl
54-
template <std::size_t /*I*/, std::size_t /*N*/, typename T>
55-
XSIMD_INLINE constexpr std::uint32_t bitmask_impl() noexcept { return 0u; }
56-
template <std::size_t I, std::size_t N, typename T, T V0, T... Vs>
57-
XSIMD_INLINE constexpr std::uint32_t bitmask_impl() noexcept
58-
{
59-
return (1u << (static_cast<std::uint32_t>(V0) & (N - 1)))
60-
| bitmask_impl<I + 1, N, T, Vs...>();
61-
}
62-
63-
// ────────────────────────────────────────────────────────────────────────
64-
// 3) dup_lo_impl
55+
// dup_lo_impl
6556
// Recursion terminator for dup_lo_impl: this overload is only enabled (via
// enable_if) when the index I equals N / 2, and it returns true
// unconditionally.
template <std::size_t I, std::size_t N, typename T,
6657
T... Vs, typename std::enable_if<I == N / 2, int>::type = 0>
6758
XSIMD_INLINE constexpr bool dup_lo_impl() noexcept { return true; }
@@ -76,7 +67,7 @@ namespace xsimd
7667
}
7768

7869
// ────────────────────────────────────────────────────────────────────────
79-
// 4) dup_hi_impl
70+
// dup_hi_impl
8071
// Recursion terminator for dup_hi_impl: this overload is only enabled (via
// enable_if) when the index I equals N / 2, and it returns true
// unconditionally.
template <std::size_t I, std::size_t N, typename T,
8172
T... Vs, typename std::enable_if<I == N / 2, int>::type = 0>
8273
XSIMD_INLINE constexpr bool dup_hi_impl() noexcept { return true; }
@@ -91,6 +82,52 @@ namespace xsimd
9182
&& dup_hi_impl<I + 1, N, T, Vs...>();
9283
}
9384

85+
// ────────────────────────────────────────────────────────────────────────
86+
// only_from_lo
87+
// Recursive compile-time predicate behind is_only_from_lo.
//
// `value` is true when every index in the pack {First, Vals...} is strictly
// below Size / 2.
//
//   T     : integral type of the indices.
//   Size  : reference size; is_only_from_lo passes sizeof...(Vals).
//   First : index examined at this recursion step.
//   Vals  : indices still to be examined.
template <typename T, T Size, T First, T... Vals>
struct only_from_lo_impl
{
    static constexpr bool value = (First < (Size / 2)) && only_from_lo_impl<T, Size, Vals...>::value;
};

// Recursion end: a single index is left to examine.
template <typename T, T Size, T Last>
struct only_from_lo_impl<T, Size, Last>
{
    static constexpr bool value = (Last < (Size / 2));
};

// Returns true when every index in Vals is strictly below half of the number
// of indices, i.e. Vals[i] < sizeof...(Vals) / 2 for all i.
//
// The pack must not be empty; this is enforced at compile time.
template <typename T, T... Vals>
constexpr bool is_only_from_lo() noexcept
{
    static_assert(sizeof...(Vals) >= 1, "Need at least one lane");
    return only_from_lo_impl<T, sizeof...(Vals), Vals...>::value;
}
107+
108+
// ────────────────────────────────────────────────────────────────────────
109+
// only_from_hi
110+
// Recursive compile-time predicate behind is_only_from_hi.
//
// `value` is true when every index in the pack {First, Vals...} is greater
// than or equal to Size / 2. No upper bound is checked.
//
//   T     : integral type of the indices.
//   Size  : reference size; is_only_from_hi passes sizeof...(Vals).
//   First : index examined at this recursion step.
//   Vals  : indices still to be examined.
template <typename T, T Size, T First, T... Vals>
struct only_from_hi_impl
{
    static constexpr bool value = (First >= (Size / 2)) && only_from_hi_impl<T, Size, Vals...>::value;
};

// Recursion end: a single index is left to examine.
template <typename T, T Size, T Last>
struct only_from_hi_impl<T, Size, Last>
{
    static constexpr bool value = (Last >= (Size / 2));
};

// Returns true when every index in Vals is at least half of the number of
// indices, i.e. Vals[i] >= sizeof...(Vals) / 2 for all i.
//
// The pack must not be empty; this is enforced at compile time.
template <typename T, T... Vals>
constexpr bool is_only_from_hi() noexcept
{
    static_assert(sizeof...(Vals) >= 1, "Need at least one lane");
    return only_from_hi_impl<T, sizeof...(Vals), Vals...>::value;
}
130+
94131
// ────────────────────────────────────────────────────────────────────────
95132
// helper to get the I-th value from the Vs pack
96133
template <std::size_t I, uint32_t Head, uint32_t... Tail>
@@ -123,49 +160,31 @@ namespace xsimd
123160
{
124161
static constexpr bool value = false;
125162
};
126-
template <std::size_t I, std::size_t N, typename T,
127-
T... Vs>
128-
XSIMD_INLINE constexpr bool no_duplicates_impl() noexcept
129-
{
130-
// build the bitmask of (Vs & (N-1)) across all lanes
131-
return detail::bitmask_impl<0, N, T, Vs...>() == ((1u << N) - 1u);
132-
}
133-
template <uint32_t... Vs>
134-
XSIMD_INLINE constexpr bool no_duplicates_v() noexcept
135-
{
136-
// forward to your existing no_duplicates_impl
137-
return no_duplicates_impl<0, sizeof...(Vs), uint32_t, Vs...>();
138-
}
139163
// Compile-time query over a pack of uint32_t indices Vs.
// Requires a non-empty pack (enforced by the static_assert) and returns
// cross_impl<0, N, N / 2, Vs...>::value, where N = sizeof...(Vs).
// NOTE(review): what exactly counts as "cross lane" is decided by cross_impl,
// which is not fully visible from here.
template <uint32_t... Vs>
140164
XSIMD_INLINE constexpr bool is_cross_lane() noexcept
141165
{
142166
static_assert(sizeof...(Vs) >= 1, "Need at least one lane");
143167
return cross_impl<0, sizeof...(Vs), sizeof...(Vs) / 2, Vs...>::value;
144168
}
169+
145170
// Entry point for the identity check on a value pack: starts the
// detail::identity_impl recursion at index 0 and returns its result.
// NOTE(review): presumably true when Vs is 0, 1, 2, ... in order - confirm
// against the recursive identity_impl overload.
template <typename T, T... Vs>
146171
XSIMD_INLINE constexpr bool is_identity() noexcept { return detail::identity_impl<0, T, Vs...>(); }
147-
template <typename T, T... Vs>
148-
XSIMD_INLINE constexpr bool is_all_different() noexcept
149-
{
150-
return detail::bitmask_impl<0, sizeof...(Vs), T, Vs...>() == ((1u << sizeof...(Vs)) - 1);
151-
}
152-
153172
// Entry point for the dup_lo check on a value pack: starts the
// detail::dup_lo_impl recursion at index 0 with N = sizeof...(Vs) and
// returns its result.
template <typename T, T... Vs>
154173
XSIMD_INLINE constexpr bool is_dup_lo() noexcept { return detail::dup_lo_impl<0, sizeof...(Vs), T, Vs...>(); }
155174
// Entry point for the dup_hi check on a value pack: starts the
// detail::dup_hi_impl recursion at index 0 with N = sizeof...(Vs) and
// returns its result.
template <typename T, T... Vs>
156175
XSIMD_INLINE constexpr bool is_dup_hi() noexcept { return detail::dup_hi_impl<0, sizeof...(Vs), T, Vs...>(); }
157176
// Convenience overload: deduces T and Vs from a batch_constant argument
// (the argument value itself is unused) and forwards to is_identity<T, Vs...>().
template <typename T, class A, T... Vs>
158177
XSIMD_INLINE constexpr bool is_identity(batch_constant<T, A, Vs...>) noexcept { return is_identity<T, Vs...>(); }
159178
template <typename T, class A, T... Vs>
160-
XSIMD_INLINE constexpr bool is_all_different(batch_constant<T, A, Vs...>) noexcept { return is_all_different<T, Vs...>(); }
161-
template <typename T, class A, T... Vs>
162179
XSIMD_INLINE constexpr bool is_dup_lo(batch_constant<T, A, Vs...>) noexcept { return is_dup_lo<T, Vs...>(); }
163180
// Convenience overload: deduces T and Vs from a batch_constant argument
// (the argument value itself is unused) and forwards to is_dup_hi<T, Vs...>().
template <typename T, class A, T... Vs>
164181
XSIMD_INLINE constexpr bool is_dup_hi(batch_constant<T, A, Vs...>) noexcept { return is_dup_hi<T, Vs...>(); }
165182
template <typename T, class A, T... Vs>
166-
XSIMD_INLINE constexpr bool is_cross_lane(batch_constant<T, A, Vs...>) noexcept { return detail::is_cross_lane<Vs...>(); }
183+
XSIMD_INLINE constexpr bool is_only_from_lo(batch_constant<T, A, Vs...>) noexcept { return detail::is_only_from_lo<T, Vs...>(); }
167184
template <typename T, class A, T... Vs>
168-
XSIMD_INLINE constexpr bool no_duplicates(batch_constant<T, A, Vs...>) noexcept { return no_duplicates_impl<0, sizeof...(Vs), T, Vs...>(); }
185+
XSIMD_INLINE constexpr bool is_only_from_hi(batch_constant<T, A, Vs...>) noexcept { return detail::is_only_from_hi<T, Vs...>(); }
186+
// Convenience overload: deduces the value pack from a batch_constant argument
// (the argument value itself is unused) and forwards it to
// detail::is_cross_lane<Vs...>().
template <typename T, class A, T... Vs>
187+
XSIMD_INLINE constexpr bool is_cross_lane(batch_constant<T, A, Vs...>) noexcept { return detail::is_cross_lane<Vs...>(); }
169188

170189
} // namespace detail
171190
} // namespace kernel

0 commit comments

Comments
 (0)