include/xsimd/arch/xsimd_avx512f.hpp (2 changes: 1 addition & 1 deletion)
@@ -2506,7 +2506,7 @@ namespace xsimd
             return _mm512_permutexvar_epi32(static_cast<batch<uint32_t, A>>(mask32), self);
         }

-        template <class A>
+        template <class A, class = std::enable_if_t<batch<uint16_t, A>::size == 32>>

Contributor

This seems redundant with requires_arch<avx512f>; why do you think this changes anything?

Contributor Author

The error occurs not in the function body, but during instantiation of the batch_constant parameter, which is placed before requires_arch.
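
In other words, the default template argument turns a width mismatch into an ordinary substitution failure, so the candidate is discarded before its parameter types are ever examined. A minimal, self-contained sketch of that SFINAE mechanism, using hypothetical arch tags and a hypothetical kernel() function rather than the real xsimd types:

#include <cstdio>
#include <type_traits>

// Hypothetical arch tags; `size` plays the role of batch<uint16_t, A>::size.
struct wide_arch   { static constexpr unsigned size = 32; };
struct narrow_arch { static constexpr unsigned size = 16; };

// Selected only when A has 32 uint16_t lanes. For any other A, the default
// template argument fails substitution and the candidate silently drops out
// of the overload set (SFINAE) instead of producing a hard error while the
// compiler builds the parameter list.
template <class A, class = std::enable_if_t<A::size == 32>>
void kernel(A) { std::puts("32-lane fast path"); }

// Complementary guard so the two overloads never both match.
template <class A, std::enable_if_t<A::size != 32, int> = 0>
void kernel(A) { std::puts("generic path"); }

int main() {
    kernel(wide_arch{});   // 32-lane fast path
    kernel(narrow_arch{}); // generic path
}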

Contributor

Thanks for your answer! This actually helped me better understand the issue. What do you think of the following, which looks more maintainable to me (and will be even cleaner once we move to C++17):

diff --git a/include/xsimd/arch/xsimd_avx512f.hpp b/include/xsimd/arch/xsimd_avx512f.hpp
index 97427f19..cd249c46 100644
--- a/include/xsimd/arch/xsimd_avx512f.hpp
+++ b/include/xsimd/arch/xsimd_avx512f.hpp
@@ -2588,30 +2588,45 @@ namespace xsimd
                                             I16 / 2, I18 / 2, I20 / 2, I22 / 2, I24 / 2, I26 / 2, I28 / 2, I30 / 2>;
             };
 
-        }
+            template<class A, uint16_t... Is>
+            constexpr bool is_reduce_pattern() {
+              // The actual pattern is {1, 1, 0, 1, 0, 1, ..., 0, 1}
+              if(sizeof...(Is) != batch<uint16_t, A>::size) return false;
+              uint16_t pattern[] = {Is...};
+              if(pattern[0] != 1)
+                return false;
+              for(size_t i = 1; i < sizeof...(Is); i += 1) {
+                if(pattern[i] != (i & 1))
+                  return false;
+              }
+              return true;
+            }
 
-        template <class A, uint16_t... Idx, class = std::enable_if_t<detail::is_pair_of_contiguous_indices<uint16_t, A, Idx...>::value>>
-        XSIMD_INLINE batch<uint16_t, A> swizzle(batch<uint16_t, A> const& self, batch_constant<uint16_t, A, Idx...>, requires_arch<avx512f>) noexcept
-        {
-            constexpr typename detail::fold_batch_constant<A, Idx...>::type mask32;
-            return _mm512_permutexvar_epi32(static_cast<batch<uint32_t, A>>(mask32), self);
         }
 
-        template <class A>
-        XSIMD_INLINE batch<uint16_t, A>
-        swizzle(batch<uint16_t, A> const& self, batch_constant<uint16_t, A, (uint16_t)1, (uint16_t)1, (uint16_t)0, (uint16_t)1, (uint16_t)0, (uint16_t)1, (uint16_t)0, (uint16_t)1, (uint16_t)0, (uint16_t)1, (uint16_t)0, (uint16_t)1, (uint16_t)0, (uint16_t)1, (uint16_t)0, (uint16_t)1, (uint16_t)0, (uint16_t)1, (uint16_t)0, (uint16_t)1, (uint16_t)0, (uint16_t)1, (uint16_t)0, (uint16_t)1, (uint16_t)0, (uint16_t)1, (uint16_t)0, (uint16_t)1, (uint16_t)0, (uint16_t)1, (uint16_t)0, (uint16_t)1>, requires_arch<avx512f>) noexcept
+        template <class A, uint16_t... Idx>
+        XSIMD_INLINE batch<uint16_t, A> swizzle(batch<uint16_t, A> const& self, batch_constant<uint16_t, A, Idx...> mask, requires_arch<avx512f>) noexcept
         {
-            // FIXME: this sequence is very inefficient, but it's here to catch
-            // a pattern generated by detail::reduce from xsimd_common_math.hpp.
-            // The whole pattern is actually decently folded by GCC and Clang,
-            // so bear with it.
-            constexpr batch_constant<uint32_t, A, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> mask32;
-            auto tmp = _mm512_permutexvar_epi32(static_cast<batch<uint32_t, A>>(mask32), self);
+            XSIMD_IF_CONSTEXPR(detail::is_pair_of_contiguous_indices<uint16_t, A, Idx...>::value) {
+              constexpr typename detail::fold_batch_constant<A, Idx...>::type mask32;
+              return _mm512_permutexvar_epi32(static_cast<batch<uint32_t, A>>(mask32), self);
+            }
+            else XSIMD_IF_CONSTEXPR(detail::is_reduce_pattern<A, Idx...>()) {
+              // FIXME: this sequence is very inefficient, but it's here to catch
+              // a pattern generated by detail::reduce from xsimd_common_math.hpp.
+              // The whole pattern is actually decently folded by GCC and Clang,
+              // so bear with it.
+              constexpr batch_constant<uint32_t, A, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> mask32;
+              auto tmp = _mm512_permutexvar_epi32(static_cast<batch<uint32_t, A>>(mask32), self);
 
-            alignas(A::alignment()) uint16_t buffer[32];
-            _mm512_store_si512((__m512i*)&buffer[0], tmp);
-            buffer[0] = buffer[1];
-            return _mm512_load_si512(&buffer[0]);
+              alignas(A::alignment()) uint16_t buffer[32];
+              _mm512_store_si512((__m512i*)&buffer[0], tmp);
+              buffer[0] = buffer[1];
+              return _mm512_load_si512(&buffer[0]);
+            }
+            else {
+              return swizzle(self, mask, common{});
+            }
         }
 
         template <class A, uint16_t... Vs>
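
A side note on the branch-local instantiations above: assuming XSIMD_IF_CONSTEXPR expands to C++17 if constexpr when available, statements in the untaken branch are discarded and never instantiated, which is what lets the fold_batch_constant declaration live inside the first branch without erroring for patterns that do not match it. A minimal illustration of that discard behavior (describe() is a hypothetical example, not an xsimd function):

#include <cstdio>
#include <type_traits>

template <class T>
void describe(T value) {
    if constexpr (std::is_pointer<T>::value) {
        // Only instantiated when T is a pointer; for any other T this
        // dereference would be ill-formed, but the branch is discarded.
        std::printf("pointee: %d\n", *value);
    } else {
        std::printf("value: %d\n", value);
    }
}

int main() {
    int x = 7;
    describe(x);  // prints "value: 7"
    describe(&x); // prints "pointee: 7"
}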

Contributor

@hdu-sdlzx: I applied the patch on #1328, adding the proper credit in the PR. Comments are welcome!

Contributor Author

> what do you think of the following, which looks more maintainable to me (and will be even cleaner once we move to C++17):

LGTM. I'm happy with a more readable patch as long as other PRs are not blocked by CI.

Contributor

🙇
