Commit c5c2101

Merge pull request #987 from xtensor-stack/feature/fix-version-values

Fix various problems with architecture version handling

JohanMabille committed Dec 7, 2023 (2 parents: 27ec4ff + 7941abf)

Showing 8 changed files with 93 additions and 109 deletions.
6 changes: 2 additions & 4 deletions .github/workflows/emscripten.yml
@@ -6,7 +6,7 @@ concurrency:
jobs:
test:
runs-on: ubuntu-latest

steps:
- name: Checkout
uses: actions/checkout@v3
@@ -19,11 +19,9 @@ jobs:
python
init-shell: bash

-
-
- name: Build script
shell: bash -el {0}
run: |
echo "Build script for wasm"
playwright install
-./test/test_wasm/test_wasm.sh
+./test/test_wasm/test_wasm.sh
129 changes: 64 additions & 65 deletions include/xsimd/arch/xsimd_wasm.hpp
@@ -380,7 +380,7 @@ namespace xsimd
template <class A>
inline batch_bool<float, A> eq(batch_bool<float, A> const& self, batch_bool<float, A> const& other, requires_arch<wasm>) noexcept
{
-return wasm_f32x4_eq(self, other);
+return wasm_i32x4_eq(self, other);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
inline batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
@@ -440,7 +440,7 @@ namespace xsimd
template <class A>
inline batch_bool<double, A> eq(batch_bool<double, A> const& self, batch_bool<double, A> const& other, requires_arch<wasm>) noexcept
{
-return wasm_f64x2_eq(self, other);
+return wasm_i64x2_eq(self, other);
}
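
A note on the two eq fixes above (my reading; the commit message doesn't spell it out): batch_bool lanes are stored as all-ones/all-zeros bit patterns, and the all-ones pattern reinterpreted as a float or double is a NaN. Since NaN compares unequal to itself, wasm_f32x4_eq/wasm_f64x2_eq would report two identical true lanes as different; comparing the raw bits with the integer intrinsics is exact. A standalone scalar illustration of the pitfall:

#include <cassert>
#include <cstdint>
#include <cstring>

int main()
{
    // A batch_bool "true" lane is the all-ones pattern.
    std::uint32_t lane_bits = 0xFFFFFFFFu;
    float as_float;
    std::memcpy(&as_float, &lane_bits, sizeof as_float);

    assert(!(as_float == as_float)); // all-ones is a NaN: NaN != NaN
    assert(lane_bits == lane_bits);  // the raw bit pattern compares equal
}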

// fast_cast
@@ -579,6 +579,30 @@ namespace xsimd
0xFFFFFF00,
0xFFFFFFFF,
};
+alignas(A::alignment()) static const uint32_t lut16[][4] = {
+{ 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+{ 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000 },
+{ 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 },
+{ 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000 },
+{ 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 },
+{ 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000 },
+{ 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 },
+{ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 },
+{ 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF },
+{ 0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF },
+{ 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF },
+{ 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF },
+{ 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF },
+{ 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF },
+{ 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
+{ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
+};
+alignas(A::alignment()) static const uint64_t lut8[][2] = {
+{ 0x0000000000000000ul, 0x0000000000000000ul },
+{ 0xFFFFFFFFFFFFFFFFul, 0x0000000000000000ul },
+{ 0x0000000000000000ul, 0xFFFFFFFFFFFFFFFFul },
+{ 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul },
+};
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
{
assert(!(mask & ~0xFFFF) && "inbound mask");
@@ -587,15 +611,17 @@
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
{
assert(!(mask & ~0xFF) && "inbound mask");
-return wasm_i64x2_make(lut64[mask >> 4], lut64[mask & 0xF]);
+return wasm_i64x2_make(lut64[mask & 0xF], lut64[mask >> 4]);
}
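
Lane-order note on the lut64 fix above (a hedged reading of the intrinsic): wasm_i64x2_make(a, b) places a in lane 0, so the low nibble of the mask has to fill the low 64-bit lane, i.e. the low four 16-bit lanes:

// mask = 0x0F (lanes 0-3 true, 4-7 false):
//   old: wasm_i64x2_make(lut64[0x0], lut64[0xF])  -> true lanes land high (wrong)
//   new: wasm_i64x2_make(lut64[0xF], lut64[0x0])  -> lanes 0-3 all-ones (right)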
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
{
-return batch_bool_cast<T>(from_mask(batch_bool<float, A> {}, mask, wasm {}));
+assert(!(mask & ~0xFul) && "inbound mask");
+return wasm_v128_load((const v128_t*)lut16[mask]);
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
{
-return batch_bool_cast<T>(from_mask(batch_bool<double, A> {}, mask, wasm {}));
+assert(!(mask & ~0x3ul) && "inbound mask");
+return wasm_v128_load((const v128_t*)lut8[mask]);
}
}
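
The 32- and 64-bit branches previously round-tripped through batch_bool_cast and the float/double overloads; loading straight from a per-mask lookup row is a single aligned v128 load. A usage sketch of the entry point this backend implements (assuming xsimd's public static batch_bool<T, A>::from_mask(uint64_t), with wasm as the active architecture):

#include "xsimd/xsimd.hpp"

// bit i of the mask drives lane i: 0b1010 selects lanes 1 and 3,
// i.e. the lut16[10] row above.
auto mb = xsimd::batch_bool<int32_t, xsimd::wasm>::from_mask(0b1010u);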

@@ -1114,44 +1140,6 @@
return wasm_f64x2_extract_lane(tmp2, 0);
}

-// reduce_max
-template <class A, class T, class _ = typename std::enable_if<(sizeof(T) <= 2), void>::type>
-inline T reduce_max(batch<T, A> const& self, requires_arch<wasm>) noexcept
-{
-batch<T, A> step0 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2, 3, 0, 0);
-batch<T, A> acc0 = max(self, step0);
-
-batch<T, A> step1 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 1, 0, 0, 0);
-batch<T, A> acc1 = max(acc0, step1);
-
-batch<T, A> step2 = wasm_i16x8_shuffle(acc1, wasm_i16x8_splat(0), 1, 0, 0, 0, 4, 5, 6, 7);
-batch<T, A> acc2 = max(acc1, step2);
-if (sizeof(T) == 2)
-return acc2.get(0);
-batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
-batch<T, A> acc3 = max(acc2, step3);
-return acc3.get(0);
-}
-
-// reduce_min
-template <class A, class T, class _ = typename std::enable_if<(sizeof(T) <= 2), void>::type>
-inline T reduce_min(batch<T, A> const& self, requires_arch<wasm>) noexcept
-{
-batch<T, A> step0 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2, 3, 0, 0);
-batch<T, A> acc0 = min(self, step0);
-
-batch<T, A> step1 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 1, 0, 0, 0);
-batch<T, A> acc1 = min(acc0, step1);
-
-batch<T, A> step2 = wasm_i16x8_shuffle(acc1, wasm_i16x8_splat(0), 1, 0, 0, 0, 4, 5, 6, 7);
-batch<T, A> acc2 = min(acc1, step2);
-if (sizeof(T) == 2)
-return acc2.get(0);
-batch<T, A> step3 = bitwise_cast<T>(bitwise_cast<uint16_t>(acc2) >> 8);
-batch<T, A> acc3 = min(acc2, step3);
-return acc3.get(0);
-}
-
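Context for the block deleted above (a reading of the diff, not stated in the commit): with the wasm-specific overloads gone, 8- and 16-bit reduce_max/reduce_min resolve to xsimd's generic fallback, so call sites are unaffected:

// Public behavior sketch; the entry point is unchanged.
xsimd::batch<int16_t, xsimd::wasm> v(1, 9, 3, 4, 5, 6, 7, 8);
int16_t hi = xsimd::reduce_max(v); // 9, now served by the generic path
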
// rsqrt
template <class A>
inline batch<float, A> rsqrt(batch<float, A> const& self, requires_arch<wasm>) noexcept
@@ -1259,29 +1247,15 @@

// shuffle
template <class A, class ITy, ITy I0, ITy I1, ITy I2, ITy I3>
-inline batch<float, A> shuffle(batch<float, A> const& x, batch<float, A> const& y, batch_constant<batch<ITy, A>, I0, I1, I2, I3> mask, requires_arch<wasm>) noexcept
+inline batch<float, A> shuffle(batch<float, A> const& x, batch<float, A> const& y, batch_constant<batch<ITy, A>, I0, I1, I2, I3>, requires_arch<wasm>) noexcept
{
-// shuffle within lane
-if (I0 < 4 && I1 < 4 && I2 >= 4 && I3 >= 4)
-return wasm_i32x4_shuffle(x, y, I0, I1, I2, I3);
-
-// shuffle within opposite lane
-if (I0 >= 4 && I1 >= 4 && I2 < 4 && I3 < 4)
-return wasm_i32x4_shuffle(y, x, I0, I1, I2, I3);
-return shuffle(x, y, mask, generic {});
+return wasm_i32x4_shuffle(x, y, I0, I1, I2, I3);
}

template <class A, class ITy, ITy I0, ITy I1>
-inline batch<double, A> shuffle(batch<double, A> const& x, batch<double, A> const& y, batch_constant<batch<ITy, A>, I0, I1> mask, requires_arch<wasm>) noexcept
+inline batch<double, A> shuffle(batch<double, A> const& x, batch<double, A> const& y, batch_constant<batch<ITy, A>, I0, I1>, requires_arch<wasm>) noexcept
{
-// shuffle within lane
-if (I0 < 2 && I1 >= 2)
-return wasm_i64x2_shuffle(x, y, I0, I1);
-
-// shuffle within opposite lane
-if (I0 >= 2 && I1 < 2)
-return wasm_i64x2_shuffle(y, x, I0, I1);
-return shuffle(x, y, mask, generic {});
+return wasm_i64x2_shuffle(x, y, I0, I1);
}
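
Both shuffle simplifications rest on the same fact: the wasm shuffle intrinsics take compile-time indices spanning both operands (0-3 from x and 4-7 from y for i32x4; 0-1 and 2-3 for i64x2), so the within-lane special cases and the generic fallback collapse into one unconditional instruction. A sketch of a mixed pattern the old dispatch punted to the generic path:

#include <wasm_simd128.h>

v128_t interleave_lo(v128_t x, v128_t y)
{
    // 0 and 1 select from x, 4 and 5 from y; no special-casing needed.
    return wasm_i32x4_shuffle(x, y, 0, 4, 1, 5);
}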

// set
@@ -1500,7 +1474,6 @@
}

// swizzle
-
template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
inline batch<float, A> swizzle(batch<float, A> const& self, batch_constant<batch<uint32_t, A>, V0, V1, V2, V3>, requires_arch<wasm>) noexcept
{
@@ -1516,7 +1489,7 @@
template <class A, uint64_t V0, uint64_t V1>
inline batch<uint64_t, A> swizzle(batch<uint64_t, A> const& self, batch_constant<batch<uint64_t, A>, V0, V1>, requires_arch<wasm>) noexcept
{
-return wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2 * V0, 2 * V0 + 1, 2 * V1, 2 * V1 + 1);
+return wasm_i64x2_shuffle(self, self, V0, V1);
}

template <class A, uint64_t V0, uint64_t V1>
@@ -1528,7 +1501,7 @@
template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
inline batch<uint32_t, A> swizzle(batch<uint32_t, A> const& self, batch_constant<batch<uint32_t, A>, V0, V1, V2, V3>, requires_arch<wasm>) noexcept
{
-return wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), V0, V1, V2, V3);
+return wasm_i32x4_shuffle(self, self, V0, V1, V2, V3);
}
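
The two swizzle fixes above say the same thing at the element's own width: a swizzle is a shuffle of self with itself. The old uint64 version spelled a 64-bit swizzle as pairs of 32-bit indices against a zero splat that was never actually selected; as I read the two intrinsics, the dedicated i64x2 form is equivalent and direct:

// Equivalent lane selections for V0, V1 in {0, 1}:
//   wasm_i32x4_shuffle(v, v, 2 * V0, 2 * V0 + 1, 2 * V1, 2 * V1 + 1)
//   wasm_i64x2_shuffle(v, v, V0, V1)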

template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
@@ -1537,6 +1510,32 @@
return bitwise_cast<int32_t>(swizzle(bitwise_cast<uint32_t>(self), mask, wasm {}));
}

+template <class A, uint16_t V0, uint16_t V1, uint16_t V2, uint16_t V3, uint16_t V4, uint16_t V5, uint16_t V6, uint16_t V7>
+inline batch<uint16_t, A> swizzle(batch<uint16_t, A> const& self, batch_constant<batch<uint16_t, A>, V0, V1, V2, V3, V4, V5, V6, V7>, requires_arch<wasm>) noexcept
+{
+return wasm_i16x8_shuffle(self, self, V0, V1, V2, V3, V4, V5, V6, V7);
+}
+
+template <class A, uint16_t V0, uint16_t V1, uint16_t V2, uint16_t V3, uint16_t V4, uint16_t V5, uint16_t V6, uint16_t V7>
+inline batch<int16_t, A> swizzle(batch<int16_t, A> const& self, batch_constant<batch<uint16_t, A>, V0, V1, V2, V3, V4, V5, V6, V7> mask, requires_arch<wasm>) noexcept
+{
+return bitwise_cast<int16_t>(swizzle(bitwise_cast<uint16_t>(self), mask, wasm {}));
+}
+
+template <class A, uint8_t V0, uint8_t V1, uint8_t V2, uint8_t V3, uint8_t V4, uint8_t V5, uint8_t V6, uint8_t V7,
+uint8_t V8, uint8_t V9, uint8_t V10, uint8_t V11, uint8_t V12, uint8_t V13, uint8_t V14, uint8_t V15>
+inline batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch_constant<batch<uint8_t, A>, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15>, requires_arch<wasm>) noexcept
+{
+return wasm_i8x16_shuffle(self, self, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15);
+}
+
+template <class A, uint8_t V0, uint8_t V1, uint8_t V2, uint8_t V3, uint8_t V4, uint8_t V5, uint8_t V6, uint8_t V7,
+uint8_t V8, uint8_t V9, uint8_t V10, uint8_t V11, uint8_t V12, uint8_t V13, uint8_t V14, uint8_t V15>
+inline batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch_constant<batch<uint8_t, A>, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15> mask, requires_arch<wasm>) noexcept
+{
+return bitwise_cast<int8_t>(swizzle(bitwise_cast<uint8_t>(self), mask, wasm {}));
+}
+
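A usage sketch for the new 16-bit overload added above (hedged: spelled against the batch_constant form this header already uses, with wasm as the active architecture):

#include "xsimd/xsimd.hpp"

using A = xsimd::wasm;
// Reverse the eight 16-bit lanes.
using reverse_t = xsimd::batch_constant<xsimd::batch<uint16_t, A>, 7, 6, 5, 4, 3, 2, 1, 0>;

xsimd::batch<uint16_t, A> reverse(xsimd::batch<uint16_t, A> v)
{
    return xsimd::swizzle(v, reverse_t {});
}
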
// trunc
template <class A>
inline batch<float, A> trunc(batch<float, A> const& self, requires_arch<wasm>) noexcept
@@ -1625,4 +1624,4 @@
}
}

-#endif
+#endif
18 changes: 9 additions & 9 deletions include/xsimd/config/xsimd_arch.hpp
@@ -57,22 +57,22 @@ namespace xsimd
{
};

-template <class... Archs>
+template <unsigned... Vals>
struct is_sorted;

template <>
struct is_sorted<> : std::true_type
{
};

-template <class Arch>
-struct is_sorted<Arch> : std::true_type
+template <unsigned Val>
+struct is_sorted<Val> : std::true_type
{
};

-template <class A0, class A1, class... Archs>
-struct is_sorted<A0, A1, Archs...>
-: std::conditional<(A0::version() >= A1::version()), is_sorted<Archs...>,
+template <unsigned V0, unsigned V1, unsigned... Vals>
+struct is_sorted<V0, V1, Vals...>
+: std::conditional<(V0 >= V1), is_sorted<V1, Vals...>,
std::false_type>::type
{
};
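
Two fixes land in this is_sorted hunk: the predicate now works on unsigned version values rather than arch types (so the static_assert in the next hunk can expand Archs::version()...), and the recursion keeps V1, where the old is_sorted<Archs...> dropped it and never compared A1 against A2. A standalone restatement to show the difference (same templates, outside the library):

#include <type_traits>

template <unsigned... Vals>
struct is_sorted;
template <>
struct is_sorted<> : std::true_type {};
template <unsigned Val>
struct is_sorted<Val> : std::true_type {};
template <unsigned V0, unsigned V1, unsigned... Vals>
struct is_sorted<V0, V1, Vals...>
    : std::conditional<(V0 >= V1), is_sorted<V1, Vals...>, std::false_type>::type {};

// The old recursion checked 5 >= 3, then recursed on <4> alone and accepted
// the list; keeping V1 makes it check 3 >= 4 and reject it.
static_assert(is_sorted<5, 4, 4, 1>::value, "descending with ties is sorted");
static_assert(!is_sorted<5, 3, 4>::value, "3 < 4 breaks the descending order");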
@@ -111,7 +111,7 @@ namespace xsimd
struct arch_list
{
#ifndef NDEBUG
-static_assert(detail::is_sorted<Archs...>::value,
+static_assert(detail::is_sorted<Archs::version()...>::value,
"architecture list must be sorted by version");
#endif

@@ -190,13 +190,13 @@ namespace xsimd
struct unsupported
{
};
-using all_x86_architectures = arch_list<avx512bw, avx512dq, avx512cd, avx512f, fma3<avx2>, avx2, fma3<avx>, avx, fma4, fma3<sse4_2>, sse4_2, sse4_1, /*sse4a,*/ ssse3, sse3, sse2>;
+using all_x86_architectures = arch_list<avx512vnni, avx512vbmi, avx512ifma, avx512pf, avx512bw, avx512er, avx512dq, avx512cd, avx512f, avxvnni, fma3<avx2>, avx2, fma3<avx>, avx, fma4, fma3<sse4_2>, sse4_2, sse4_1, /*sse4a,*/ ssse3, sse3, sse2>;
using all_sve_architectures = arch_list<detail::sve<512>, detail::sve<256>, detail::sve<128>>;
using all_rvv_architectures = arch_list<detail::rvv<512>, detail::rvv<256>, detail::rvv<128>>;
using all_arm_architectures = typename detail::join<all_sve_architectures, arch_list<neon64, neon>>::type;
using all_riscv_architectures = all_rvv_architectures;
using all_wasm_architectures = arch_list<wasm>;
-using all_architectures = typename detail::join<all_arm_architectures, all_x86_architectures, all_riscv_architectures, all_wasm_architectures>::type;
+using all_architectures = typename detail::join<all_riscv_architectures, all_wasm_architectures, all_arm_architectures, all_x86_architectures>::type;

using supported_architectures = typename detail::supported<all_architectures>::type;

8 changes: 4 additions & 4 deletions include/xsimd/types/xsimd_avx512ifma_register.hpp
@@ -12,7 +12,7 @@
#ifndef XSIMD_AVX512IFMA_REGISTER_HPP
#define XSIMD_AVX512IFMA_REGISTER_HPP

#include "./xsimd_avx512dq_register.hpp"
#include "./xsimd_avx512bw_register.hpp"

namespace xsimd
{
@@ -22,11 +22,11 @@ namespace xsimd
*
* AVX512IFMA instructions
*/
-struct avx512ifma : avx512dq
+struct avx512ifma : avx512bw
{
static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512IFMA; }
static constexpr bool available() noexcept { return true; }
-static constexpr unsigned version() noexcept { return generic::version(3, 4, 0); }
+static constexpr unsigned version() noexcept { return generic::version(3, 5, 0); }
static constexpr char const* name() noexcept { return "avx512ifma"; }
};

Expand All @@ -40,7 +40,7 @@ namespace xsimd
using type = simd_avx512_bool_register<T>;
};

-XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512ifma, avx512dq);
+XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512ifma, avx512bw);

}
#endif
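
Why the bumps (a hedged reconstruction): avx512ifma now derives from avx512bw, so its version must exceed bw's (3, 4, 0), and the vbmi and vnni bumps in the next two files keep the chain strictly increasing so the descending arch_list order survives the is_sorted check. A sketch assuming xsimd's usual version encoding (major * 10000 + minor * 100 + patch; treat the exact formula as an assumption):

constexpr unsigned version(unsigned major, unsigned minor, unsigned patch)
{
    return major * 10000u + minor * 100u + patch;
}

static_assert(version(3, 4, 0) < version(3, 5, 0), "avx512bw < avx512ifma");
static_assert(version(3, 5, 0) < version(3, 6, 0), "avx512ifma < avx512vbmi");
static_assert(version(3, 6, 0) < version(3, 7, 0), "avx512vbmi < avx512vnni");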
2 changes: 1 addition & 1 deletion include/xsimd/types/xsimd_avx512vbmi_register.hpp
@@ -26,7 +26,7 @@ namespace xsimd
{
static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VBMI; }
static constexpr bool available() noexcept { return true; }
-static constexpr unsigned version() noexcept { return generic::version(3, 5, 0); }
+static constexpr unsigned version() noexcept { return generic::version(3, 6, 0); }
static constexpr char const* name() noexcept { return "avx512vbmi"; }
};

2 changes: 1 addition & 1 deletion include/xsimd/types/xsimd_avx512vnni_register.hpp
@@ -26,7 +26,7 @@ namespace xsimd
{
static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VNNI; }
static constexpr bool available() noexcept { return true; }
-static constexpr unsigned version() noexcept { return generic::version(3, 6, 0); }
+static constexpr unsigned version() noexcept { return generic::version(3, 7, 0); }
static constexpr char const* name() noexcept { return "avx512vnni"; }
};

2 changes: 1 addition & 1 deletion include/xsimd/types/xsimd_sve_register.hpp
@@ -36,7 +36,7 @@ namespace xsimd
static constexpr bool available() noexcept { return true; }
static constexpr bool requires_alignment() noexcept { return true; }
static constexpr std::size_t alignment() noexcept { return 16; }
-static constexpr unsigned version() noexcept { return generic::version(9, 0, 0); }
+static constexpr unsigned version() noexcept { return generic::version(9, Width / 32, 0); }
static constexpr char const* name() noexcept { return "arm64+sve"; }
};
}
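
The SVE change folds the register width into the version so the three entries of all_sve_architectures (sve<512>, sve<256>, sve<128>) order strictly instead of all reporting (9, 0, 0). Under the same assumed encoding as above:

// sve<512> -> version(9, 512 / 32, 0) = version(9, 16, 0) = 91600
// sve<256> -> version(9, 256 / 32, 0) = version(9,  8, 0) = 90800
// sve<128> -> version(9, 128 / 32, 0) = version(9,  4, 0) = 90400
static_assert(512 / 32 == 16 && 256 / 32 == 8 && 128 / 32 == 4, "Width / 32");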