Skip to content

Commit

Permalink
Merge pull request #19 from JohanMabille/avx_integer
Browse files Browse the repository at this point in the history
avx integer operation emulation
  • Loading branch information
JohanMabille committed May 23, 2017
2 parents 88eba2f + 2d96c5c commit 5f47ed6
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 10 deletions.
124 changes: 124 additions & 0 deletions include/xsimd/types/xsimd_avx_int.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,24 @@ namespace xsimd
* batch_bool<int, 8> implementation *
*************************************/

#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION

// Helpers used to emulate 256-bit integer operations with 128-bit SSE
// instructions when AVX2 is not available: the 256-bit value is split into
// two 128-bit halves, the SSE operation is applied to each half, and the two
// results are merged back into a single 256-bit value.

// Declares two locals, name_low and name_high, holding the lower and upper
// 128-bit halves of the 256-bit integer value `name`.
// `name` must be a plain identifier because it is token-pasted.
#define XSIMD_SPLIT_AVX(name)\
__m128i name##_low = _mm256_castsi256_si128(name);\
__m128i name##_high = _mm256_extractf128_si256(name, 1)

// Returns, from the enclosing function, the 256-bit value whose lower half is
// res_low and whose upper half is res_high.
// Declares a local named `result` in the enclosing scope.
#define XSIMD_RETURN_MERGED_SSE(res_low, res_high)\
__m256i result = _mm256_castsi128_si256(res_low);\
return _mm256_insertf128_si256(result, res_high, 1)

// Applies the two-operand SSE function `func` to the matching halves of
// `lhs` and `rhs`, then returns the merged result from the enclosing function.
// The halves are referenced through token pasting so that the expansion uses
// the identifiers actually passed as `lhs` and `rhs` rather than assuming the
// caller's variables carry those exact names.
#define XSIMD_APPLY_SSE_FUNCTION(func, lhs, rhs)\
XSIMD_SPLIT_AVX(lhs);\
XSIMD_SPLIT_AVX(rhs);\
__m128i res_low = func(lhs##_low, rhs##_low);\
__m128i res_high = func(lhs##_high, rhs##_high);\
XSIMD_RETURN_MERGED_SSE(res_low, res_high);
#endif

// Default constructor: the body is empty, so no explicit initialization is
// performed here.
inline batch_bool<int, 8>::batch_bool()
{
}
Expand Down Expand Up @@ -148,27 +166,50 @@ namespace xsimd

// Bitwise AND of two boolean batches.
inline batch_bool<int, 8> operator&(const batch_bool<int, 8>& lhs, const batch_bool<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    // Without AVX2, AND each 128-bit half with SSE and merge the halves.
    XSIMD_APPLY_SSE_FUNCTION(_mm_and_si128, lhs, rhs);
#else
    return _mm256_and_si256(lhs, rhs);
#endif
}

// Bitwise OR of two boolean batches.
inline batch_bool<int, 8> operator|(const batch_bool<int, 8>& lhs, const batch_bool<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    // Without AVX2, OR each 128-bit half with SSE and merge the halves.
    XSIMD_APPLY_SSE_FUNCTION(_mm_or_si128, lhs, rhs);
#else
    return _mm256_or_si256(lhs, rhs);
#endif
}

// Bitwise XOR of two boolean batches.
inline batch_bool<int, 8> operator^(const batch_bool<int, 8>& lhs, const batch_bool<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    // Without AVX2, XOR each 128-bit half with SSE and merge the halves.
    XSIMD_APPLY_SSE_FUNCTION(_mm_xor_si128, lhs, rhs);
#else
    return _mm256_xor_si256(lhs, rhs);
#endif
}

// Bitwise negation of a boolean batch: every bit of rhs is flipped by
// XOR-ing it with an all-ones mask.
inline batch_bool<int, 8> operator~(const batch_bool<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    // Without AVX2, flip each 128-bit half separately and merge the halves.
    XSIMD_SPLIT_AVX(rhs);
    const __m128i all_ones = _mm_set1_epi32(-1);
    __m128i flipped_low = _mm_xor_si128(rhs_low, all_ones);
    __m128i flipped_high = _mm_xor_si128(rhs_high, all_ones);
    XSIMD_RETURN_MERGED_SSE(flipped_low, flipped_high);
#else
    const __m256i all_ones = _mm256_set1_epi32(-1);
    return _mm256_xor_si256(rhs, all_ones);
#endif
}

// Compares two boolean batches for equality, one 32-bit element at a time.
inline batch_bool<int, 8> operator==(const batch_bool<int, 8>& lhs, const batch_bool<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    // Without AVX2, compare each 128-bit half with SSE and merge the halves.
    XSIMD_APPLY_SSE_FUNCTION(_mm_cmpeq_epi32, lhs, rhs);
#else
    return _mm256_cmpeq_epi32(lhs, rhs);
#endif
}

inline batch_bool<int, 8> operator!=(const batch_bool<int, 8>& lhs, const batch_bool<int, 8>& rhs)
Expand Down Expand Up @@ -234,22 +275,41 @@ namespace xsimd

// Unary minus: computes 0 - rhs for every 32-bit element.
inline batch<int, 8> operator-(const batch<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    // Without AVX2, negate each 128-bit half separately and merge the halves.
    XSIMD_SPLIT_AVX(rhs);
    const __m128i zero = _mm_setzero_si128();
    __m128i negated_low = _mm_sub_epi32(zero, rhs_low);
    __m128i negated_high = _mm_sub_epi32(zero, rhs_high);
    XSIMD_RETURN_MERGED_SSE(negated_low, negated_high);
#else
    const __m256i zero = _mm256_setzero_si256();
    return _mm256_sub_epi32(zero, rhs);
#endif
}

// Adds two integer batches, one 32-bit element at a time.
inline batch<int, 8> operator+(const batch<int, 8>& lhs, const batch<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    // Without AVX2, add each 128-bit half with SSE and merge the halves.
    XSIMD_APPLY_SSE_FUNCTION(_mm_add_epi32, lhs, rhs);
#else
    return _mm256_add_epi32(lhs, rhs);
#endif
}

// Subtracts rhs from lhs, one 32-bit element at a time.
inline batch<int, 8> operator-(const batch<int, 8>& lhs, const batch<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    // Without AVX2, subtract each 128-bit half with SSE and merge the halves.
    XSIMD_APPLY_SSE_FUNCTION(_mm_sub_epi32, lhs, rhs);
#else
    return _mm256_sub_epi32(lhs, rhs);
#endif
}

// Multiplies two integer batches element by element using the "mullo"
// intrinsics, i.e. the low 32 bits of each product are kept.
inline batch<int, 8> operator*(const batch<int, 8>& lhs, const batch<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    // Without AVX2, multiply each 128-bit half with SSE and merge the halves.
    XSIMD_APPLY_SSE_FUNCTION(_mm_mullo_epi32, lhs, rhs);
#else
    return _mm256_mullo_epi32(lhs, rhs);
#endif
}

/*inline batch<int, 4> operator/(const batch<int, 4>& lhs, const batch<int, 4>& rhs)
Expand All @@ -258,7 +318,11 @@ namespace xsimd

// Compares two integer batches for equality, one 32-bit element at a time,
// and returns the resulting boolean batch.
inline batch_bool<int, 8> operator==(const batch<int, 8>& lhs, const batch<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    // Without AVX2, compare each 128-bit half with SSE and merge the halves.
    XSIMD_APPLY_SSE_FUNCTION(_mm_cmpeq_epi32, lhs, rhs);
#else
    return _mm256_cmpeq_epi32(lhs, rhs);
#endif
}

inline batch_bool<int, 8> operator!=(const batch<int, 8>& lhs, const batch<int, 8>& rhs)
Expand All @@ -268,7 +332,11 @@ namespace xsimd

// Signed "less than" comparison of two integer batches, one 32-bit element
// at a time; returns the resulting boolean batch.
inline batch_bool<int, 8> operator<(const batch<int, 8>& lhs, const batch<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX2_VERSION
    // lhs < rhs is expressed as rhs > lhs.
    return _mm256_cmpgt_epi32(rhs, lhs);
#else
    // SSE emulation must apply a less-than predicate as well (the previous
    // code used _mm_cmpeq_epi32, which computed equality instead).
    XSIMD_APPLY_SSE_FUNCTION(_mm_cmplt_epi32, lhs, rhs);
#endif
}

inline batch_bool<int, 8> operator<=(const batch<int, 8>& lhs, const batch<int, 8>& rhs)
Expand All @@ -278,37 +346,71 @@ namespace xsimd

// Bitwise AND of two integer batches.
inline batch<int, 8> operator&(const batch<int, 8>& lhs, const batch<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    // Without AVX2, AND each 128-bit half with SSE and merge the halves.
    XSIMD_APPLY_SSE_FUNCTION(_mm_and_si128, lhs, rhs);
#else
    return _mm256_and_si256(lhs, rhs);
#endif
}

// Bitwise OR of two integer batches.
inline batch<int, 8> operator|(const batch<int, 8>& lhs, const batch<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    // Without AVX2, OR each 128-bit half with SSE and merge the halves.
    XSIMD_APPLY_SSE_FUNCTION(_mm_or_si128, lhs, rhs);
#else
    return _mm256_or_si256(lhs, rhs);
#endif
}

// Bitwise XOR of two integer batches.
inline batch<int, 8> operator^(const batch<int, 8>& lhs, const batch<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    // Without AVX2, XOR each 128-bit half with SSE and merge the halves.
    XSIMD_APPLY_SSE_FUNCTION(_mm_xor_si128, lhs, rhs);
#else
    return _mm256_xor_si256(lhs, rhs);
#endif
}

// Bitwise complement of an integer batch: every bit of rhs is flipped by
// XOR-ing it with an all-ones mask.
inline batch<int, 8> operator~(const batch<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    // Without AVX2, flip each 128-bit half separately and merge the halves.
    XSIMD_SPLIT_AVX(rhs);
    const __m128i all_ones = _mm_set1_epi32(-1);
    __m128i flipped_low = _mm_xor_si128(rhs_low, all_ones);
    __m128i flipped_high = _mm_xor_si128(rhs_high, all_ones);
    XSIMD_RETURN_MERGED_SSE(flipped_low, flipped_high);
#else
    const __m256i all_ones = _mm256_set1_epi32(-1);
    return _mm256_xor_si256(rhs, all_ones);
#endif
}

// Signed minimum of two integer batches, one 32-bit element at a time.
inline batch<int, 8> min(const batch<int, 8>& lhs, const batch<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    // Without AVX2, take the minimum of each 128-bit half and merge the halves.
    XSIMD_APPLY_SSE_FUNCTION(_mm_min_epi32, lhs, rhs);
#else
    return _mm256_min_epi32(lhs, rhs);
#endif
}

// Signed maximum of two integer batches, one 32-bit element at a time.
inline batch<int, 8> max(const batch<int, 8>& lhs, const batch<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    // Without AVX2, take the maximum of each 128-bit half and merge the halves.
    XSIMD_APPLY_SSE_FUNCTION(_mm_max_epi32, lhs, rhs);
#else
    return _mm256_max_epi32(lhs, rhs);
#endif
}

// Absolute value of each 32-bit element, obtained by applying the "sign"
// intrinsic with rhs as both the value and the sign source.
inline batch<int, 8> abs(const batch<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    // Without AVX2, process each 128-bit half separately and merge the halves.
    XSIMD_SPLIT_AVX(rhs);
    __m128i abs_low = _mm_sign_epi32(rhs_low, rhs_low);
    __m128i abs_high = _mm_sign_epi32(rhs_high, rhs_high);
    XSIMD_RETURN_MERGED_SSE(abs_low, abs_high);
#else
    return _mm256_sign_epi32(rhs, rhs);
#endif
}

inline batch<int, 8> fma(const batch<int, 8>& x, const batch<int, 8>& y, const batch<int, 8>& z)
Expand All @@ -331,23 +433,45 @@ namespace xsimd
return -x * y - z;
}


inline int hadd(const batch<int, 8>& rhs)
{
#if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX2_VERSION
__m256i tmp1 = _mm256_hadd_epi32(rhs, rhs);
__m256i tmp2 = _mm256_hadd_epi32(tmp1, tmp1);
__m128i tmp3 = _mm256_extracti128_si256(tmp2, 1);
__m128i tmp4 = _mm_add_epi32(_mm256_castsi256_si128(tmp2), tmp3);
return _mm_cvtsi128_si32(tmp4);
#else
XSIMD_SPLIT_AVX(rhs);
__m128i tmp1 = _mm_add_epi32(rhs_low, rhs_high);
__m128i tmp2 = _mm_hadd_epi32(tmp1, tmp1);
__m128i tmp3 = _mm_hadd_epi32(tmp2, tmp2);
return _mm_cvtsi128_si32(tmp3);
#endif
}

//inline batch<int, 4> haddp(const batch<int, 4>* row);

// Blends a and b under the mask cond: the result takes a where cond is set
// and b elsewhere (cond is passed as the mask of the blendv intrinsic).
inline batch<int, 8> select(const batch_bool<int, 8>& cond, const batch<int, 8>& a, const batch<int, 8>& b)
{
#if XSIMD_X86_INSTR_SET < XSIMD_X86_AVX2_VERSION
    // Without AVX2, blend each 128-bit half separately and merge the halves.
    XSIMD_SPLIT_AVX(a);
    XSIMD_SPLIT_AVX(b);
    XSIMD_SPLIT_AVX(cond);
    __m128i blended_low = _mm_blendv_epi8(b_low, a_low, cond_low);
    __m128i blended_high = _mm_blendv_epi8(b_high, a_high, cond_high);
    XSIMD_RETURN_MERGED_SSE(blended_low, blended_high);
#else
    return _mm256_blendv_epi8(b, a, cond);
#endif
}

}

// The SSE emulation helper macros are only needed inside this header;
// undefine them so they are not visible to code that includes it.
#undef XSIMD_APPLY_SSE_FUNCTION
#undef XSIMD_RETURN_MERGED_SSE
#undef XSIMD_SPLIT_AVX

#endif

7 changes: 2 additions & 5 deletions include/xsimd/types/xsimd_traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,14 @@
#endif

#if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX_VERSION
#include "xsimd_avx_int.hpp"
#include "xsimd_avx_float.hpp"
#include "xsimd_avx_double.hpp"
#define XSIMD_BACH_INT_SIZE 8
#define XSIMD_BATCH_FLOAT_SIZE 8
#define XSIMD_BATCH_DOUBLE_SIZE 4
#endif

#if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX2_VERSION
#include "xsimd_avx_int.hpp"
#define XSIMD_BACH_INT_SIZE 8
#endif

namespace xsimd
{

Expand Down
5 changes: 0 additions & 5 deletions test/test_xsimd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@
#if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX_VERSION
#include "xsimd/types/xsimd_avx_double.hpp"
#include "xsimd/types/xsimd_avx_float.hpp"
#endif

#if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX2_VERSION
#include "xsimd/types/xsimd_avx_int.hpp"
#endif

Expand Down Expand Up @@ -86,9 +83,7 @@ TEST(xsimd, avx_double_basic)
bool res = xsimd::test_simd<xsimd::batch<double, 4>, 4, 32>(out, "avx double");
EXPECT_TRUE(res);
}
#endif

#if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX2_VERSION
TEST(xsimd, avx_int_basic)
{
std::ofstream out("log/sse_avx_basic.log", std::ios_base::out);
Expand Down

0 comments on commit 5f47ed6

Please sign in to comment.