From 12747a3e5c709c501d20b2eef6883de6cf470cd7 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Tue, 14 Sep 2021 07:23:00 +0200 Subject: [PATCH 1/4] Activate SSE2 test on windows --- .appveyor.yml | 4 ++++ test/CMakeLists.txt | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index 90e9d4cac..182c0f2f7 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -9,6 +9,10 @@ environment: global: MINICONDA: C:\xsimd-conda matrix: + - JOB: "SSE2" + CXXFLAGS: "" + VCVARSALL: "C:\\Program Files (x86)\\Microsoft Visual Studio 14.0\\VC\\vcvarsall.bat" + RUNTEST: ".\\test_xsimd" - JOB: "AVX2" CXXFLAGS: "/arch:AVX2" VCVARSALL: "C:\\Program Files (x86)\\Microsoft Visual Studio 14.0\\VC\\vcvarsall.bat" diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 69f90ee84..dd8bf86a2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -89,7 +89,6 @@ endif() if(CMAKE_CXX_COMPILER_ID MATCHES MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /MP /bigobj") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4267 /wd4005 /wd4146 /wd4800") set(CMAKE_EXE_LINKER_FLAGS /MANIFEST:NO) endif() From 2f02ee277035c80bd3a8962ff8468d67e5b37861 Mon Sep 17 00:00:00 2001 From: Yibo Cai Date: Tue, 14 Sep 2021 11:19:30 +0800 Subject: [PATCH 2/4] Fix SSE build failure with MSVC MSVC does not define __SSEn__ macros. --- include/xsimd/config/xsimd_config.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/xsimd/config/xsimd_config.hpp b/include/xsimd/config/xsimd_config.hpp index 1afbf2cfd..d7bef41ec 100644 --- a/include/xsimd/config/xsimd_config.hpp +++ b/include/xsimd/config/xsimd_config.hpp @@ -222,6 +222,10 @@ #define XSIMD_WITH_SSE4_2 1 #endif +#if !defined(__clang__) && (defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)) +#define XSIMD_WITH_SSE4_2 1 +#endif + #if XSIMD_WITH_SSE4_2 #define XSIMD_WITH_SSE4_1 1 #endif From a9f3ccb74d903c835ee8124db4c9b8a39050cdfa Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Thu, 23 Sep 2021 23:22:12 +0200 Subject: [PATCH 3/4] Fix extract_pair test and generic implementation --- include/xsimd/arch/generic/xsimd_generic_memory.hpp | 4 ++-- test/test_extract_pair.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/xsimd/arch/generic/xsimd_generic_memory.hpp b/include/xsimd/arch/generic/xsimd_generic_memory.hpp index b0de5ff92..cfeccde1e 100644 --- a/include/xsimd/arch/generic/xsimd_generic_memory.hpp +++ b/include/xsimd/arch/generic/xsimd_generic_memory.hpp @@ -25,10 +25,10 @@ namespace xsimd { for (std::size_t j = 0 ; j < (size - i); ++j) { - concat_buffer[j] = self_buffer[i + j]; + concat_buffer[j] = other_buffer[i + j]; if(j < i) { - concat_buffer[size - 1 - j] = other_buffer[i - 1 - j]; + concat_buffer[size - 1 - j] = self_buffer[i - 1 - j]; } } return batch::load_aligned(concat_buffer); diff --git a/test/test_extract_pair.cpp b/test/test_extract_pair.cpp index ec2f618bb..af64a10b1 100644 --- a/test/test_extract_pair.cpp +++ b/test/test_extract_pair.cpp @@ -34,10 +34,10 @@ namespace xsimd /* Expected shuffle data */ for (int i = 0 ; i < (num - index); ++i) { - exped[i] = lhs_in[i + index]; + exped[i] = rhs_in[i + index]; if(i < index) { - exped[num - 1 - i] = rhs_in[index - 1 - i]; + exped[num - 1 - i] = lhs_in[index - 1 - i]; } } vects.push_back(std::move(exped)); From f4f2945d0a5c86a07b1e1a8146810a89c932e1d6 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 24 Sep 2021 23:53:33 +0200 Subject: [PATCH 4/4] Fix extract_pair implementation on arm --- include/xsimd/arch/xsimd_neon.hpp | 18 +++++++++--------- include/xsimd/arch/xsimd_neon64.hpp | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/xsimd/arch/xsimd_neon.hpp b/include/xsimd/arch/xsimd_neon.hpp index ac08034c2..4593ab19d 100644 --- a/include/xsimd/arch/xsimd_neon.hpp +++ b/include/xsimd/arch/xsimd_neon.hpp @@ -1411,7 +1411,7 @@ namespace xsimd { if (n == I) { - return vextq_u8(lhs, rhs, I); + return vextq_u8(rhs, lhs, I); } else { @@ -1424,7 +1424,7 @@ namespace xsimd { if (n == I) { - return vextq_s8(lhs, rhs, I); + return vextq_s8(rhs, lhs, I); } else { @@ -1437,7 +1437,7 @@ namespace xsimd { if (n == I) { - return vextq_u16(lhs, rhs, I); + return vextq_u16(rhs, lhs, I); } else { @@ -1450,7 +1450,7 @@ namespace xsimd { if (n == I) { - return vextq_s16(lhs, rhs, I); + return vextq_s16(rhs, lhs, I); } else { @@ -1463,7 +1463,7 @@ namespace xsimd { if (n == I) { - return vextq_u32(lhs, rhs, I); + return vextq_u32(rhs, lhs, I); } else { @@ -1476,7 +1476,7 @@ namespace xsimd { if (n == I) { - return vextq_s32(lhs, rhs, I); + return vextq_s32(rhs, lhs, I); } else { @@ -1489,7 +1489,7 @@ namespace xsimd { if (n == I) { - return vextq_u64(lhs, rhs, I); + return vextq_u64(rhs, lhs, I); } else { @@ -1502,7 +1502,7 @@ namespace xsimd { if (n == I) { - return vextq_s64(lhs, rhs, I); + return vextq_s64(rhs, lhs, I); } else { @@ -1515,7 +1515,7 @@ namespace xsimd { if (n == I) { - return vextq_f32(lhs, rhs, I); + return vextq_f32(rhs, lhs, I); } else { diff --git a/include/xsimd/arch/xsimd_neon64.hpp b/include/xsimd/arch/xsimd_neon64.hpp index 869a7e247..0a22db16a 100644 --- a/include/xsimd/arch/xsimd_neon64.hpp +++ b/include/xsimd/arch/xsimd_neon64.hpp @@ -713,7 +713,7 @@ namespace xsimd { if (n == I) { - return vextq_f64(lhs, rhs, I); + return vextq_f64(rhs, lhs, I); } else {