From a8a07e067a4dede92f926929bbe7cd16ebe1fd26 Mon Sep 17 00:00:00 2001 From: vvish Date: Thu, 20 Nov 2025 21:45:51 +0100 Subject: [PATCH 1/7] Algo ported --- .clang-format | 8 + .github/workflows/linux.yml | 126 +++++++++++ .gitignore | 4 + CMakeLists.txt | 67 ++++++ include/xsimd_algo/algorithms.hpp | 201 +++++++++++++++++ test/CMakeLists.txt | 95 ++++++++ test/main.cpp | 31 +++ test/test_algorithms.cpp | 348 ++++++++++++++++++++++++++++++ xsimd-algorithmConfig.cmake.in | 8 + 9 files changed, 888 insertions(+) create mode 100644 .clang-format create mode 100644 .github/workflows/linux.yml create mode 100644 CMakeLists.txt create mode 100644 include/xsimd_algo/algorithms.hpp create mode 100644 test/CMakeLists.txt create mode 100644 test/main.cpp create mode 100644 test/test_algorithms.cpp create mode 100644 xsimd-algorithmConfig.cmake.in diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..d0da083 --- /dev/null +++ b/.clang-format @@ -0,0 +1,8 @@ +--- +BasedOnStyle: WebKit +AlignAfterOpenBracket: Align +AlignConsecutiveDeclarations: 'false' +BreakBeforeBraces: Allman +NamespaceIndentation: All + +... diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml new file mode 100644 index 0000000..8737af1 --- /dev/null +++ b/.github/workflows/linux.yml @@ -0,0 +1,126 @@ +name: Linux build +on: [push, pull_request] +concurrency: + group: ${{ github.workflow }}-${{ github.job }}-${{ github.ref }} + cancel-in-progress: true +defaults: + run: + shell: bash -l {0} +jobs: + build: + runs-on: ubuntu-latest + name: '${{ matrix.sys.compiler }} ${{ matrix.sys.version }} - ${{ matrix.sys.flags }}' + strategy: + matrix: + sys: + - { compiler: 'gcc', version: '12', flags: 'force_no_instr_set' } + - { compiler: 'gcc', version: '13', flags: 'enable_xtl_complex' } + - { compiler: 'gcc', version: '14', flags: 'avx' } + - { compiler: 'gcc', version: '13', flags: 'avx512' } + - { compiler: 'gcc', version: '12', flags: 'i386' } + - { compiler: 'gcc', version: '13', flags: 'avx512pf' } + - { compiler: 'gcc', version: '13', flags: 'avx512vbmi' } + - { compiler: 'gcc', version: '14', flags: 'avx512vbmi2' } + - { compiler: 'gcc', version: '13', flags: 'avx512vnni' } + - { compiler: 'clang', version: '16', flags: 'force_no_instr_set' } + - { compiler: 'clang', version: '16', flags: 'enable_xtl_complex' } + - { compiler: 'clang', version: '17', flags: 'avx' } + - { compiler: 'clang', version: '17', flags: 'sse3' } + - { compiler: 'clang', version: '18', flags: 'avx512' } + steps: + - name: Setup compiler + if: ${{ matrix.sys.compiler == 'gcc' }} + run: | + GCC_VERSION=${{ matrix.sys.version }} + sudo apt-get update + sudo apt-get --no-install-suggests --no-install-recommends install g++-$GCC_VERSION + sudo dpkg --add-architecture i386 + sudo add-apt-repository ppa:ubuntu-toolchain-r/test + sudo apt-get update + sudo apt-get --no-install-suggests --no-install-recommends install gcc-$GCC_VERSION-multilib g++-$GCC_VERSION-multilib linux-libc-dev:i386 + CC=gcc-$GCC_VERSION + echo "CC=$CC" >> $GITHUB_ENV + CXX=g++-$GCC_VERSION + echo "CXX=$CXX" >> $GITHUB_ENV + - name: Setup compiler + if: ${{ matrix.sys.compiler == 'clang' }} + run: | + LLVM_VERSION=${{ matrix.sys.version }} + sudo apt-get update || exit 1 + sudo apt-get --no-install-suggests --no-install-recommends install clang-$LLVM_VERSION || exit 1 + sudo apt-get --no-install-suggests --no-install-recommends install g++ g++-multilib || exit 1 + sudo ln -s /usr/include/asm-generic /usr/include/asm + CC=clang-$LLVM_VERSION + echo "CC=$CC" >> $GITHUB_ENV + CXX=clang++-$LLVM_VERSION + echo "CXX=$CXX" >> $GITHUB_ENV + - name: Checkout xsimd-algorithm + uses: actions/checkout@v3 + # - name: Install mamba + # uses: mamba-org/setup-micromamba@v1 + # with: + # environment-file: environment.yml + - name: Setup SDE + if: startswith(matrix.sys.flags, 'avx512') + run: sh install_sde.sh + - name: Configure build + env: + CC: ${{ env.CC }} + CXX: ${{ env.CXX }} + run: | + if [[ '${{ matrix.sys.flags }}' == 'enable_xtl_complex' ]]; then + CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DENABLE_XTL_COMPLEX=ON" + fi + if [[ '${{ matrix.sys.flags }}' == 'avx' ]]; then + CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=sandybridge" + fi + if [[ '${{ matrix.sys.flags }}' == 'sse3' ]]; then + CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=nocona" + fi + if [[ '${{ matrix.sys.flags }}' == 'avx512' ]]; then + CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=skylake-avx512" + fi + if [[ '${{ matrix.sys.flags }}' == 'avx512pf' ]]; then + CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=knl" + fi + if [[ '${{ matrix.sys.flags }}' == 'avx512vbmi' ]]; then + CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=cannonlake" + fi + if [[ '${{ matrix.sys.flags }}' == 'avx512vbmi2' ]]; then + CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=icelake-server" + fi + if [[ '${{ matrix.sys.flags }}' == 'avx512vnni' ]]; then + CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DTARGET_ARCH=knm" + fi + if [[ '${{ matrix.sys.flags }}' == 'i386' ]]; then + CXX_FLAGS="$CXX_FLAGS -m32" + fi + if [[ '${{ matrix.sys.flags }}' == 'force_no_instr_set' ]]; then + : + else + CMAKE_EXTRA_ARGS="$CMAKE_EXTRA_ARGS -DXSIMD_ENABLE_WERROR=ON" + fi + + # Cheap way of spotting uninitialized read + CXX_FLAGS="$CXX_FLAGS -ftrivial-auto-var-init=pattern" + + mkdir _build + cd _build + cmake .. -DBUILD_TESTS=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_C_COMPILER=$CC \ + -DCMAKE_CXX_COMPILER=$CXX \ + $CMAKE_EXTRA_ARGS \ + -DCMAKE_CXX_FLAGS='$CXX_FLAGS' \ + -G Ninja + - name: Build + run: ninja -C _build + - name: Test + run: | + cd _build + cd test + if echo '${{ matrix.sys.flags }}' | grep -q 'avx512' ; then + ../../sde-external-9.48.0-2024-11-25-lin/sde64 -tgl -- ./test_xsimd_algorithm + else + ./test_xsimd_algorithm + fi diff --git a/.gitignore b/.gitignore index 259148f..d83a178 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,7 @@ *.exe *.out *.app + +# Build +build/ +build_*/ \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..cff61e9 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,67 @@ +cmake_minimum_required(VERSION 3.15..3.29) +project(xsimd-algorithm CXX) + +set(XSIMDALGO_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include) + +# Dependencies +# ============ + +set(xsimd_REQUIRED_VERSION 13.2.0) +if(TARGET xsimd) + set(xsimd_VERSION ${XSIMD_VERSION_MAJOR}.${XSIMD_VERSION_MINOR}.${XSIMD_VERSION_PATCH}) + if(${xsimd_VERSION} VERSION_LESS ${xsimd_REQUIRED_VERSION}) + message(ERROR "Mismatch xsimd versions. Found '${xsimd_VERSION}' but requires: '${xsimd_REQUIRED_VERSION}'") + else() + message(STATUS "Found xsimd v${xsimd_VERSION}") + endif() +else() + find_package(xsimd ${xsimd_REQUIRED_VERSION} REQUIRED) + message(STATUS "Found xsimd: ${xsimd_INCLUDE_DIRS}/xsimd") +endif() + +# Build +# ===== + +set(XSIMDALGO_HEADERS + ${XSIMDALGO_INCLUDE_DIR}/xsimd_algo/algorithms.hpp +) + +add_library(xsimd-algorithm INTERFACE) + +target_include_directories(xsimd-algorithm INTERFACE + $ + $ + $) + +target_compile_features(xsimd-algorithm INTERFACE cxx_std_20) +target_link_libraries(xsimd-algorithm INTERFACE xsimd) + +OPTION(BUILD_TESTS "xsimd-algorithm test suite" OFF) + +if(BUILD_TESTS) + enable_testing() + add_subdirectory(test) +endif() + +# Installation +# ============ + +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) + +install(TARGETS xsimd-algorithm + EXPORT ${PROJECT_NAME}-targets) + +# Makes the project importable from the build directory +export(EXPORT ${PROJECT_NAME}-targets + FILE "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Targets.cmake") + +install(DIRECTORY ${XSIMDALGO_INCLUDE_DIR}/xsimd_algo + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + +set(XSIMDALGO_CMAKECONFIG_INSTALL_DIR "${CMAKE_INSTALL_DATADIR}/cmake/${PROJECT_NAME}" CACHE + STRING "install path for xsimd-algorithmConfig.cmake") + +configure_package_config_file(${PROJECT_NAME}Config.cmake.in + "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" + INSTALL_DESTINATION ${XSIMDALGO_CMAKECONFIG_INSTALL_DIR}) diff --git a/include/xsimd_algo/algorithms.hpp b/include/xsimd_algo/algorithms.hpp new file mode 100644 index 0000000..7c3935b --- /dev/null +++ b/include/xsimd_algo/algorithms.hpp @@ -0,0 +1,201 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_ALGORITHMS_HPP +#define XSIMD_ALGORITHMS_HPP + +#include +#include +#include +#include + +#include "xsimd/xsimd.hpp" + +namespace xsimd +{ + template + void transform(I1 first, I2 last, O1 out_first, UF&& f) noexcept + { + using value_type = typename std::decay::type; + using batch_type = batch; + + std::size_t size = static_cast(std::distance(first, last)); + std::size_t simd_size = batch_type::size; + + const auto* ptr_begin = &(*first); + auto* ptr_out = &(*out_first); + + std::size_t align_begin = xsimd::get_alignment_offset(ptr_begin, size, simd_size); + std::size_t out_align = xsimd::get_alignment_offset(ptr_out, size, simd_size); + std::size_t align_end = align_begin + ((size - align_begin) & ~(simd_size - 1)); + + if (align_begin == out_align) + { + for (std::size_t i = 0; i < align_begin; ++i) + { + out_first[i] = f(first[i]); + } + + for (std::size_t i = align_begin; i < align_end; i += simd_size) + { + batch_type batch = batch_type::load_aligned(&first[i]); + f(batch).store_aligned(&out_first[i]); + } + + for (std::size_t i = align_end; i < size; ++i) + { + out_first[i] = f(first[i]); + } + } + else + { + for (std::size_t i = 0; i < align_begin; ++i) + { + out_first[i] = f(first[i]); + } + + for (std::size_t i = align_begin; i < align_end; i += simd_size) + { + batch_type batch = batch_type::load_aligned(&first[i]); + f(batch).store_unaligned(&out_first[i]); + } + + for (std::size_t i = align_end; i < size; ++i) + { + out_first[i] = f(first[i]); + } + } + } + + template + void transform(I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f) noexcept + { + using value_type = typename std::decay::type; + using batch_type = batch; + + std::size_t size = static_cast(std::distance(first_1, last_1)); + std::size_t simd_size = batch_type::size; + + const auto* ptr_begin_1 = &(*first_1); + const auto* ptr_begin_2 = &(*first_2); + auto* ptr_out = &(*out_first); + + std::size_t align_begin_1 = xsimd::get_alignment_offset(ptr_begin_1, size, simd_size); + std::size_t align_begin_2 = xsimd::get_alignment_offset(ptr_begin_2, size, simd_size); + std::size_t out_align = xsimd::get_alignment_offset(ptr_out, size, simd_size); + std::size_t align_end = align_begin_1 + ((size - align_begin_1) & ~(simd_size - 1)); + +#define XSIMD_LOOP_MACRO(A1, A2, A3) \ + for (std::size_t i = 0; i < align_begin_1; ++i) \ + { \ + out_first[i] = f(first_1[i], first_2[i]); \ + } \ + \ + batch_type batch_1, batch_2; \ + for (std::size_t i = align_begin_1; i < align_end; i += simd_size) \ + { \ + batch_1 = batch_type::A1(&first_1[i]); \ + batch_2 = batch_type::A2(&first_2[i]); \ + f(batch_1, batch_2).A3(&out_first[i]); \ + } \ + \ + for (std::size_t i = align_end; i < size; ++i) \ + { \ + out_first[i] = f(first_1[i], first_2[i]); \ + } + + if (align_begin_1 == out_align && align_begin_1 == align_begin_2) + { + XSIMD_LOOP_MACRO(load_aligned, load_aligned, store_aligned); + } + else if (align_begin_1 == out_align && align_begin_1 != align_begin_2) + { + XSIMD_LOOP_MACRO(load_aligned, load_unaligned, store_aligned); + } + else if (align_begin_1 != out_align && align_begin_1 == align_begin_2) + { + XSIMD_LOOP_MACRO(load_aligned, load_aligned, store_unaligned); + } + else if (align_begin_1 != out_align && align_begin_1 != align_begin_2) + { + XSIMD_LOOP_MACRO(load_aligned, load_unaligned, store_unaligned); + } + +#undef XSIMD_LOOP_MACRO + } + + // TODO: Remove this once we drop C++11 support + namespace detail + { + struct plus + { + template + auto operator()(X&& x, Y&& y) noexcept -> decltype(x + y) { return x + y; } + }; + } + + template + Init reduce(Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun = detail::plus {}) noexcept + { + using value_type = typename std::decay::type; + using batch_type = batch; + + std::size_t size = static_cast(std::distance(first, last)); + constexpr std::size_t simd_size = batch_type::size; + + if (size < simd_size) + { + while (first != last) + { + init = binfun(init, *first++); + } + return init; + } + + const auto* const ptr_begin = &(*first); + + std::size_t align_begin = xsimd::get_alignment_offset(ptr_begin, size, simd_size); + std::size_t align_end = align_begin + ((size - align_begin) & ~(simd_size - 1)); + + // reduce initial unaligned part + for (std::size_t i = 0; i < align_begin; ++i) + { + init = binfun(init, first[i]); + } + + // reduce aligned part + auto ptr = ptr_begin + align_begin; + batch_type batch_init = batch_type::load_aligned(ptr); + ptr += simd_size; + for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size) + { + batch_type batch = batch_type::load_aligned(ptr); + batch_init = binfun(batch_init, batch); + } + + // reduce across batch + alignas(batch_type) std::array arr; + xsimd::store_aligned(arr.data(), batch_init); + for (auto x : arr) + init = binfun(init, x); + + // reduce final unaligned part + for (std::size_t i = align_end; i < size; ++i) + { + init = binfun(init, first[i]); + } + + return init; + } + +} + +#endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt new file mode 100644 index 0000000..c301a28 --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,95 @@ +############################################################################ +# Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and # +# Martin Renou # +# Copyright (c) QuantStack # +# Copyright (c) Serge Guelton # +# # +# Distributed under the terms of the BSD 3-Clause License. # +# # +# The full license is in the file LICENSE, distributed with this software. # +############################################################################ + +cmake_minimum_required(VERSION 3.8) + + +project(xsimd-algorithm-test) + +if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) + enable_testing() + find_package(xsimd-algorithm REQUIRED CONFIG) + message(STATUS "${xsimd-algorithm_INCLUDE_DIRS}") +endif () + +if(NOT CMAKE_BUILD_TYPE) + message(STATUS "Setting tests build type to Release") + set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE) +else() + message(STATUS "Tests build type is ${CMAKE_BUILD_TYPE}") +endif() + +include(CheckCXXCompilerFlag) + +string(TOUPPER "${CMAKE_BUILD_TYPE}" U_CMAKE_BUILD_TYPE) + +OPTION(XSIMD_ENABLE_WERROR "Turn on -Werror" OFF) + + +if(CMAKE_CXX_COMPILER_ID MATCHES MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /MP /bigobj") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4267 /wd4005 /wd4146 /wd4800") + set(CMAKE_EXE_LINKER_FLAGS /MANIFEST:NO) +endif() + +if(CMAKE_CXX_COMPILER_ID MATCHES Clang AND MSVC AND WIN32) # We are using clang-cl + add_compile_options(/EHsc /bigobj) + set(CMAKE_EXE_LINKER_FLAGS /MANIFEST:NO) +endif() + +set(XSIMD_ALGORITHM_TESTS + main.cpp + test_algorithms.cpp +) + +add_executable(test_xsimd_algorithm ${XSIMD_ALGORITHM_TESTS})# ${XSIMD_ALGORITHM_HEADERS}) +target_link_libraries(test_xsimd_algorithm PRIVATE xsimd-algorithm) + +option(DOWNLOAD_DOCTEST OFF) +find_package(doctest QUIET) +if (doctest_FOUND) + set(DOCTEST_MINIMAL_VERSION 2.4.9) + if (doctest_VERSION VERSION_LESS DOCTEST_MINIMAL_VERSION) + message(FATAL_ERROR "Requires doctest >= ${DOCTEST_MINIMAL_VERSION}") + endif() + target_link_libraries(test_xsimd_algorithm PRIVATE doctest::doctest) +elseif(DOWNLOAD_DOCTEST) + file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/doctest") + file(DOWNLOAD + "https://github.com/doctest/doctest/releases/download/v2.4.9/doctest.h" + "${CMAKE_CURRENT_BINARY_DIR}/doctest/doctest.h" + STATUS DOWNLOAD_DOCTEST_STATUS) + list(GET DOWNLOAD_DOCTEST_STATUS 0 DOWNLOAD_DOCTEST_STATUS_CODE) + list(GET DOWNLOAD_DOCTEST_STATUS 1 DOWNLOAD_DOCTEST_ERROR_MESSAGE) + if(${DOWNLOAD_DOCTEST_STATUS_CODE} EQUAL 0) + message(STATUS "Successfully downloaded doctest.h") + else() + message(FATAL_ERROR "Error occurred during download of doctest: ${DOWNLOAD_DOCTEST_ERROR_MESSAGE}") + endif() + target_include_directories(test_xsimd_algorithm PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +else() + message(FATAL_ERROR " + Cannot find required doctest component. + Please either set CMAKE_PREFIX_PATH to the location of doctestConfig.cmake, + or set DOWNLOAD_DOCTEST=ON") +endif() + +add_test(NAME test_xsimd_algorithm COMMAND test_xsimd_algorithm) + +if (CROSS_COMPILE_ARM) + add_custom_target(xtest COMMAND qemu-arm -L /usr/arm-linux-gnueabi/ test_xsimd_algorithm DEPENDS test_xsimd_algorithm) +else() + add_custom_target(xtest COMMAND test_xsimd_algorithm DEPENDS test_xsimd_algorithm) +endif() + +if (XSIMD_ENABLE_WERROR) + target_compile_options(test_xsimd_algorithm PRIVATE -Werror -Wall -DXSIMD_SKIP_ON_WERROR) +endif() diff --git a/test/main.cpp b/test/main.cpp new file mode 100644 index 0000000..ef66818 --- /dev/null +++ b/test/main.cpp @@ -0,0 +1,31 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ +#ifndef EMSCRIPTEN +#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN +#include "doctest/doctest.h" +#else + +#define DOCTEST_CONFIG_IMPLEMENT +#include "doctest/doctest.h" +#include + +int run_tests() +{ + doctest::Context context; + return context.run(); +} + +EMSCRIPTEN_BINDINGS(my_module) +{ + emscripten::function("run_tests", &run_tests); +} + +#endif \ No newline at end of file diff --git a/test/test_algorithms.cpp b/test/test_algorithms.cpp new file mode 100644 index 0000000..fb070fd --- /dev/null +++ b/test/test_algorithms.cpp @@ -0,0 +1,348 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#include "xsimd_algo/algorithms.hpp" + +#ifndef XSIMD_NO_SUPPORTED_ARCHITECTURE + +#include "doctest/doctest.h" + +#include + +#if XSIMD_WITH_NEON && !XSIMD_WITH_NEON64 +#define ALGORITHMS_TYPES float, std::complex +#else +#define ALGORITHMS_TYPES float, double, std::complex, std::complex +#endif + +template +struct transform_test +{ + using vector = std::vector; + using aligned_vector = std::vector>; + struct binary_functor + { + template + T operator()(const T& a, const T& b) const + { + return a + b; + } + }; + + struct unary_functor + { + template + T operator()(const T& a) const + { + return -a; + } + }; + + void test_binary_transform() const + { + vector expected(93); + vector a(93, 123), b(93, 123), c(93); + aligned_vector aa(93, 123), ba(93, 123), ca(93); + + std::transform(a.begin(), a.end(), b.begin(), expected.begin(), + binary_functor {}); + + xsimd::transform(a.begin(), a.end(), b.begin(), c.begin(), + binary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), c.begin())); + CHECK(expected.size() == c.size()); + std::fill(c.begin(), c.end(), -1); // erase + + xsimd::transform(aa.begin(), aa.end(), ba.begin(), c.begin(), + binary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), c.begin())); + CHECK(expected.size() == c.size()); + std::fill(c.begin(), c.end(), -1); // erase + + xsimd::transform(aa.begin(), aa.end(), b.begin(), c.begin(), + binary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), c.begin())); + CHECK(expected.size() == c.size()); + std::fill(c.begin(), c.end(), -1); // erase + + xsimd::transform(a.begin(), a.end(), ba.begin(), c.begin(), + binary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), c.begin())); + CHECK(expected.size() == c.size()); + std::fill(c.begin(), c.end(), -1); // erase + + xsimd::transform(aa.begin(), aa.end(), ba.begin(), ca.begin(), + binary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), ca.begin())); + CHECK(expected.size() == ca.size()); + std::fill(ca.begin(), ca.end(), -1); // erase + + xsimd::transform(aa.begin(), aa.end(), b.begin(), ca.begin(), + binary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), ca.begin())); + CHECK(expected.size() == ca.size()); + std::fill(ca.begin(), ca.end(), -1); // erase + + xsimd::transform(a.begin(), a.end(), ba.begin(), ca.begin(), + binary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), ca.begin())); + CHECK(expected.size() == ca.size()); + std::fill(ca.begin(), ca.end(), -1); // erase + } + + void test_unary_transform() const + { + vector expected(93); + vector a(93, 123), c(93); + aligned_vector aa(93, 123), ca(93); + + std::transform(a.begin(), a.end(), expected.begin(), + unary_functor {}); + + xsimd::transform(a.begin(), a.end(), c.begin(), + unary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), c.begin())); + CHECK(expected.size() == c.size()); + std::fill(c.begin(), c.end(), -1); // erase + + xsimd::transform(aa.begin(), aa.end(), c.begin(), + unary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), c.begin())); + CHECK(expected.size() == c.size()); + std::fill(c.begin(), c.end(), -1); // erase + + xsimd::transform(a.begin(), a.end(), ca.begin(), + unary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), ca.begin())); + CHECK(expected.size() == ca.size()); + std::fill(ca.begin(), ca.end(), -1); // erase + + xsimd::transform(aa.begin(), aa.end(), ca.begin(), + unary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), ca.begin())); + CHECK(expected.size() == ca.size()); + std::fill(ca.begin(), ca.end(), -1); // erase + } +}; + +TEST_CASE_TEMPLATE("transform test", T, ALGORITHMS_TYPES) +{ + transform_test Test; + + SUBCASE("unary") { Test.test_unary_transform(); } + SUBCASE("binary") { Test.test_binary_transform(); } +} + +template +using test_allocator_type = xsimd::aligned_allocator; + +#if XSIMD_WITH_NEON && !XSIMD_WITH_NEON64 +using test_value_type = float; +#else +using test_value_type = double; +#endif + +struct multiply +{ + template + T operator()(const T& a, const T& b) const + { + return a * b; + } +}; + +TEST_CASE("xsimd_reduce - unaligned_begin_unaligned_end") +{ + using aligned_vec_t = std::vector>; + constexpr std::size_t num_elements = 4 * xsimd::batch::size; + constexpr std::size_t small_num = xsimd::batch::size - 1; + + aligned_vec_t vec(num_elements, 123.); + aligned_vec_t small_vec(small_num, 42.); + test_value_type init = 1337.; + + auto const begin = std::next(vec.begin()); + auto const end = std::prev(vec.end()); + + CHECK_EQ(std::accumulate(begin, end, init), xsimd::reduce(begin, end, init)); + + if (small_vec.size() > 1) + { + auto const sbegin = std::next(small_vec.begin()); + auto const send = std::prev(small_vec.end()); + + CHECK_EQ(std::accumulate(sbegin, send, init), xsimd::reduce(sbegin, send, init)); + } +} + +TEST_CASE("xsimd_reduce - unaligned_begin_aligned_end") +{ + using aligned_vec_t = std::vector>; + constexpr std::size_t num_elements = 4 * xsimd::batch::size; + constexpr std::size_t small_num = xsimd::batch::size - 1; + + aligned_vec_t vec(num_elements, 123.); + aligned_vec_t small_vec(small_num, 42.); + test_value_type init = 1337.; + + auto const begin = std::next(vec.begin()); + auto const end = vec.end(); + + CHECK_EQ(std::accumulate(begin, end, init), xsimd::reduce(begin, end, init)); + + if (small_vec.size() > 1) + { + auto const sbegin = std::next(small_vec.begin()); + auto const send = small_vec.end(); + + CHECK_EQ(std::accumulate(sbegin, send, init), xsimd::reduce(sbegin, send, init)); + } +} + +TEST_CASE("xsimd_reduce - aligned_begin_unaligned_end") +{ + using aligned_vec_t = std::vector>; + constexpr std::size_t num_elements = 4 * xsimd::batch::size; + constexpr std::size_t small_num = xsimd::batch::size - 1; + + aligned_vec_t vec(num_elements, 123.); + aligned_vec_t small_vec(small_num, 42.); + test_value_type init = 1337.; + + auto const begin = vec.begin(); + auto const end = std::prev(vec.end()); + + CHECK_EQ(std::accumulate(begin, end, init), xsimd::reduce(begin, end, init)); + + if (small_vec.size() > 1) + { + auto const sbegin = small_vec.begin(); + auto const send = std::prev(small_vec.end()); + + CHECK_EQ(std::accumulate(sbegin, send, init), xsimd::reduce(sbegin, send, init)); + } +} + +TEST_CASE("xsimd_reduce - aligned_begin_aligned_end") +{ + using aligned_vec_t = std::vector>; + constexpr std::size_t num_elements = 4 * xsimd::batch::size; + constexpr std::size_t small_num = xsimd::batch::size - 1; + + aligned_vec_t vec(num_elements, 123.); + aligned_vec_t small_vec(small_num, 42.); + test_value_type init = 1337.; + + auto const begin = vec.begin(); + auto const end = vec.end(); + + CHECK_EQ(std::accumulate(begin, end, init), xsimd::reduce(begin, end, init)); + + if (small_vec.size() > 1) + { + auto const sbegin = small_vec.begin(); + auto const send = small_vec.end(); + + CHECK_EQ(std::accumulate(sbegin, send, init), xsimd::reduce(sbegin, send, init)); + } +} + +TEST_CASE("xsimd_reduce - using_custom_binary_function") +{ + using aligned_vec_t = std::vector>; + constexpr std::size_t num_elements = 4 * xsimd::batch::size; + constexpr std::size_t small_num = xsimd::batch::size - 1; + + aligned_vec_t vec(num_elements, 123.); + aligned_vec_t small_vec(small_num, 42.); + test_value_type init = 1337.; + + auto const begin = vec.begin(); + auto const end = vec.end(); + + if (std::is_same::value) + { + CHECK(std::accumulate(begin, end, init, multiply {}) == doctest::Approx(xsimd::reduce(begin, end, init, multiply {}))); + } + else + { + CHECK(std::accumulate(begin, end, init, multiply {}) == doctest::Approx(xsimd::reduce(begin, end, init, multiply {}))); + } + + if (small_vec.size() > 1) + { + auto const sbegin = small_vec.begin(); + auto const send = small_vec.end(); + + if (std::is_same::value) + { + CHECK(std::accumulate(sbegin, send, init, multiply {}) == doctest::Approx(xsimd::reduce(sbegin, send, init, multiply {}))); + } + else + { + CHECK(std::accumulate(sbegin, send, init, multiply {}) == doctest::Approx(xsimd::reduce(sbegin, send, init, multiply {}))); + } + } +} + +#if XSIMD_X86_INSTR_SET > XSIMD_VERSION_NUMBER_NOT_AVAILABLE || XSIMD_ARM_INSTR_SET > XSIMD_VERSION_NUMBER_NOT_AVAILABLE +TEST_CASE("algorithms - iterator") +{ + std::vector> a(10 * 16, 0.2), b(1000, 2.), c(1000, 3.); + + std::iota(a.begin(), a.end(), 0.f); + std::vector a_cpy(a.begin(), a.end()); + + using batch_type = xsimd::batch; + auto begin = xsimd::aligned_iterator(&a[0]); + auto end = xsimd::aligned_iterator(&a[0] + a.size()); + + for (; begin != end; ++begin) + { + *begin = *begin / 2.f; + } + + for (auto& el : a_cpy) + { + el /= 2.f; + } + + CHECK(a.size() == a_cpy.size()); + CHECK(std::equal(a.begin(), a.end(), a_cpy.begin())); + + begin = xsimd::aligned_iterator(&a[0]); + *begin = sin(*begin); + + for (std::size_t i = 0; i < batch_type::size; ++i) + { + CHECK(a[i] == doctest::Approx(sinf(a_cpy[i])).epsilon(1e-6)); + } + +#if !XSIMD_WITH_NEON || XSIMD_WITH_NEON64 + std::vector, test_allocator_type>> ca(10 * 16, std::complex(0.2)); + using cbatch_type = xsimd::batch>; + auto cbegin = xsimd::aligned_iterator(&ca[0]); + auto cend = xsimd::aligned_iterator(&ca[0] + a.size()); + + for (; cbegin != cend; ++cbegin) + { + *cbegin = (*cbegin + std::complex(0, .3)) / 2.; + } + + cbegin = xsimd::aligned_iterator(&ca[0]); + *cbegin = sin(*cbegin); + *cbegin = sqrt(*cbegin); + auto real_part = abs(*(cbegin)); + (void)real_part; +#endif +} +#endif +#endif \ No newline at end of file diff --git a/xsimd-algorithmConfig.cmake.in b/xsimd-algorithmConfig.cmake.in new file mode 100644 index 0000000..309b824 --- /dev/null +++ b/xsimd-algorithmConfig.cmake.in @@ -0,0 +1,8 @@ +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) +find_dependency(xsimd @xsimd_REQUIRED_VERSION@) + +if(NOT TARGET xsimd-algorithm) + include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake") +endif() From 9bba29a4bbd6ddb003bdfe8dc5b11f122fd309d9 Mon Sep 17 00:00:00 2001 From: vvish Date: Thu, 20 Nov 2025 21:48:57 +0100 Subject: [PATCH 2/7] disable install sde --- .github/workflows/linux.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 8737af1..6cf214d 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -60,9 +60,9 @@ jobs: # uses: mamba-org/setup-micromamba@v1 # with: # environment-file: environment.yml - - name: Setup SDE - if: startswith(matrix.sys.flags, 'avx512') - run: sh install_sde.sh + # - name: Setup SDE + # if: startswith(matrix.sys.flags, 'avx512') + # run: sh install_sde.sh - name: Configure build env: CC: ${{ env.CC }} From 600454b395eacc13f3f186e385256e29eda1591d Mon Sep 17 00:00:00 2001 From: vvish Date: Wed, 26 Nov 2025 17:45:04 +0100 Subject: [PATCH 3/7] Added env creation --- .github/workflows/linux.yml | 11 +++++++---- environment-dev.yml | 8 ++++++++ 2 files changed, 15 insertions(+), 4 deletions(-) create mode 100644 environment-dev.yml diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 6cf214d..2d32700 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -56,10 +56,13 @@ jobs: echo "CXX=$CXX" >> $GITHUB_ENV - name: Checkout xsimd-algorithm uses: actions/checkout@v3 - # - name: Install mamba - # uses: mamba-org/setup-micromamba@v1 - # with: - # environment-file: environment.yml + - name: Set conda environment + uses: mamba-org/setup-micromamba@main + with: + environment-name: myenv + environment-file: environment-dev.yml + init-shell: bash + cache-downloads: true # - name: Setup SDE # if: startswith(matrix.sys.flags, 'avx512') # run: sh install_sde.sh diff --git a/environment-dev.yml b/environment-dev.yml new file mode 100644 index 0000000..cb1648e --- /dev/null +++ b/environment-dev.yml @@ -0,0 +1,8 @@ +name: xsimd-algorithm +channels: +- conda-forge +dependencies: +- cmake +- xsimd=13.2.0 +- doctest +- ninja \ No newline at end of file From 619f049f28657dcedcf70738aa5bb5ff924ec456 Mon Sep 17 00:00:00 2001 From: vvish Date: Wed, 26 Nov 2025 17:59:14 +0100 Subject: [PATCH 4/7] Missing include --- test/test_algorithms.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_algorithms.cpp b/test/test_algorithms.cpp index fb070fd..f924a59 100644 --- a/test/test_algorithms.cpp +++ b/test/test_algorithms.cpp @@ -16,6 +16,7 @@ #include "doctest/doctest.h" #include +#include #if XSIMD_WITH_NEON && !XSIMD_WITH_NEON64 #define ALGORITHMS_TYPES float, std::complex From 6403fea2454ecf17b5e38cd55e8908114badb6f5 Mon Sep 17 00:00:00 2001 From: vvish Date: Wed, 26 Nov 2025 18:07:53 +0100 Subject: [PATCH 5/7] Temporary remove avx512 tests from pipeline --- .github/workflows/linux.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 2d32700..33ff139 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -16,17 +16,17 @@ jobs: - { compiler: 'gcc', version: '12', flags: 'force_no_instr_set' } - { compiler: 'gcc', version: '13', flags: 'enable_xtl_complex' } - { compiler: 'gcc', version: '14', flags: 'avx' } - - { compiler: 'gcc', version: '13', flags: 'avx512' } + # - { compiler: 'gcc', version: '13', flags: 'avx512' } - { compiler: 'gcc', version: '12', flags: 'i386' } - - { compiler: 'gcc', version: '13', flags: 'avx512pf' } - - { compiler: 'gcc', version: '13', flags: 'avx512vbmi' } - - { compiler: 'gcc', version: '14', flags: 'avx512vbmi2' } - - { compiler: 'gcc', version: '13', flags: 'avx512vnni' } + # - { compiler: 'gcc', version: '13', flags: 'avx512pf' } + # - { compiler: 'gcc', version: '13', flags: 'avx512vbmi' } + # - { compiler: 'gcc', version: '14', flags: 'avx512vbmi2' } + # - { compiler: 'gcc', version: '13', flags: 'avx512vnni' } - { compiler: 'clang', version: '16', flags: 'force_no_instr_set' } - { compiler: 'clang', version: '16', flags: 'enable_xtl_complex' } - { compiler: 'clang', version: '17', flags: 'avx' } - { compiler: 'clang', version: '17', flags: 'sse3' } - - { compiler: 'clang', version: '18', flags: 'avx512' } + # - { compiler: 'clang', version: '18', flags: 'avx512' } steps: - name: Setup compiler if: ${{ matrix.sys.compiler == 'gcc' }} From be5f035c8a73f80b2c9ff71d3f042defe30a23da Mon Sep 17 00:00:00 2001 From: vvish Date: Sat, 29 Nov 2025 00:52:42 +0100 Subject: [PATCH 6/7] avx512 tests reenabled --- .github/workflows/linux.yml | 18 +++++++++--------- install_sde.sh | 10 ++++++++++ 2 files changed, 19 insertions(+), 9 deletions(-) create mode 100644 install_sde.sh diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 33ff139..5df542f 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -16,17 +16,17 @@ jobs: - { compiler: 'gcc', version: '12', flags: 'force_no_instr_set' } - { compiler: 'gcc', version: '13', flags: 'enable_xtl_complex' } - { compiler: 'gcc', version: '14', flags: 'avx' } - # - { compiler: 'gcc', version: '13', flags: 'avx512' } + - { compiler: 'gcc', version: '13', flags: 'avx512' } - { compiler: 'gcc', version: '12', flags: 'i386' } - # - { compiler: 'gcc', version: '13', flags: 'avx512pf' } - # - { compiler: 'gcc', version: '13', flags: 'avx512vbmi' } - # - { compiler: 'gcc', version: '14', flags: 'avx512vbmi2' } - # - { compiler: 'gcc', version: '13', flags: 'avx512vnni' } + - { compiler: 'gcc', version: '13', flags: 'avx512pf' } + - { compiler: 'gcc', version: '13', flags: 'avx512vbmi' } + - { compiler: 'gcc', version: '14', flags: 'avx512vbmi2' } + - { compiler: 'gcc', version: '13', flags: 'avx512vnni' } - { compiler: 'clang', version: '16', flags: 'force_no_instr_set' } - { compiler: 'clang', version: '16', flags: 'enable_xtl_complex' } - { compiler: 'clang', version: '17', flags: 'avx' } - { compiler: 'clang', version: '17', flags: 'sse3' } - # - { compiler: 'clang', version: '18', flags: 'avx512' } + - { compiler: 'clang', version: '18', flags: 'avx512' } steps: - name: Setup compiler if: ${{ matrix.sys.compiler == 'gcc' }} @@ -63,9 +63,9 @@ jobs: environment-file: environment-dev.yml init-shell: bash cache-downloads: true - # - name: Setup SDE - # if: startswith(matrix.sys.flags, 'avx512') - # run: sh install_sde.sh + - name: Setup SDE + if: startswith(matrix.sys.flags, 'avx512') + run: sh install_sde.sh - name: Configure build env: CC: ${{ env.CC }} diff --git a/install_sde.sh b/install_sde.sh new file mode 100644 index 0000000..44b6372 --- /dev/null +++ b/install_sde.sh @@ -0,0 +1,10 @@ +#git clone https://github.com/marehr/intel-sde-downloader +#cd intel-sde-downloader +#pip install -r requirements.txt +#python ./intel-sde-downloader.py sde-external-8.35.0-2019-03-11-lin.tar.bz2 +#wget http://software.intel.com/content/dam/develop/external/us/en/protected/sde-external-8.50.0-2020-03-26-lin.tar.bz2 + +wget --user-agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36" https://github.com/xtensor-stack/xsimd-testing-resources/releases/download/2.0.0/sde-external-9.48.0-2024-11-25-lin.tar.xz + +tar xvf sde-external-9.48.0-2024-11-25-lin.tar.xz +sudo sh -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" From e1b47835482743307380b8e23490804f962f8fcc Mon Sep 17 00:00:00 2001 From: vvish Date: Sun, 30 Nov 2025 14:54:43 +0100 Subject: [PATCH 7/7] Separation of files by algorithm type --- include/xsimd_algorithm/algorithms.hpp | 18 + include/xsimd_algorithm/stl/reduce.hpp | 90 +++++ .../stl/transform.hpp} | 70 +--- test/CMakeLists.txt | 4 +- test/main.cpp | 2 +- test/test_algorithms.cpp | 349 ------------------ test/test_iterator.cpp | 73 ++++ test/test_reduce.cpp | 173 +++++++++ test/test_transform.cpp | 144 ++++++++ 9 files changed, 504 insertions(+), 419 deletions(-) create mode 100644 include/xsimd_algorithm/algorithms.hpp create mode 100644 include/xsimd_algorithm/stl/reduce.hpp rename include/{xsimd_algo/algorithms.hpp => xsimd_algorithm/stl/transform.hpp} (72%) delete mode 100644 test/test_algorithms.cpp create mode 100644 test/test_iterator.cpp create mode 100644 test/test_reduce.cpp create mode 100644 test/test_transform.cpp diff --git a/include/xsimd_algorithm/algorithms.hpp b/include/xsimd_algorithm/algorithms.hpp new file mode 100644 index 0000000..454f975 --- /dev/null +++ b/include/xsimd_algorithm/algorithms.hpp @@ -0,0 +1,18 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_ALGORITHMS_HPP +#define XSIMD_ALGORITHMS_HPP + +#include "xsimd_algorithm/stl/reduce.hpp" +#include "xsimd_algorithm/stl/transform.hpp" + +#endif diff --git a/include/xsimd_algorithm/stl/reduce.hpp b/include/xsimd_algorithm/stl/reduce.hpp new file mode 100644 index 0000000..1a42913 --- /dev/null +++ b/include/xsimd_algorithm/stl/reduce.hpp @@ -0,0 +1,90 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_ALGORITHMS_REDUCE_HPP +#define XSIMD_ALGORITHMS_REDUCE_HPP + +#include +#include +#include +#include + +#include "xsimd/xsimd.hpp" + +namespace xsimd +{ + // TODO: Remove this once we drop C++11 support + namespace detail + { + struct plus + { + template + auto operator()(X&& x, Y&& y) noexcept -> decltype(x + y) { return x + y; } + }; + } + + template + Init reduce(Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun = detail::plus {}) noexcept + { + using value_type = typename std::decay::type; + using batch_type = batch; + + std::size_t size = static_cast(std::distance(first, last)); + constexpr std::size_t simd_size = batch_type::size; + + if (size < simd_size) + { + while (first != last) + { + init = binfun(init, *first++); + } + return init; + } + + const auto* const ptr_begin = &(*first); + + std::size_t align_begin = xsimd::get_alignment_offset(ptr_begin, size, simd_size); + std::size_t align_end = align_begin + ((size - align_begin) & ~(simd_size - 1)); + + // reduce initial unaligned part + for (std::size_t i = 0; i < align_begin; ++i) + { + init = binfun(init, first[i]); + } + + // reduce aligned part + auto ptr = ptr_begin + align_begin; + batch_type batch_init = batch_type::load_aligned(ptr); + ptr += simd_size; + for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size) + { + batch_type batch = batch_type::load_aligned(ptr); + batch_init = binfun(batch_init, batch); + } + + // reduce across batch + alignas(batch_type) std::array arr; + xsimd::store_aligned(arr.data(), batch_init); + for (auto x : arr) + init = binfun(init, x); + + // reduce final unaligned part + for (std::size_t i = align_end; i < size; ++i) + { + init = binfun(init, first[i]); + } + + return init; + } + +} + +#endif diff --git a/include/xsimd_algo/algorithms.hpp b/include/xsimd_algorithm/stl/transform.hpp similarity index 72% rename from include/xsimd_algo/algorithms.hpp rename to include/xsimd_algorithm/stl/transform.hpp index 7c3935b..d757ff8 100644 --- a/include/xsimd_algo/algorithms.hpp +++ b/include/xsimd_algorithm/stl/transform.hpp @@ -9,10 +9,9 @@ * The full license is in the file LICENSE, distributed with this software. * ****************************************************************************/ -#ifndef XSIMD_ALGORITHMS_HPP -#define XSIMD_ALGORITHMS_HPP +#ifndef XSIMD_ALGORITHMS_TRANSFORM_HPP +#define XSIMD_ALGORITHMS_TRANSFORM_HPP -#include #include #include #include @@ -131,71 +130,6 @@ namespace xsimd #undef XSIMD_LOOP_MACRO } - - // TODO: Remove this once we drop C++11 support - namespace detail - { - struct plus - { - template - auto operator()(X&& x, Y&& y) noexcept -> decltype(x + y) { return x + y; } - }; - } - - template - Init reduce(Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun = detail::plus {}) noexcept - { - using value_type = typename std::decay::type; - using batch_type = batch; - - std::size_t size = static_cast(std::distance(first, last)); - constexpr std::size_t simd_size = batch_type::size; - - if (size < simd_size) - { - while (first != last) - { - init = binfun(init, *first++); - } - return init; - } - - const auto* const ptr_begin = &(*first); - - std::size_t align_begin = xsimd::get_alignment_offset(ptr_begin, size, simd_size); - std::size_t align_end = align_begin + ((size - align_begin) & ~(simd_size - 1)); - - // reduce initial unaligned part - for (std::size_t i = 0; i < align_begin; ++i) - { - init = binfun(init, first[i]); - } - - // reduce aligned part - auto ptr = ptr_begin + align_begin; - batch_type batch_init = batch_type::load_aligned(ptr); - ptr += simd_size; - for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size) - { - batch_type batch = batch_type::load_aligned(ptr); - batch_init = binfun(batch_init, batch); - } - - // reduce across batch - alignas(batch_type) std::array arr; - xsimd::store_aligned(arr.data(), batch_init); - for (auto x : arr) - init = binfun(init, x); - - // reduce final unaligned part - for (std::size_t i = align_end; i < size; ++i) - { - init = binfun(init, first[i]); - } - - return init; - } - } #endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c301a28..347d330 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -47,7 +47,9 @@ endif() set(XSIMD_ALGORITHM_TESTS main.cpp - test_algorithms.cpp + test_iterator.cpp + test_reduce.cpp + test_transform.cpp ) add_executable(test_xsimd_algorithm ${XSIMD_ALGORITHM_TESTS})# ${XSIMD_ALGORITHM_HEADERS}) diff --git a/test/main.cpp b/test/main.cpp index ef66818..418d57c 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -28,4 +28,4 @@ EMSCRIPTEN_BINDINGS(my_module) emscripten::function("run_tests", &run_tests); } -#endif \ No newline at end of file +#endif diff --git a/test/test_algorithms.cpp b/test/test_algorithms.cpp deleted file mode 100644 index f924a59..0000000 --- a/test/test_algorithms.cpp +++ /dev/null @@ -1,349 +0,0 @@ -/*************************************************************************** - * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * - * Martin Renou * - * Copyright (c) QuantStack * - * Copyright (c) Serge Guelton * - * * - * Distributed under the terms of the BSD 3-Clause License. * - * * - * The full license is in the file LICENSE, distributed with this software. * - ****************************************************************************/ - -#include "xsimd_algo/algorithms.hpp" - -#ifndef XSIMD_NO_SUPPORTED_ARCHITECTURE - -#include "doctest/doctest.h" - -#include -#include - -#if XSIMD_WITH_NEON && !XSIMD_WITH_NEON64 -#define ALGORITHMS_TYPES float, std::complex -#else -#define ALGORITHMS_TYPES float, double, std::complex, std::complex -#endif - -template -struct transform_test -{ - using vector = std::vector; - using aligned_vector = std::vector>; - struct binary_functor - { - template - T operator()(const T& a, const T& b) const - { - return a + b; - } - }; - - struct unary_functor - { - template - T operator()(const T& a) const - { - return -a; - } - }; - - void test_binary_transform() const - { - vector expected(93); - vector a(93, 123), b(93, 123), c(93); - aligned_vector aa(93, 123), ba(93, 123), ca(93); - - std::transform(a.begin(), a.end(), b.begin(), expected.begin(), - binary_functor {}); - - xsimd::transform(a.begin(), a.end(), b.begin(), c.begin(), - binary_functor {}); - CHECK(std::equal(expected.begin(), expected.end(), c.begin())); - CHECK(expected.size() == c.size()); - std::fill(c.begin(), c.end(), -1); // erase - - xsimd::transform(aa.begin(), aa.end(), ba.begin(), c.begin(), - binary_functor {}); - CHECK(std::equal(expected.begin(), expected.end(), c.begin())); - CHECK(expected.size() == c.size()); - std::fill(c.begin(), c.end(), -1); // erase - - xsimd::transform(aa.begin(), aa.end(), b.begin(), c.begin(), - binary_functor {}); - CHECK(std::equal(expected.begin(), expected.end(), c.begin())); - CHECK(expected.size() == c.size()); - std::fill(c.begin(), c.end(), -1); // erase - - xsimd::transform(a.begin(), a.end(), ba.begin(), c.begin(), - binary_functor {}); - CHECK(std::equal(expected.begin(), expected.end(), c.begin())); - CHECK(expected.size() == c.size()); - std::fill(c.begin(), c.end(), -1); // erase - - xsimd::transform(aa.begin(), aa.end(), ba.begin(), ca.begin(), - binary_functor {}); - CHECK(std::equal(expected.begin(), expected.end(), ca.begin())); - CHECK(expected.size() == ca.size()); - std::fill(ca.begin(), ca.end(), -1); // erase - - xsimd::transform(aa.begin(), aa.end(), b.begin(), ca.begin(), - binary_functor {}); - CHECK(std::equal(expected.begin(), expected.end(), ca.begin())); - CHECK(expected.size() == ca.size()); - std::fill(ca.begin(), ca.end(), -1); // erase - - xsimd::transform(a.begin(), a.end(), ba.begin(), ca.begin(), - binary_functor {}); - CHECK(std::equal(expected.begin(), expected.end(), ca.begin())); - CHECK(expected.size() == ca.size()); - std::fill(ca.begin(), ca.end(), -1); // erase - } - - void test_unary_transform() const - { - vector expected(93); - vector a(93, 123), c(93); - aligned_vector aa(93, 123), ca(93); - - std::transform(a.begin(), a.end(), expected.begin(), - unary_functor {}); - - xsimd::transform(a.begin(), a.end(), c.begin(), - unary_functor {}); - CHECK(std::equal(expected.begin(), expected.end(), c.begin())); - CHECK(expected.size() == c.size()); - std::fill(c.begin(), c.end(), -1); // erase - - xsimd::transform(aa.begin(), aa.end(), c.begin(), - unary_functor {}); - CHECK(std::equal(expected.begin(), expected.end(), c.begin())); - CHECK(expected.size() == c.size()); - std::fill(c.begin(), c.end(), -1); // erase - - xsimd::transform(a.begin(), a.end(), ca.begin(), - unary_functor {}); - CHECK(std::equal(expected.begin(), expected.end(), ca.begin())); - CHECK(expected.size() == ca.size()); - std::fill(ca.begin(), ca.end(), -1); // erase - - xsimd::transform(aa.begin(), aa.end(), ca.begin(), - unary_functor {}); - CHECK(std::equal(expected.begin(), expected.end(), ca.begin())); - CHECK(expected.size() == ca.size()); - std::fill(ca.begin(), ca.end(), -1); // erase - } -}; - -TEST_CASE_TEMPLATE("transform test", T, ALGORITHMS_TYPES) -{ - transform_test Test; - - SUBCASE("unary") { Test.test_unary_transform(); } - SUBCASE("binary") { Test.test_binary_transform(); } -} - -template -using test_allocator_type = xsimd::aligned_allocator; - -#if XSIMD_WITH_NEON && !XSIMD_WITH_NEON64 -using test_value_type = float; -#else -using test_value_type = double; -#endif - -struct multiply -{ - template - T operator()(const T& a, const T& b) const - { - return a * b; - } -}; - -TEST_CASE("xsimd_reduce - unaligned_begin_unaligned_end") -{ - using aligned_vec_t = std::vector>; - constexpr std::size_t num_elements = 4 * xsimd::batch::size; - constexpr std::size_t small_num = xsimd::batch::size - 1; - - aligned_vec_t vec(num_elements, 123.); - aligned_vec_t small_vec(small_num, 42.); - test_value_type init = 1337.; - - auto const begin = std::next(vec.begin()); - auto const end = std::prev(vec.end()); - - CHECK_EQ(std::accumulate(begin, end, init), xsimd::reduce(begin, end, init)); - - if (small_vec.size() > 1) - { - auto const sbegin = std::next(small_vec.begin()); - auto const send = std::prev(small_vec.end()); - - CHECK_EQ(std::accumulate(sbegin, send, init), xsimd::reduce(sbegin, send, init)); - } -} - -TEST_CASE("xsimd_reduce - unaligned_begin_aligned_end") -{ - using aligned_vec_t = std::vector>; - constexpr std::size_t num_elements = 4 * xsimd::batch::size; - constexpr std::size_t small_num = xsimd::batch::size - 1; - - aligned_vec_t vec(num_elements, 123.); - aligned_vec_t small_vec(small_num, 42.); - test_value_type init = 1337.; - - auto const begin = std::next(vec.begin()); - auto const end = vec.end(); - - CHECK_EQ(std::accumulate(begin, end, init), xsimd::reduce(begin, end, init)); - - if (small_vec.size() > 1) - { - auto const sbegin = std::next(small_vec.begin()); - auto const send = small_vec.end(); - - CHECK_EQ(std::accumulate(sbegin, send, init), xsimd::reduce(sbegin, send, init)); - } -} - -TEST_CASE("xsimd_reduce - aligned_begin_unaligned_end") -{ - using aligned_vec_t = std::vector>; - constexpr std::size_t num_elements = 4 * xsimd::batch::size; - constexpr std::size_t small_num = xsimd::batch::size - 1; - - aligned_vec_t vec(num_elements, 123.); - aligned_vec_t small_vec(small_num, 42.); - test_value_type init = 1337.; - - auto const begin = vec.begin(); - auto const end = std::prev(vec.end()); - - CHECK_EQ(std::accumulate(begin, end, init), xsimd::reduce(begin, end, init)); - - if (small_vec.size() > 1) - { - auto const sbegin = small_vec.begin(); - auto const send = std::prev(small_vec.end()); - - CHECK_EQ(std::accumulate(sbegin, send, init), xsimd::reduce(sbegin, send, init)); - } -} - -TEST_CASE("xsimd_reduce - aligned_begin_aligned_end") -{ - using aligned_vec_t = std::vector>; - constexpr std::size_t num_elements = 4 * xsimd::batch::size; - constexpr std::size_t small_num = xsimd::batch::size - 1; - - aligned_vec_t vec(num_elements, 123.); - aligned_vec_t small_vec(small_num, 42.); - test_value_type init = 1337.; - - auto const begin = vec.begin(); - auto const end = vec.end(); - - CHECK_EQ(std::accumulate(begin, end, init), xsimd::reduce(begin, end, init)); - - if (small_vec.size() > 1) - { - auto const sbegin = small_vec.begin(); - auto const send = small_vec.end(); - - CHECK_EQ(std::accumulate(sbegin, send, init), xsimd::reduce(sbegin, send, init)); - } -} - -TEST_CASE("xsimd_reduce - using_custom_binary_function") -{ - using aligned_vec_t = std::vector>; - constexpr std::size_t num_elements = 4 * xsimd::batch::size; - constexpr std::size_t small_num = xsimd::batch::size - 1; - - aligned_vec_t vec(num_elements, 123.); - aligned_vec_t small_vec(small_num, 42.); - test_value_type init = 1337.; - - auto const begin = vec.begin(); - auto const end = vec.end(); - - if (std::is_same::value) - { - CHECK(std::accumulate(begin, end, init, multiply {}) == doctest::Approx(xsimd::reduce(begin, end, init, multiply {}))); - } - else - { - CHECK(std::accumulate(begin, end, init, multiply {}) == doctest::Approx(xsimd::reduce(begin, end, init, multiply {}))); - } - - if (small_vec.size() > 1) - { - auto const sbegin = small_vec.begin(); - auto const send = small_vec.end(); - - if (std::is_same::value) - { - CHECK(std::accumulate(sbegin, send, init, multiply {}) == doctest::Approx(xsimd::reduce(sbegin, send, init, multiply {}))); - } - else - { - CHECK(std::accumulate(sbegin, send, init, multiply {}) == doctest::Approx(xsimd::reduce(sbegin, send, init, multiply {}))); - } - } -} - -#if XSIMD_X86_INSTR_SET > XSIMD_VERSION_NUMBER_NOT_AVAILABLE || XSIMD_ARM_INSTR_SET > XSIMD_VERSION_NUMBER_NOT_AVAILABLE -TEST_CASE("algorithms - iterator") -{ - std::vector> a(10 * 16, 0.2), b(1000, 2.), c(1000, 3.); - - std::iota(a.begin(), a.end(), 0.f); - std::vector a_cpy(a.begin(), a.end()); - - using batch_type = xsimd::batch; - auto begin = xsimd::aligned_iterator(&a[0]); - auto end = xsimd::aligned_iterator(&a[0] + a.size()); - - for (; begin != end; ++begin) - { - *begin = *begin / 2.f; - } - - for (auto& el : a_cpy) - { - el /= 2.f; - } - - CHECK(a.size() == a_cpy.size()); - CHECK(std::equal(a.begin(), a.end(), a_cpy.begin())); - - begin = xsimd::aligned_iterator(&a[0]); - *begin = sin(*begin); - - for (std::size_t i = 0; i < batch_type::size; ++i) - { - CHECK(a[i] == doctest::Approx(sinf(a_cpy[i])).epsilon(1e-6)); - } - -#if !XSIMD_WITH_NEON || XSIMD_WITH_NEON64 - std::vector, test_allocator_type>> ca(10 * 16, std::complex(0.2)); - using cbatch_type = xsimd::batch>; - auto cbegin = xsimd::aligned_iterator(&ca[0]); - auto cend = xsimd::aligned_iterator(&ca[0] + a.size()); - - for (; cbegin != cend; ++cbegin) - { - *cbegin = (*cbegin + std::complex(0, .3)) / 2.; - } - - cbegin = xsimd::aligned_iterator(&ca[0]); - *cbegin = sin(*cbegin); - *cbegin = sqrt(*cbegin); - auto real_part = abs(*(cbegin)); - (void)real_part; -#endif -} -#endif -#endif \ No newline at end of file diff --git a/test/test_iterator.cpp b/test/test_iterator.cpp new file mode 100644 index 0000000..438c536 --- /dev/null +++ b/test/test_iterator.cpp @@ -0,0 +1,73 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#include "xsimd_algorithm/algorithms.hpp" + +#ifndef XSIMD_NO_SUPPORTED_ARCHITECTURE + +#include "doctest/doctest.h" + +template +using test_allocator_type = xsimd::aligned_allocator; + +#if XSIMD_X86_INSTR_SET > XSIMD_VERSION_NUMBER_NOT_AVAILABLE || XSIMD_ARM_INSTR_SET > XSIMD_VERSION_NUMBER_NOT_AVAILABLE +TEST_CASE("algorithms - iterator") +{ + std::vector> a(10 * 16, 0.2), b(1000, 2.), c(1000, 3.); + + std::iota(a.begin(), a.end(), 0.f); + std::vector a_cpy(a.begin(), a.end()); + + using batch_type = xsimd::batch; + auto begin = xsimd::aligned_iterator(&a[0]); + auto end = xsimd::aligned_iterator(&a[0] + a.size()); + + for (; begin != end; ++begin) + { + *begin = *begin / 2.f; + } + + for (auto& el : a_cpy) + { + el /= 2.f; + } + + CHECK(a.size() == a_cpy.size()); + CHECK(std::equal(a.begin(), a.end(), a_cpy.begin())); + + begin = xsimd::aligned_iterator(&a[0]); + *begin = sin(*begin); + + for (std::size_t i = 0; i < batch_type::size; ++i) + { + CHECK(a[i] == doctest::Approx(sinf(a_cpy[i])).epsilon(1e-6)); + } + +#if !XSIMD_WITH_NEON || XSIMD_WITH_NEON64 + std::vector, test_allocator_type>> ca(10 * 16, std::complex(0.2)); + using cbatch_type = xsimd::batch>; + auto cbegin = xsimd::aligned_iterator(&ca[0]); + auto cend = xsimd::aligned_iterator(&ca[0] + a.size()); + + for (; cbegin != cend; ++cbegin) + { + *cbegin = (*cbegin + std::complex(0, .3)) / 2.; + } + + cbegin = xsimd::aligned_iterator(&ca[0]); + *cbegin = sin(*cbegin); + *cbegin = sqrt(*cbegin); + auto real_part = abs(*(cbegin)); + (void)real_part; +#endif +} +#endif +#endif diff --git a/test/test_reduce.cpp b/test/test_reduce.cpp new file mode 100644 index 0000000..84a5533 --- /dev/null +++ b/test/test_reduce.cpp @@ -0,0 +1,173 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#include "xsimd_algorithm/stl/reduce.hpp" + +#ifndef XSIMD_NO_SUPPORTED_ARCHITECTURE + +#include "doctest/doctest.h" + +#include +#include + +template +using test_allocator_type = xsimd::aligned_allocator; + +#if XSIMD_WITH_NEON && !XSIMD_WITH_NEON64 +using test_value_type = float; +#else +using test_value_type = double; +#endif + +struct multiply +{ + template + T operator()(const T& a, const T& b) const + { + return a * b; + } +}; + +TEST_CASE("xsimd_reduce - unaligned_begin_unaligned_end") +{ + using aligned_vec_t = std::vector>; + constexpr std::size_t num_elements = 4 * xsimd::batch::size; + constexpr std::size_t small_num = xsimd::batch::size - 1; + + aligned_vec_t vec(num_elements, 123.); + aligned_vec_t small_vec(small_num, 42.); + test_value_type init = 1337.; + + auto const begin = std::next(vec.begin()); + auto const end = std::prev(vec.end()); + + CHECK_EQ(std::accumulate(begin, end, init), xsimd::reduce(begin, end, init)); + + if (small_vec.size() > 1) + { + auto const sbegin = std::next(small_vec.begin()); + auto const send = std::prev(small_vec.end()); + + CHECK_EQ(std::accumulate(sbegin, send, init), xsimd::reduce(sbegin, send, init)); + } +} + +TEST_CASE("xsimd_reduce - unaligned_begin_aligned_end") +{ + using aligned_vec_t = std::vector>; + constexpr std::size_t num_elements = 4 * xsimd::batch::size; + constexpr std::size_t small_num = xsimd::batch::size - 1; + + aligned_vec_t vec(num_elements, 123.); + aligned_vec_t small_vec(small_num, 42.); + test_value_type init = 1337.; + + auto const begin = std::next(vec.begin()); + auto const end = vec.end(); + + CHECK_EQ(std::accumulate(begin, end, init), xsimd::reduce(begin, end, init)); + + if (small_vec.size() > 1) + { + auto const sbegin = std::next(small_vec.begin()); + auto const send = small_vec.end(); + + CHECK_EQ(std::accumulate(sbegin, send, init), xsimd::reduce(sbegin, send, init)); + } +} + +TEST_CASE("xsimd_reduce - aligned_begin_unaligned_end") +{ + using aligned_vec_t = std::vector>; + constexpr std::size_t num_elements = 4 * xsimd::batch::size; + constexpr std::size_t small_num = xsimd::batch::size - 1; + + aligned_vec_t vec(num_elements, 123.); + aligned_vec_t small_vec(small_num, 42.); + test_value_type init = 1337.; + + auto const begin = vec.begin(); + auto const end = std::prev(vec.end()); + + CHECK_EQ(std::accumulate(begin, end, init), xsimd::reduce(begin, end, init)); + + if (small_vec.size() > 1) + { + auto const sbegin = small_vec.begin(); + auto const send = std::prev(small_vec.end()); + + CHECK_EQ(std::accumulate(sbegin, send, init), xsimd::reduce(sbegin, send, init)); + } +} + +TEST_CASE("xsimd_reduce - aligned_begin_aligned_end") +{ + using aligned_vec_t = std::vector>; + constexpr std::size_t num_elements = 4 * xsimd::batch::size; + constexpr std::size_t small_num = xsimd::batch::size - 1; + + aligned_vec_t vec(num_elements, 123.); + aligned_vec_t small_vec(small_num, 42.); + test_value_type init = 1337.; + + auto const begin = vec.begin(); + auto const end = vec.end(); + + CHECK_EQ(std::accumulate(begin, end, init), xsimd::reduce(begin, end, init)); + + if (small_vec.size() > 1) + { + auto const sbegin = small_vec.begin(); + auto const send = small_vec.end(); + + CHECK_EQ(std::accumulate(sbegin, send, init), xsimd::reduce(sbegin, send, init)); + } +} + +TEST_CASE("xsimd_reduce - using_custom_binary_function") +{ + using aligned_vec_t = std::vector>; + constexpr std::size_t num_elements = 4 * xsimd::batch::size; + constexpr std::size_t small_num = xsimd::batch::size - 1; + + aligned_vec_t vec(num_elements, 123.); + aligned_vec_t small_vec(small_num, 42.); + test_value_type init = 1337.; + + auto const begin = vec.begin(); + auto const end = vec.end(); + + if (std::is_same::value) + { + CHECK(std::accumulate(begin, end, init, multiply {}) == doctest::Approx(xsimd::reduce(begin, end, init, multiply {}))); + } + else + { + CHECK(std::accumulate(begin, end, init, multiply {}) == doctest::Approx(xsimd::reduce(begin, end, init, multiply {}))); + } + + if (small_vec.size() > 1) + { + auto const sbegin = small_vec.begin(); + auto const send = small_vec.end(); + + if (std::is_same::value) + { + CHECK(std::accumulate(sbegin, send, init, multiply {}) == doctest::Approx(xsimd::reduce(sbegin, send, init, multiply {}))); + } + else + { + CHECK(std::accumulate(sbegin, send, init, multiply {}) == doctest::Approx(xsimd::reduce(sbegin, send, init, multiply {}))); + } + } +} + +#endif diff --git a/test/test_transform.cpp b/test/test_transform.cpp new file mode 100644 index 0000000..3622112 --- /dev/null +++ b/test/test_transform.cpp @@ -0,0 +1,144 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#include "xsimd_algorithm/stl/transform.hpp" + +#ifndef XSIMD_NO_SUPPORTED_ARCHITECTURE + +#include "doctest/doctest.h" + +#include + +#if XSIMD_WITH_NEON && !XSIMD_WITH_NEON64 +#define ALGORITHMS_TYPES float, std::complex +#else +#define ALGORITHMS_TYPES float, double, std::complex, std::complex +#endif + +template +struct transform_test +{ + using vector = std::vector; + using aligned_vector = std::vector>; + struct binary_functor + { + template + T operator()(const T& a, const T& b) const + { + return a + b; + } + }; + + struct unary_functor + { + template + T operator()(const T& a) const + { + return -a; + } + }; + + void test_binary_transform() const + { + vector expected(93); + vector a(93, 123), b(93, 123), c(93); + aligned_vector aa(93, 123), ba(93, 123), ca(93); + + std::transform(a.begin(), a.end(), b.begin(), expected.begin(), + binary_functor {}); + + xsimd::transform(a.begin(), a.end(), b.begin(), c.begin(), + binary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), c.begin())); + CHECK(expected.size() == c.size()); + std::fill(c.begin(), c.end(), -1); // erase + + xsimd::transform(aa.begin(), aa.end(), ba.begin(), c.begin(), + binary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), c.begin())); + CHECK(expected.size() == c.size()); + std::fill(c.begin(), c.end(), -1); // erase + + xsimd::transform(aa.begin(), aa.end(), b.begin(), c.begin(), + binary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), c.begin())); + CHECK(expected.size() == c.size()); + std::fill(c.begin(), c.end(), -1); // erase + + xsimd::transform(a.begin(), a.end(), ba.begin(), c.begin(), + binary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), c.begin())); + CHECK(expected.size() == c.size()); + std::fill(c.begin(), c.end(), -1); // erase + + xsimd::transform(aa.begin(), aa.end(), ba.begin(), ca.begin(), + binary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), ca.begin())); + CHECK(expected.size() == ca.size()); + std::fill(ca.begin(), ca.end(), -1); // erase + + xsimd::transform(aa.begin(), aa.end(), b.begin(), ca.begin(), + binary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), ca.begin())); + CHECK(expected.size() == ca.size()); + std::fill(ca.begin(), ca.end(), -1); // erase + + xsimd::transform(a.begin(), a.end(), ba.begin(), ca.begin(), + binary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), ca.begin())); + CHECK(expected.size() == ca.size()); + std::fill(ca.begin(), ca.end(), -1); // erase + } + + void test_unary_transform() const + { + vector expected(93); + vector a(93, 123), c(93); + aligned_vector aa(93, 123), ca(93); + + std::transform(a.begin(), a.end(), expected.begin(), + unary_functor {}); + + xsimd::transform(a.begin(), a.end(), c.begin(), + unary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), c.begin())); + CHECK(expected.size() == c.size()); + std::fill(c.begin(), c.end(), -1); // erase + + xsimd::transform(aa.begin(), aa.end(), c.begin(), + unary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), c.begin())); + CHECK(expected.size() == c.size()); + std::fill(c.begin(), c.end(), -1); // erase + + xsimd::transform(a.begin(), a.end(), ca.begin(), + unary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), ca.begin())); + CHECK(expected.size() == ca.size()); + std::fill(ca.begin(), ca.end(), -1); // erase + + xsimd::transform(aa.begin(), aa.end(), ca.begin(), + unary_functor {}); + CHECK(std::equal(expected.begin(), expected.end(), ca.begin())); + CHECK(expected.size() == ca.size()); + std::fill(ca.begin(), ca.end(), -1); // erase + } +}; + +TEST_CASE_TEMPLATE("transform test", T, ALGORITHMS_TYPES) +{ + transform_test Test; + + SUBCASE("unary") { Test.test_unary_transform(); } + SUBCASE("binary") { Test.test_binary_transform(); } +} + +#endif