diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 51228749f..412d03644 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -134,7 +134,16 @@ set(XTENSOR_BENCHMARK set(XTENSOR_BENCHMARK_TARGET benchmark_xtensor) add_executable(${XTENSOR_BENCHMARK_TARGET} EXCLUDE_FROM_ALL ${XTENSOR_BENCHMARK} ${XTENSOR_HEADERS}) -target_link_libraries(${XTENSOR_BENCHMARK_TARGET} xtensor ${GBENCHMARK_LIBRARIES}) +target_link_libraries(${XTENSOR_BENCHMARK_TARGET} PUBLIC xtensor ${GBENCHMARK_LIBRARIES}) + +if(XTENSOR_USE_TBB) + target_compile_definitions(${XTENSOR_BENCHMARK_TARGET} PUBLIC XTENSOR_USE_TBB) + target_include_directories(${XTENSOR_BENCHMARK_TARGET} PUBLIC ${TBB_INCLUDE_DIRS}) + target_link_libraries(${XTENSOR_BENCHMARK_TARGET} PUBLIC ${TBB_LIBRARIES}) +endif() +if(XTENSOR_USE_OPENMP) + target_compile_definitions(${XTENSOR_BENCHMARK_TARGET} PUBLIC XTENSOR_USE_OPENMP) +endif() add_custom_target(xbenchmark COMMAND benchmark_xtensor diff --git a/benchmark/benchmark_views.cpp b/benchmark/benchmark_views.cpp index e0f9067d3..db233ec05 100644 --- a/benchmark/benchmark_views.cpp +++ b/benchmark/benchmark_views.cpp @@ -195,6 +195,76 @@ namespace xt BENCHMARK_TEMPLATE(view_assign_strided_view_noalias, float); } + namespace finite_diff + { + inline auto stencil_threedirections(benchmark::State& state, size_t size) + { + for (auto _ : state) + { + const std::array shape = {size, size, size}; + xt::xtensor a(shape), b(shape); + auto core = xt::range(1, size - 1); + xt::noalias(xt::view(b, core, core, core) + ) = 1.0 / 7.0 + * (xt::view(a, core, core, core) + xt::view(a, core, core, xt::range(2, size)) + + xt::view(a, core, core, xt::range(0, size - 2)) + + xt::view(a, core, xt::range(2, size), core) + + xt::view(a, core, xt::range(0, size - 2), core) + + xt::view(a, xt::range(2, size), core, core) + + xt::view(a, xt::range(0, size - 2), core, core)); + benchmark::DoNotOptimize(b); + } + } + + inline auto stencil_twodirections(benchmark::State& state, size_t size) + { + for (auto _ : state) + { + const std::array shape = {size, size, size}; + xt::xtensor a(shape), b(shape); + auto core = xt::range(1, size - 1); + xt::noalias(xt::view(b, core, core, core) + ) = 1.0 / 7.0 + * (xt::view(a, core, core, core) + xt::view(a, core, xt::range(2, size), core) + + xt::view(a, core, xt::range(0, size - 2), core) + + xt::view(a, xt::range(2, size), core, core) + + xt::view(a, xt::range(0, size - 2), core, core)); + benchmark::DoNotOptimize(b); + } + } + + inline auto stencil_onedirection(benchmark::State& state, size_t size) + { + for (auto _ : state) + { + const std::array shape = {size, size, size}; + xt::xtensor a(shape), b(shape); + auto core = xt::range(1, size - 1); + xt::noalias(xt::view(b, core, core, core) + ) = 1.0 / 2.0 + * (xt::view(a, xt::range(2, size), core, core) + - xt::view(a, xt::range(0, size - 2), core, core)); + benchmark::DoNotOptimize(b); + } + } + + BENCHMARK_CAPTURE(stencil_threedirections, stencil_threedirections_50, 50); + BENCHMARK_CAPTURE(stencil_threedirections, stencil_threedirections_100, 100); + BENCHMARK_CAPTURE(stencil_threedirections, stencil_threedirections_200, 200); + BENCHMARK_CAPTURE(stencil_threedirections, stencil_threedirections_300, 300); + BENCHMARK_CAPTURE(stencil_threedirections, stencil_threedirections_500, 500); + BENCHMARK_CAPTURE(stencil_twodirections, stencil_twodirections_50, 50); + BENCHMARK_CAPTURE(stencil_twodirections, stencil_twodirections_100, 100); + BENCHMARK_CAPTURE(stencil_twodirections, stencil_twodirections_200, 200); + BENCHMARK_CAPTURE(stencil_twodirections, stencil_twodirections_300, 300); + BENCHMARK_CAPTURE(stencil_twodirections, stencil_twodirections_500, 500); + BENCHMARK_CAPTURE(stencil_onedirection, stencil_onedirections_50, 50); + BENCHMARK_CAPTURE(stencil_onedirection, stencil_onedirections_100, 100); + BENCHMARK_CAPTURE(stencil_onedirection, stencil_onedirections_200, 200); + BENCHMARK_CAPTURE(stencil_onedirection, stencil_onedirections_300, 300); + BENCHMARK_CAPTURE(stencil_onedirection, stencil_onedirections_500, 500); + } + namespace stridedview { diff --git a/include/xtensor/xassign.hpp b/include/xtensor/xassign.hpp index 1c5535113..9461320a8 100644 --- a/include/xtensor/xassign.hpp +++ b/include/xtensor/xassign.hpp @@ -172,11 +172,30 @@ namespace xt * strided_loop_assigner * *************************/ + namespace strided_assign_detail + { + struct loop_sizes_t + { + bool can_do_strided_assign; + bool is_row_major; + std::size_t inner_loop_size; + std::size_t outer_loop_size; + std::size_t cut; + std::size_t dimension; + }; + } + template class strided_loop_assigner { public: + using loop_sizes_t = strided_assign_detail::loop_sizes_t; + // is_row_major, inner_loop_size, outer_loop_size, cut + template + static void run(E1& e1, const E2& e2, const loop_sizes_t& loop_sizes); + template + static loop_sizes_t get_loop_sizes(E1& e1, const E2& e2); template static void run(E1& e1, const E2& e2); }; @@ -254,7 +273,7 @@ namespace xt -> std::enable_if_t::value, bool> { return (E1::contiguous_layout && E2::contiguous_layout && linear_static_layout()) - || (e1.layout() != layout_type::dynamic && e2.has_linear_assign(e1.strides())); + || (e1.is_contiguous() && e2.has_linear_assign(e1.strides())); } template @@ -457,8 +476,7 @@ namespace xt } else { - stepper_assigner assigner(de1, de2); - assigner.run(); + stepper_assigner(de1, de2).run(); } } @@ -713,6 +731,7 @@ namespace xt #if defined(XTENSOR_USE_TBB) if (size >= XTENSOR_TBB_THRESHOLD) { + tbb::static_partitioner ap; tbb::parallel_for( align_begin, align_end, @@ -723,7 +742,8 @@ namespace xt i, e2.template load_simd(i) ); - } + }, + ap ); } else @@ -734,7 +754,7 @@ namespace xt } } #elif defined(XTENSOR_USE_OPENMP) - if (size >= XTENSOR_OPENMP_TRESHOLD) + if (size >= size_type(XTENSOR_OPENMP_TRESHOLD)) { #pragma omp parallel for default(none) shared(align_begin, align_end, e1, e2) #ifndef _WIN32 @@ -789,21 +809,34 @@ namespace xt auto src = linear_begin(e2); auto dst = linear_begin(e1); size_type n = e1.size(); - #if defined(XTENSOR_USE_TBB) + tbb::static_partitioner sp; tbb::parallel_for( std::ptrdiff_t(0), static_cast(n), [&](std::ptrdiff_t i) { *(dst + i) = static_cast(*(src + i)); - } + }, + sp ); #elif defined(XTENSOR_USE_OPENMP) + if (n >= XTENSOR_OPENMP_TRESHOLD) + { #pragma omp parallel for default(none) shared(src, dst, n) - for (std::ptrdiff_t i = std::ptrdiff_t(0); i < static_cast(n); i++) + for (std::ptrdiff_t i = std::ptrdiff_t(0); i < static_cast(n); i++) + { + *(dst + i) = static_cast(*(src + i)); + } + } + else { - *(dst + i) = static_cast(*(src + i)); + for (; n > size_type(0); --n) + { + *dst = static_cast(*src); + ++src; + ++dst; + } } #else for (; n > size_type(0); --n) @@ -850,6 +883,27 @@ namespace xt } } } + + template + static void nth_idx(size_t n, T& outer_index, const T& outer_shape) + { + dynamic_shape stride_sizes; + xt::resize_container(stride_sizes, outer_shape.size()); + // compute strides + using size_type = typename T::size_type; + for (size_type i = outer_shape.size(); i > 0; i--) + { + stride_sizes[i - 1] = (i == outer_shape.size()) ? 1 : stride_sizes[i] * outer_shape[i]; + } + + // compute index + for (size_type i = 0; i < outer_shape.size(); i++) + { + auto d_idx = n / stride_sizes[i]; + outer_index[i] = d_idx; + n -= d_idx * stride_sizes[i]; + } + } }; template <> @@ -874,6 +928,30 @@ namespace xt } } } + + template + static void nth_idx(size_t n, T& outer_index, const T& outer_shape) + { + dynamic_shape stride_sizes; + xt::resize_container(stride_sizes, outer_shape.size()); + + using size_type = typename T::size_type; + + // compute required strides + for (size_type i = 0; i < outer_shape.size(); i++) + { + stride_sizes[i] = (i == 0) ? 1 : stride_sizes[i - 1] * outer_shape[i - 1]; + } + + // compute index + for (size_type i = outer_shape.size(); i > 0;) + { + i--; + auto d_idx = n / stride_sizes[i]; + outer_index[i] = d_idx; + n -= d_idx * stride_sizes[i]; + } + } }; template @@ -890,6 +968,7 @@ namespace xt template std::enable_if_t operator()(const T& el) { + // All dimenions less than var have differing strides auto var = check_strides_overlap::get(m_strides, el.strides()); if (var > m_cut) { @@ -902,6 +981,7 @@ namespace xt std::enable_if_t operator()(const T& el) { auto var = check_strides_overlap::get(m_strides, el.strides()); + // All dimensions >= var have differing strides if (var < m_cut) { m_cut = var; @@ -928,25 +1008,70 @@ namespace xt const strides_type& m_strides; }; - template - auto get_loop_sizes(const E1& e1, const E2& e2, bool is_row_major) + template ::value || !possible, bool> = true> + loop_sizes_t get_loop_sizes(const E1& e1, const E2&) { + return {false, true, 1, e1.size(), e1.dimension(), e1.dimension()}; + } + + template ::value && possible, bool> = true> + loop_sizes_t get_loop_sizes(const E1& e1, const E2& e2) + { + using shape_value_type = typename E1::shape_type::value_type; + bool is_row_major = true; + + // Try to find a row-major scheme first, where the outer loop is on the first N = `cut` + // dimensions, and the inner loop is + is_row_major = true; + auto is_zero = [](auto i) + { + return i == 0; + }; + auto&& strides = e1.strides(); + auto it_bwd = std::find_if_not(strides.rbegin(), strides.rend(), is_zero); + bool de1_row_contiguous = it_bwd != strides.rend() && *it_bwd == 1; + auto it_fwd = std::find_if_not(strides.begin(), strides.end(), is_zero); + bool de1_col_contiguous = it_fwd != strides.end() && *it_fwd == 1; + if (de1_row_contiguous) + { + is_row_major = true; + } + else if (de1_col_contiguous) + { + is_row_major = false; + } + else + { + // No strided loop possible. + return {false, true, 1, e1.size(), e1.dimension(), e1.dimension()}; + } + + // Cut is the number of dimensions in the outer loop std::size_t cut = 0; - // TODO! if E1 is !contiguous --> initialize cut to sensible value! - if (E1::static_layout == layout_type::row_major || is_row_major) + if (is_row_major) { auto csf = check_strides_functor(e1.strides()); cut = csf(e2); + // This makes that only one dimension will be treated in the inner loop. + if (cut < e1.strides().size() - 1) + { + // Only make the inner loop go over one dimension by default for now + cut = e1.strides().size() - 1; + } } - else if (E1::static_layout == layout_type::column_major || !is_row_major) + else if (!is_row_major) { auto csf = check_strides_functor(e1.strides() ); cut = csf(e2); + if (cut > 1) + { + // Only make the inner loop go over one dimension by default for now + cut = 1; + } } // can't reach here because this would have already triggered the fallback - using shape_value_type = typename E1::shape_type::value_type; std::size_t outer_loop_size = static_cast(std::accumulate( e1.shape().begin(), e1.shape().begin() + static_cast(cut), @@ -960,61 +1085,33 @@ namespace xt std::multiplies{} )); - if (E1::static_layout == layout_type::column_major || !is_row_major) + if (!is_row_major) { std::swap(outer_loop_size, inner_loop_size); } - return std::make_tuple(inner_loop_size, outer_loop_size, cut); + return {inner_loop_size > 1, is_row_major, inner_loop_size, outer_loop_size, cut, e1.dimension()}; } } template template - inline void strided_loop_assigner::run(E1& e1, const E2& e2) + inline strided_assign_detail::loop_sizes_t strided_loop_assigner::get_loop_sizes(E1& e1, const E2& e2) { - bool is_row_major = true; - using fallback_assigner = stepper_assigner; + return strided_assign_detail::get_loop_sizes(e1, e2); + } - if (E1::static_layout == layout_type::dynamic) - { - layout_type dynamic_layout = e1.layout(); - switch (dynamic_layout) - { - case layout_type::row_major: - is_row_major = true; - break; - case layout_type::column_major: - is_row_major = false; - break; - default: - return fallback_assigner(e1, e2).run(); - } - } - else if (E1::static_layout == layout_type::row_major) - { - is_row_major = true; - } - else if (E1::static_layout == layout_type::column_major) - { - is_row_major = false; - } - else - { - XTENSOR_THROW(std::runtime_error, "Illegal layout set (layout_type::any?)."); - } +#define strided_parallel_assign - std::size_t inner_loop_size, outer_loop_size, cut; - std::tie(inner_loop_size, outer_loop_size, cut) = strided_assign_detail::get_loop_sizes( - e1, - e2, - is_row_major - ); + template + template + inline void strided_loop_assigner::run(E1& e1, const E2& e2, const loop_sizes_t& loop_sizes) + { + bool is_row_major = loop_sizes.is_row_major; + std::size_t inner_loop_size = loop_sizes.inner_loop_size; + std::size_t outer_loop_size = loop_sizes.outer_loop_size; + std::size_t cut = loop_sizes.cut; - if ((is_row_major && cut == e1.dimension()) || (!is_row_major && cut == 0)) - { - return fallback_assigner(e1, e2).run(); - } // TODO can we get rid of this and use `shape_type`? dynamic_shape idx, max_shape; @@ -1054,50 +1151,216 @@ namespace xt { step_dim = cut; } - - for (std::size_t ox = 0; ox < outer_loop_size; ++ox) +#if defined(XTENSOR_USE_OPENMP) && defined(strided_parallel_assign) + if (outer_loop_size >= XTENSOR_OPENMP_TRESHOLD / inner_loop_size) { - for (std::size_t i = 0; i < simd_size; ++i) + std::size_t first_step = true; +#pragma omp parallel for schedule(static) firstprivate(first_step, fct_stepper, res_stepper, idx) + for (std::size_t ox = 0; ox < outer_loop_size; ++ox) { - res_stepper.store_simd(fct_stepper.template step_simd()); - } - for (std::size_t i = 0; i < simd_rest; ++i) - { - *(res_stepper) = conditional_cast(*(fct_stepper)); - res_stepper.step_leading(); - fct_stepper.step_leading(); - } + if (first_step) + { + is_row_major + ? strided_assign_detail::idx_tools::nth_idx(ox, idx, max_shape) + : strided_assign_detail::idx_tools::nth_idx(ox, idx, max_shape); - is_row_major - ? strided_assign_detail::idx_tools::next_idx(idx, max_shape) - : strided_assign_detail::idx_tools::next_idx(idx, max_shape); + for (std::size_t i = 0; i < idx.size(); ++i) + { + fct_stepper.step(i + step_dim, idx[i]); + res_stepper.step(i + step_dim, idx[i]); + } + first_step = false; + } - fct_stepper.to_begin(); + for (std::size_t i = 0; i < simd_size; ++i) + { + res_stepper.template store_simd(fct_stepper.template step_simd()); + } + for (std::size_t i = 0; i < simd_rest; ++i) + { + *(res_stepper) = conditional_cast(*(fct_stepper)); + res_stepper.step_leading(); + fct_stepper.step_leading(); + } - // need to step E1 as well if not contigous assign (e.g. view) - if (!E1::contiguous_layout) - { - res_stepper.to_begin(); - for (std::size_t i = 0; i < idx.size(); ++i) + // next unaligned index + is_row_major + ? strided_assign_detail::idx_tools::next_idx(idx, max_shape) + : strided_assign_detail::idx_tools::next_idx(idx, max_shape); + + fct_stepper.to_begin(); + + // need to step E1 as well if not contigous assign (e.g. view) + if (!E1::contiguous_layout) { - fct_stepper.step(i + step_dim, idx[i]); - res_stepper.step(i + step_dim, idx[i]); + res_stepper.to_begin(); + for (std::size_t i = 0; i < idx.size(); ++i) + { + fct_stepper.step(i + step_dim, idx[i]); + res_stepper.step(i + step_dim, idx[i]); + } + } + else + { + for (std::size_t i = 0; i < idx.size(); ++i) + { + fct_stepper.step(i + step_dim, idx[i]); + } } } - else + } + else + { +#elif defined(strided_parallel_assign) && defined(XTENSOR_USE_TBB) + if (outer_loop_size > XTENSOR_TBB_THRESHOLD / inner_loop_size) + { + tbb::static_partitioner sp; + tbb::parallel_for( + tbb::blocked_range(0ul, outer_loop_size), + [&e1, &e2, is_row_major, step_dim, simd_size, simd_rest, &max_shape, &idx_ = idx]( + const tbb::blocked_range& r + ) + { + auto idx = idx_; + auto fct_stepper = e2.stepper_begin(e1.shape()); + auto res_stepper = e1.stepper_begin(e1.shape()); + std::size_t first_step = true; + // #pragma omp parallel for schedule(static) firstprivate(first_step, fct_stepper, + // res_stepper, idx) + for (std::size_t ox = r.begin(); ox < r.end(); ++ox) + { + if (first_step) + { + is_row_major + ? strided_assign_detail::idx_tools::nth_idx(ox, idx, max_shape) + : strided_assign_detail::idx_tools::nth_idx( + ox, + idx, + max_shape + ); + + for (std::size_t i = 0; i < idx.size(); ++i) + { + fct_stepper.step(i + step_dim, idx[i]); + res_stepper.step(i + step_dim, idx[i]); + } + first_step = false; + } + + for (std::size_t i = 0; i < simd_size; ++i) + { + res_stepper.template store_simd(fct_stepper.template step_simd()); + } + for (std::size_t i = 0; i < simd_rest; ++i) + { + *(res_stepper) = conditional_cast(*(fct_stepper)); + res_stepper.step_leading(); + fct_stepper.step_leading(); + } + + // next unaligned index + is_row_major + ? strided_assign_detail::idx_tools::next_idx(idx, max_shape) + : strided_assign_detail::idx_tools::next_idx(idx, max_shape); + + fct_stepper.to_begin(); + + // need to step E1 as well if not contigous assign (e.g. view) + if (!E1::contiguous_layout) + { + res_stepper.to_begin(); + for (std::size_t i = 0; i < idx.size(); ++i) + { + fct_stepper.step(i + step_dim, idx[i]); + res_stepper.step(i + step_dim, idx[i]); + } + } + else + { + for (std::size_t i = 0; i < idx.size(); ++i) + { + fct_stepper.step(i + step_dim, idx[i]); + } + } + } + }, + sp + ); + } + else + { + +#endif + for (std::size_t ox = 0; ox < outer_loop_size; ++ox) { - for (std::size_t i = 0; i < idx.size(); ++i) + for (std::size_t i = 0; i < simd_size; ++i) + { + res_stepper.store_simd(fct_stepper.template step_simd()); + } + for (std::size_t i = 0; i < simd_rest; ++i) + { + *(res_stepper) = conditional_cast(*(fct_stepper)); + res_stepper.step_leading(); + fct_stepper.step_leading(); + } + + is_row_major + ? strided_assign_detail::idx_tools::next_idx(idx, max_shape) + : strided_assign_detail::idx_tools::next_idx(idx, max_shape); + + fct_stepper.to_begin(); + + // need to step E1 as well if not contigous assign (e.g. view) + if (!E1::contiguous_layout) + { + res_stepper.to_begin(); + for (std::size_t i = 0; i < idx.size(); ++i) + { + fct_stepper.step(i + step_dim, idx[i]); + res_stepper.step(i + step_dim, idx[i]); + } + } + else { - fct_stepper.step(i + step_dim, idx[i]); + for (std::size_t i = 0; i < idx.size(); ++i) + { + fct_stepper.step(i + step_dim, idx[i]); + } } } +#if (defined(XTENSOR_USE_OPENMP) || defined(XTENSOR_USE_TBB)) && defined(strided_parallel_assign) + } +#endif + } + + template <> + template + inline void strided_loop_assigner::run(E1& e1, const E2& e2) + { + strided_assign_detail::loop_sizes_t loop_sizes = strided_loop_assigner::get_loop_sizes(e1, e2); + if (loop_sizes.can_do_strided_assign) + { + run(e1, e2, loop_sizes); } + else + { + // trigger the fallback assigner + stepper_assigner(e1, e2).run(); + } + } + + template <> + template + inline void strided_loop_assigner::run(E1& /*e1*/, const E2& /*e2*/, const loop_sizes_t&) + { } template <> template - inline void strided_loop_assigner::run(E1& /*e1*/, const E2& /*e2*/) + inline void strided_loop_assigner::run(E1& e1, const E2& e2) { + // trigger the fallback assigner + stepper_assigner(e1, e2).run(); } } diff --git a/include/xtensor/xcontainer.hpp b/include/xtensor/xcontainer.hpp index f5a79d3d7..d4621672d 100644 --- a/include/xtensor/xcontainer.hpp +++ b/include/xtensor/xcontainer.hpp @@ -924,9 +924,32 @@ namespace xt inline bool xstrided_container::is_contiguous() const noexcept { using str_type = typename inner_strides_type::value_type; - return is_contiguous_container::value - && (m_strides.empty() || (m_layout == layout_type::row_major && m_strides.back() == str_type(1)) - || (m_layout == layout_type::column_major && m_strides.front() == str_type(1))); + auto is_zero = [](auto i) + { + return i == 0; + }; + if (!is_contiguous_container::value) + { + return false; + } + // We need to make sure the inner-most non-zero stride is one. + // Trailing zero strides are ignored because they indicate bradcasted dimensions. + if (m_layout == layout_type::row_major) + { + auto it = std::find_if_not(m_strides.rbegin(), m_strides.rend(), is_zero); + // If the array has strides of zero, it is a constant, and therefore contiguous. + return it == m_strides.rend() || *it == str_type(1); + } + else if (m_layout == layout_type::column_major) + { + auto it = std::find_if_not(m_strides.begin(), m_strides.end(), is_zero); + // If the array has strides of zero, it is a constant, and therefore contiguous. + return it == m_strides.end() || *it == str_type(1); + } + else + { + return m_strides.empty(); + } } namespace detail diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 7f696e6a5..0ce881944 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -173,6 +173,7 @@ set(XTENSOR_TESTS main.cpp test_xaccumulator.cpp test_xadapt.cpp + test_strided_assign.cpp test_xassign.cpp test_xaxis_iterator.cpp test_xaxis_slice_iterator.cpp diff --git a/test/test_strided_assign.cpp b/test/test_strided_assign.cpp new file mode 100644 index 000000000..e10ba74c2 --- /dev/null +++ b/test/test_strided_assign.cpp @@ -0,0 +1,197 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht * + * Copyright (c) QuantStack * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#include +#include + +#include "xtensor/xadapt.hpp" +#include "xtensor/xarray.hpp" +#include "xtensor/xassign.hpp" +#include "xtensor/xlayout.hpp" +#include "xtensor/xnoalias.hpp" +#include "xtensor/xtensor.hpp" +#include "xtensor/xview.hpp" + +#include "test_common.hpp" +#include "test_common_macros.hpp" + +namespace xt +{ + TEST(xassign_strided, mix_shape_types) + { + auto check_linear_assign = [](auto a, auto b) + { + assert_compatible_shape(a, b); + return xassign_traits::linear_assign(a, b, true); + }; + auto check_strided_assign = [](auto a, auto b) + { + assert_compatible_shape(a, b); +#ifndef _WIN32 + static_assert( + xassign_traits::strided_assign(), + "Failed to do strided assign" + ); +#endif + return strided_assign_detail::get_loop_sizes(a, b).can_do_strided_assign; + }; + { + size_t size = 50; + const std::array shape = {size, size, size}; + xt::xtensor a(shape), b(shape); + auto core = xt::range(1, size - 1); + auto lhs = xt::view(b, core, core, core); + auto rhs = 1.0 / 7.0 + * (xt::view(a, core, core, core) + xt::view(a, core, xt::range(2, size), core) + + xt::view(a, core, xt::range(0, size - 2), core) + + xt::view(a, xt::range(2, size), core, core) + + xt::view(a, xt::range(0, size - 2), core, core)); + xt::noalias(lhs) = rhs; + EXPECT_TRUE(check_strided_assign(lhs, rhs)); + } + { + auto data = std::vector{ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}; + auto simple_xtensor_12 = xt::xtensor{ + {1, 2, 3, 4}, + {5, 6, 7, 8}, + {9, 10, 11, 12}}; + auto simple_xtensor_16 = xt::xtensor{ + {1, 2, 3, 4}, + {5, 6, 7, 8}, + {9, 10, 11, 12}, + {13, 14, 15, 16}}; + auto adapter_strided_noncont = xt::adapt( + data.data(), + 12, + xt::no_ownership(), + std::vector{4, 3}, + std::vector{4, 1} + ); + auto adapter_strided_cont = xt::adapt( + data.data() + 1, + 12, + xt::no_ownership(), + std::vector{3, 4}, + std::vector{4, 1} + ); + auto adapter_strided_noncont_43 = xt::adapt( + data.data(), + 12, + xt::no_ownership(), + std::vector{3, 4}, + std::vector{5, 1} + ); + EXPECT_FALSE(check_strided_assign(adapter_strided_noncont, xt::transpose(adapter_strided_cont))); + auto contiguous_view = xt::view(simple_xtensor_12, xt::range(0, 3), xt::all()); + EXPECT_TRUE(check_linear_assign(contiguous_view, adapter_strided_cont)); + EXPECT_FALSE(check_linear_assign(contiguous_view, xt::transpose(adapter_strided_noncont))); + EXPECT_FALSE(check_linear_assign(xt::transpose(adapter_strided_noncont), contiguous_view)); + EXPECT_FALSE(check_linear_assign(contiguous_view, adapter_strided_noncont_43)); + EXPECT_FALSE(check_linear_assign(adapter_strided_noncont_43, contiguous_view)); + EXPECT_TRUE(check_strided_assign(contiguous_view, adapter_strided_noncont_43)); + EXPECT_TRUE(check_strided_assign(adapter_strided_noncont_43, contiguous_view)); + auto contiguous_view2 = xt::view(simple_xtensor_12, xt::range(1, 3), xt::all()); + auto contiguous_view3 = xt::view(simple_xtensor_12, xt::range(0, 2), xt::all()); + EXPECT_TRUE(check_linear_assign(contiguous_view2, contiguous_view3)); + { + auto view_noncont = xt::view(simple_xtensor_16, xt::all(), xt::range(0, 3)); + EXPECT_FALSE(check_linear_assign(adapter_strided_noncont, view_noncont)); + EXPECT_TRUE(check_strided_assign(adapter_strided_noncont, view_noncont)); + } + { + auto view_cont = xt::view(simple_xtensor_16, xt::range(0, 1), xt::range(0, 4)); + auto view_cont2 = xt::view(simple_xtensor_16, xt::range(3, 4), xt::range(0, 4)); + EXPECT_TRUE(check_linear_assign(view_cont, view_cont2)); + auto view_noncont = xt::view(simple_xtensor_16, xt::range(0, 4), xt::range(0, 3)); + auto view_noncont2 = xt::view(simple_xtensor_16, xt::range(0, 4), xt::range(1, 4)); + EXPECT_FALSE(check_linear_assign(view_noncont, view_noncont2)); + EXPECT_TRUE(check_strided_assign(view_noncont, view_noncont2)); + } + + { + std::vector data2{-1, -1, -1, -1}; + auto linear_adapter = xt::adapt( + data2.data(), + 4, + xt::no_ownership(), + std::vector{4, 1} + ); + auto adapter_cont2 = xt::adapt( + data.data(), + 4, + xt::no_ownership(), + std::vector{1, 4} + ); + EXPECT_TRUE(linear_adapter.is_contiguous()); + EXPECT_TRUE(adapter_cont2.is_contiguous()); + bool success_one = check_linear_assign(linear_adapter, xt::transpose(adapter_cont2)); + bool success_two = check_linear_assign(adapter_cont2, xt::transpose(linear_adapter)); + EXPECT_TRUE(success_one && success_two); + auto adapter_noncont_singlecol = xt::adapt( + data.data(), + 4, + xt::no_ownership(), + std::vector{4, 1}, + std::vector{4, 1} + ); + EXPECT_FALSE(check_linear_assign(adapter_noncont_singlecol, linear_adapter)); + EXPECT_FALSE(check_linear_assign(linear_adapter, adapter_noncont_singlecol)); + EXPECT_FALSE(check_strided_assign(adapter_noncont_singlecol, linear_adapter)); + EXPECT_FALSE(check_strided_assign(linear_adapter, adapter_noncont_singlecol)); + auto adapter_noncont_twocol = xt::adapt( + data.data(), + 4, + xt::no_ownership(), + std::vector{2, 2}, + std::vector{4, 1} + ); + EXPECT_FALSE(check_linear_assign(adapter_noncont_twocol, linear_adapter.reshape({2, 2}))); + EXPECT_FALSE(check_linear_assign(linear_adapter.reshape({2, 2}), adapter_noncont_twocol)); + EXPECT_TRUE(check_strided_assign(adapter_noncont_twocol, linear_adapter.reshape({2, 2}))); + EXPECT_TRUE(check_strided_assign(linear_adapter.reshape({2, 2}), adapter_noncont_twocol)); + auto adapter_zero_strides = xt::adapt( + data.data(), + 4, + xt::no_ownership(), + std::vector{2, 2}, + std::vector{0, 0} + ); + EXPECT_FALSE(check_linear_assign(adapter_zero_strides, adapter_zero_strides)); + EXPECT_FALSE(check_strided_assign(adapter_zero_strides, adapter_zero_strides)); + EXPECT_FALSE(check_linear_assign(adapter_zero_strides, linear_adapter)); + } + + { + std::vector data2{-1, -1, -1, -1, -1, -1}; + auto linear_adapter2 = xt::adapt( + data2.data(), + 6, + xt::no_ownership(), + std::vector{3, 2} + ); + auto strided_adapter = xt::adapt( + data.data(), + 6, + xt::no_ownership(), + std::vector{3, 2}, + std::vector{4, 1} + ); + EXPECT_FALSE(check_linear_assign(linear_adapter2, strided_adapter)); + EXPECT_FALSE(check_linear_assign(strided_adapter, linear_adapter2)); + EXPECT_TRUE(check_strided_assign(linear_adapter2, strided_adapter)); + EXPECT_TRUE(check_strided_assign(strided_adapter, linear_adapter2)); + xt::noalias(strided_adapter) = linear_adapter2; + auto result_expected = std::vector{ + -1, -1, 3, 4, -1, -1, 7, 8, -1, -1, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}; + EXPECT_EQ(data, result_expected); + } + } + } +} diff --git a/test/test_xassign.cpp b/test/test_xassign.cpp index 475d0e5cf..5367a32da 100644 --- a/test/test_xassign.cpp +++ b/test/test_xassign.cpp @@ -7,6 +7,7 @@ * The full license is in the file LICENSE, distributed with this software. * ****************************************************************************/ +#include #include #include @@ -82,6 +83,16 @@ class my_vector return m_data.begin(); } + auto rbegin() const + { + return std::make_reverse_iterator(end()); + } + + auto rend() const + { + return std::make_reverse_iterator(begin()); + } + auto empty() const { return m_data.empty();