Merge pull request #23 from JohanMabille/rounding

rounding functions
xtensor-stack · May 29, 2017 · d8629e9 · d8629e9
2 parents bfd5b04 + 2de9097
commit d8629e9
Show file tree

Hide file tree

Showing 10 changed files with 569 additions and 4 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -37,6 +37,9 @@ set(XSIMD_HEADERS
     ${XSIMD_INCLUDE_DIR}/xsimd/config/xsimd_config.hpp
     ${XSIMD_INCLUDE_DIR}/xsimd/config/xsimd_include.hpp
     ${XSIMD_INCLUDE_DIR}/xsimd/config/xsimd_instruction_set.hpp
+    ${XSIMD_INCLUDE_DIR}/xsimd/math/xsimd_fp_sign.hpp
+    ${XSIMD_INCLUDE_DIR}/xsimd/math/xsimd_numerical_constant.hpp
+    ${XSIMD_INCLUDE_DIR}/xsimd/math/xsimd_rounding.hpp
     ${XSIMD_INCLUDE_DIR}/xsimd/memory/xsimd_aligned_allocator.hpp
     ${XSIMD_INCLUDE_DIR}/xsimd/memory/xsimd_aligned_stack_buffer.hpp
     ${XSIMD_INCLUDE_DIR}/xsimd/types/xsimd_avx_conversion.hpp

diff --git a/include/xsimd/math/xsimd_fp_sign.hpp b/include/xsimd/math/xsimd_fp_sign.hpp
@@ -0,0 +1,43 @@
+/***************************************************************************
+* Copyright (c) 2016, Johan Mabille and Sylvain Corlay                     *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XSIMD_FP_SIGN_HPP
+#define XSIMD_FP_SIGN_HPP
+
+#include "xsimd_numerical_constant.hpp"
+
+namespace xsimd
+{
+
+    template <class T, std::size_t N>
+    batch<T, N> bitofsign(const batch<T, N>& x);
+
+    template <class T, std::size_t N>
+    batch<T, N> copysign(const batch<T, N>& x1, const batch<T, N>& x2);
+
+    /**************************
+     * fp_sign implementation *
+     **************************/
+
+    // Keeps only the bits of x that are also set in the minus-zero constant
+    // batch, i.e. the sign bit of each lane; every other bit is cleared.
+    template <class T, std::size_t N>
+    inline batch<T, N> bitofsign(const batch<T, N>& x)
+    {
+        const batch<T, N> sign_mask = minuszero<batch<T, N>>();
+        return x & sign_mask;
+    }
+
+    // Lane-wise copysign: the magnitude comes from x1, the sign bit from x2.
+    template <class T, std::size_t N>
+    inline batch<T, N> copysign(const batch<T, N>& x1, const batch<T, N>& x2)
+    {
+        const auto magnitude = abs(x1);
+        const auto sign = bitofsign(x2);
+        return magnitude | sign;
+    }
+
+}
+
+#endif
+
diff --git a/include/xsimd/math/xsimd_numerical_constant.hpp b/include/xsimd/math/xsimd_numerical_constant.hpp
@@ -0,0 +1,94 @@
+/***************************************************************************
+* Copyright (c) 2016, Johan Mabille and Sylvain Corlay                     *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XSIMD_NUMERICAL_CONSTANT_HPP
+#define XSIMD_NUMERICAL_CONSTANT_HPP
+
+#include "../types/xsimd_types_include.hpp"
+
+namespace xsimd
+{
+    template <class T>
+    constexpr T maxflint() noexcept;
+
+    template <class T>
+    constexpr T minuszero() noexcept;
+
+    template <class T>
+    constexpr T twotonmb() noexcept;
+
+    /***************************
+     * maxflint implementation *
+     ***************************
+     * The scalar specializations return 16777216 (2^24) for float and
+     * 9007199254740992 (2^53) for double. The generic overload builds a T
+     * from the scalar constant of T::value_type, so it is meant to be
+     * instantiated with types exposing a value_type (such as batches).
+     * xsimd_rounding.hpp uses this value as the magnitude threshold beyond
+     * which trunc and round hand back their input unchanged.
+     */
+
+    template <class T>
+    constexpr T maxflint() noexcept
+    {
+        return T(maxflint<typename T::value_type>());
+    }
+
+    template <>
+    constexpr float maxflint<float>() noexcept
+    {
+        return 16777216.0f;
+    }
+
+    template <>
+    constexpr double maxflint<double>() noexcept
+    {
+        return 9007199254740992.0;
+    }
+
+    /****************************
+     * minuszero implementation *
+     ****************************
+     * The scalar specializations return negative zero (-0.0f / -0.0). The
+     * generic overload builds a T from the scalar constant of
+     * T::value_type. xsimd_fp_sign.hpp ANDs a batch with this constant in
+     * bitofsign to isolate the sign bit.
+     */
+
+    template <class T>
+    constexpr T minuszero() noexcept
+    {
+        return T(minuszero<typename T::value_type>());
+    }
+
+    template <>
+    constexpr float minuszero<float>() noexcept
+    {
+        return -0.0f;
+    }
+
+    template <>
+    constexpr double minuszero<double>() noexcept
+    {
+        return -0.0;
+    }
+
+    /***************************
+     * twotonmb implementation *
+     ***************************
+     * The scalar specializations return 8388608 (2^23) for float and
+     * 4503599627370496 (2^52) for double. The name presumably stands for
+     * "two to the number of mantissa bits" (to be confirmed). The generic
+     * overload builds a T from the scalar constant of T::value_type.
+     * xsimd_rounding.hpp adds and then subtracts this value in the SSE2
+     * fallback of nearbyint.
+     */
+
+    template <class T>
+    constexpr T twotonmb() noexcept
+    {
+        return T(twotonmb<typename T::value_type>());
+    }
+
+    template <>
+    constexpr float twotonmb<float>() noexcept
+    {
+        return 8388608.0f;
+    }
+
+    template <>
+    constexpr double twotonmb<double>() noexcept
+    {
+        return 4503599627370496.0;
+    }
+
+}
+
+#endif
+
diff --git a/include/xsimd/math/xsimd_rounding.hpp b/include/xsimd/math/xsimd_rounding.hpp
@@ -0,0 +1,216 @@
+/***************************************************************************
+* Copyright (c) 2016, Johan Mabille and Sylvain Corlay                     *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XSIMD_ROUNDING_HPP
+#define XSIMD_ROUNDING_HPP
+
+#include <cmath>
+#include "xsimd_fp_sign.hpp"
+#include "xsimd_numerical_constant.hpp"
+
+namespace xsimd
+{
+
+    template <class T, std::size_t N>
+    batch<T, N> ceil(const batch<T, N>& x);
+
+    template <class T, std::size_t N>
+    batch<T, N> floor(const batch<T, N>& x);
+
+    template <class T, std::size_t N>
+    batch<T, N> trunc(const batch<T, N>& x);
+
+    template <class T, std::size_t N>
+    batch<T, N> round(const batch<T, N>& x);
+
+    // Contrary to their std counterparts, these functions
+    // assume that the rounding mode is FE_TONEAREST.
+
+    template <class T, std::size_t N>
+    batch<T, N> nearbyint(const batch<T, N>& x);
+
+    template <class T, std::size_t N>
+    batch<T, N> rint(const batch<T, N>& x);
+
+    /**********************
+     * SSE implementation *
+     **********************
+     * When XSIMD_X86_INSTR_SET >= XSIMD_X86_SSE4_1_VERSION, the
+     * specializations for batch<float, 4> and batch<double, 2> each
+     * forward to a single intrinsic:
+     *   ceil      -> _mm_ceil_ps / _mm_ceil_pd
+     *   floor     -> _mm_floor_ps / _mm_floor_pd
+     *   trunc     -> _mm_round_ps / _mm_round_pd with _MM_FROUND_TO_ZERO
+     *   nearbyint -> _mm_round_ps / _mm_round_pd with
+     *                _MM_FROUND_TO_NEAREST_INT
+     */
+
+#if XSIMD_X86_INSTR_SET >= XSIMD_X86_SSE4_1_VERSION
+
+    template <>
+    inline batch<float, 4> ceil(const batch<float, 4>& x)
+    {
+        return _mm_ceil_ps(x);
+    }
+
+    template <>
+    inline batch<double, 2> ceil(const batch<double, 2>& x)
+    {
+        return _mm_ceil_pd(x);
+    }
+
+    template <>
+    inline batch<float, 4> floor(const batch<float, 4>& x)
+    {
+        return _mm_floor_ps(x);
+    }
+
+    template <>
+    inline batch<double, 2> floor(const batch<double, 2>& x)
+    {
+        return _mm_floor_pd(x);
+    }
+
+    template <>
+    inline batch<float, 4> trunc(const batch<float, 4>& x)
+    {
+        return _mm_round_ps(x, _MM_FROUND_TO_ZERO);
+    }
+
+    template <>
+    inline batch<double, 2> trunc(const batch<double, 2>& x)
+    {
+        return _mm_round_pd(x, _MM_FROUND_TO_ZERO);
+    }
+
+    template <>
+    inline batch<float, 4> nearbyint(const batch<float, 4>& x)
+    {
+        return _mm_round_ps(x, _MM_FROUND_TO_NEAREST_INT);
+    }
+
+    template<>
+    inline batch<double, 2> nearbyint(const batch<double, 2>& x)
+    {
+        return _mm_round_pd(x, _MM_FROUND_TO_NEAREST_INT);
+    }
+
+#elif XSIMD_X86_INSTR_SET >= XSIMD_X86_SSE2_VERSION
+
+    // SSE2 fallback for ceil, built on trunc: lanes where truncation landed
+    // below x get one added, the remaining lanes keep the truncated value.
+    template <class T, std::size_t N>
+    inline batch<T, N> ceil(const batch<T, N>& x)
+    {
+        const batch<T, N> truncated = trunc(x);
+        const auto needs_bump = truncated < x;
+        return select(needs_bump, truncated + batch<T, N>(1), truncated);
+    }
+
+    // SSE2 fallback for floor, built on trunc: lanes where truncation landed
+    // above x get one subtracted, the remaining lanes keep the truncated
+    // value.
+    template <class T, std::size_t N>
+    inline batch<T, N> floor(const batch<T, N>& x)
+    {
+        const batch<T, N> truncated = trunc(x);
+        const auto needs_drop = truncated > x;
+        return select(needs_drop, truncated - batch<T, N>(1), truncated);
+    }
+
+    // SSE2 fallback for float truncation: lanes whose magnitude is below
+    // maxflint take a round trip through to_int / to_float, all other lanes
+    // are passed through untouched.
+    template <>
+    inline batch<float, 4> trunc(const batch<float, 4>& x)
+    {
+        const batch<float, 4> limit = maxflint<batch<float, 4>>();
+        const auto in_range = abs(x) < limit;
+        const auto via_int = to_float(to_int(x));
+        return select(in_range, via_int, x);
+    }
+
+    // SSE2 fallback for double truncation: applies std::trunc to each of the
+    // two lanes and packs the results back into a batch.
+    template <>
+    inline batch<double, 2> trunc(const batch<double, 2>& x)
+    {
+        const auto lane0 = std::trunc(x[0]);
+        const auto lane1 = std::trunc(x[1]);
+        return batch<double, 2>(lane0, lane1);
+    }
+
+    // SSE2 fallback for nearbyint: rounds every lane of x to an integral
+    // value, relying on the FPU rounding mode being FE_TONEAREST.
+    //
+    // The rounding is done on the magnitude of x: adding twotonmb and then
+    // subtracting it again pushes the fractional bits out of the mantissa, so
+    // the addition itself performs the rounding. Lanes whose magnitude is
+    // already >= twotonmb are returned as they are. The sign bit of x is
+    // OR-ed back onto the result at the end.
+    template <class T, std::size_t N>
+    inline batch<T, N> nearbyint(const batch<T, N>& x)
+    {
+        using btype = batch<T, N>;
+        btype s = bitofsign(x);
+        // The trick below only works on non-negative values. The former
+        // `x | s` was a no-op (s is already the sign bit of x), which left
+        // negative lanes unrounded, so take the magnitude explicitly.
+        btype v = abs(x);
+        btype t2n = twotonmb<btype>();
+        btype d0 = v + t2n;
+        return s | select(v < t2n, d0 - t2n, v);
+    }
+
+#endif
+
+    /**********************
+     * AVX implementation *
+     **********************
+     * When XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX_VERSION, the
+     * specializations for batch<float, 8> and batch<double, 4> all
+     * forward to _mm256_round_ps / _mm256_round_pd and differ only in the
+     * rounding-mode argument:
+     *   ceil      -> _MM_FROUND_CEIL
+     *   floor     -> _MM_FROUND_FLOOR
+     *   trunc     -> _MM_FROUND_TO_ZERO
+     *   nearbyint -> _MM_FROUND_TO_NEAREST_INT
+     */
+
+#if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX_VERSION
+
+    template <>
+    inline batch<float, 8> ceil(const batch<float, 8>& x)
+    {
+        return _mm256_round_ps(x, _MM_FROUND_CEIL);
+    }
+
+    template <>
+    inline batch<double, 4> ceil(const batch<double, 4>& x)
+    {
+        return _mm256_round_pd(x, _MM_FROUND_CEIL);
+    }
+
+    template <>
+    inline batch<float, 8> floor(const batch<float, 8>& x)
+    {
+        return _mm256_round_ps(x, _MM_FROUND_FLOOR);
+    }
+
+    template<>
+    inline batch<double, 4> floor(const batch<double, 4>& x)
+    {
+        return _mm256_round_pd(x, _MM_FROUND_FLOOR);
+    }
+
+    template <>
+    inline batch<float, 8> trunc<float, 8>(const batch<float, 8>& x)
+    {
+        return _mm256_round_ps(x, _MM_FROUND_TO_ZERO);
+    }
+
+    template <>
+    inline batch<double, 4> trunc<double, 4>(const batch<double, 4>& x)
+    {
+        return _mm256_round_pd(x, _MM_FROUND_TO_ZERO);
+    }
+
+    template <>
+    inline batch<float, 8> nearbyint(const batch<float, 8>& x)
+    {
+        return _mm256_round_ps(x, _MM_FROUND_TO_NEAREST_INT);
+    }
+
+    template<>
+    inline batch<double, 4> nearbyint(const batch<double, 4>& x)
+    {
+            return _mm256_round_pd(x, _MM_FROUND_TO_NEAREST_INT);
+    }
+
+#endif
+
+    /**************************
+     * Generic implementation *
+     **************************/
+
+    // Rounds each lane to the nearest integral value, with halfway cases
+    // going away from zero. Works on the magnitude of x: starts from
+    // ceil(|x|) and steps back by one when that overshoots |x| by more than
+    // one half, then restores the sign of x. Lanes whose magnitude exceeds
+    // maxflint are returned unchanged.
+    template <class T, std::size_t N>
+    inline batch<T, N> round(const batch<T, N>& x)
+    {
+        using batch_type = batch<T, N>;
+        const batch_type magnitude = abs(x);
+        const batch_type upper = ceil(magnitude);
+        const auto overshoots = upper - batch_type(0.5) > magnitude;
+        const batch_type rounded = select(overshoots, upper - batch_type(1), upper);
+        const auto too_large = magnitude > maxflint<batch_type>();
+        return select(too_large, x, copysign(rounded, x));
+    }
+
+    // rint simply forwards to nearbyint and returns its result.
+    template <class T, std::size_t N>
+    inline batch<T, N> rint(const batch<T, N>& x)
+    {
+        const batch<T, N> nearest = nearbyint(x);
+        return nearest;
+    }
+}
+
+#endif
+
diff --git a/include/xsimd/types/xsimd_avx_double.hpp b/include/xsimd/types/xsimd_avx_double.hpp
@@ -151,7 +151,7 @@ namespace xsimd
 
     inline batch_bool<double, 4>::operator __m256d() const
     {
-        return *this;
+        return m_value;
     }
 
     inline batch_bool<double, 4> operator&(const batch_bool<double, 4>& lhs, const batch_bool<double, 4>& rhs)

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
@@ -65,8 +65,10 @@ include_directories(${GTEST_INCLUDE_DIRS})
 
 set(XSIMD_TESTS
     main.cpp
-    test_xsimd.cpp
     xsimd_basic_test.hpp
+    xsimd_basic_test.cpp
+    xsimd_rounding_test.hpp
+    xsimd_rounding_test.cpp
     xsimd_tester.hpp
     xsimd_test_utils.hpp
 )

diff --git a/test/test_xsimd.cpp → test/xsimd_basic_test.cpp b/test/test_xsimd.cpp → test/xsimd_basic_test.cpp
diff --git a/test/xsimd_basic_test.hpp b/test/xsimd_basic_test.hpp
@@ -6,8 +6,8 @@
 * The full license is in the file LICENSE, distributed with this software. *
 ****************************************************************************/
 
-#ifndef XSIMD_COMMON_TEST_HPP
-#define XSIMD_COMMON_TEST_HPP
+#ifndef XSIMD_BASIC_TEST_HPP
+#define XSIMD_BASIC_TEST_HPP
 
 #include "xsimd_tester.hpp"
 #include "xsimd_test_utils.hpp"