Merge pull request #26 from JohanMabille/exp

exponential functions
xtensor-stack · Jun 2, 2017 · da1fa0b · da1fa0b
2 parents 59b26e4 + 24930a6
commit da1fa0b
Show file tree

Hide file tree

Showing 10 changed files with 1,007 additions and 9 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -37,8 +37,11 @@ set(XSIMD_HEADERS
     ${XSIMD_INCLUDE_DIR}/xsimd/config/xsimd_config.hpp
     ${XSIMD_INCLUDE_DIR}/xsimd/config/xsimd_include.hpp
     ${XSIMD_INCLUDE_DIR}/xsimd/config/xsimd_instruction_set.hpp
+    ${XSIMD_INCLUDE_DIR}/xsimd/math/xsimd_exp_reduction.hpp
+    ${XSIMD_INCLUDE_DIR}/xsimd/math/xsimd_exponential.hpp
     ${XSIMD_INCLUDE_DIR}/xsimd/math/xsimd_fp_manipulation.hpp
     ${XSIMD_INCLUDE_DIR}/xsimd/math/xsimd_fp_sign.hpp
+    ${XSIMD_INCLUDE_DIR}/xsimd/math/xsimd_horner.hpp
     ${XSIMD_INCLUDE_DIR}/xsimd/math/xsimd_numerical_constant.hpp
     ${XSIMD_INCLUDE_DIR}/xsimd/math/xsimd_rounding.hpp
     ${XSIMD_INCLUDE_DIR}/xsimd/memory/xsimd_aligned_allocator.hpp

diff --git a/include/xsimd/math/xsimd_exp_reduction.hpp b/include/xsimd/math/xsimd_exp_reduction.hpp
@@ -0,0 +1,247 @@
+/***************************************************************************
+* Copyright (c) 2016, Johan Mabille and Sylvain Corlay                     *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XSIMD_EXP_REDUCTION_HPP
+#define XSIMD_EXP_REDUCTION_HPP
+
+#include "xsimd_horner.hpp"
+#include "xsimd_numerical_constant.hpp"
+#include "xsimd_rounding.hpp"
+
+namespace xsimd
+{
+    struct exp_tag {};
+    struct exp2_tag {};
+    struct exp10_tag {};
+
+    namespace detail
+    {
+
+        /**********************
+         * exp_reduction_base *
+         **********************/
+
+        template <class B, class Tag>
+        struct exp_reduction_base;
+
+        template <class B>
+        struct exp_reduction_base<B, exp_tag>
+        {
+            static constexpr B maxlog() noexcept
+            {
+                return xsimd::maxlog<B>();
+            }
+
+            static constexpr B minlog() noexcept
+            {
+                return xsimd::minlog<B>();
+            }
+        };
+
+        template <class B>
+        struct exp_reduction_base<B, exp2_tag>
+        {
+            static constexpr B maxlog() noexcept
+            {
+                return xsimd::maxlog2<B>();
+            }
+
+            static constexpr B minlog() noexcept
+            {
+                return xsimd::minlog2<B>();
+            }
+        };
+
+        template <class B>
+        struct exp_reduction_base<B, exp10_tag>
+        {
+            static constexpr B maxlog() noexcept
+            {
+                return xsimd::maxlog10<B>();
+            }
+
+            static constexpr B minlog() noexcept
+            {
+                return xsimd::minlog10<B>();
+            }
+        };
+
+        /*****************
+         * exp_reduction *
+         *****************/
+
+        template <class B, class Tag, class T = typename B::value_type>
+        struct exp_reduction;
+
+        template <class B>
+        struct exp_reduction<B, exp_tag, float> : exp_reduction_base<B, exp_tag>
+        {
+            static inline B approx(const B& x)
+            {
+                B y = horner<B,
+                    0x3f000000, //  5.0000000e-01
+                    0x3e2aa9a5, //  1.6666277e-01
+                    0x3d2aa957, //  4.1665401e-02
+                    0x3c098d8b, //  8.3955629e-03
+                    0x3ab778cf  //  1.3997796e-03
+                >(x);
+                return ++fma(y, x * x, x);
+            }
+
+            static inline B reduce(const B& a, B& x)
+            {
+                B k = nearbyint(invlog_2<B>() * a);
+                x = fnma(k, log_2hi<B>(), a);
+                x = fnma(k, log_2lo<B>(), x);
+                return k;
+            }
+        };
+
+        template <class B>
+        struct exp_reduction<B, exp2_tag, float> : exp_reduction_base<B, exp2_tag>
+        {
+            static inline B approx(const B& x)
+            {
+                B y = horner<B,
+                    0x3e75fdf1,  //    2.4022652e-01
+                    0x3d6356eb,  //    5.5502813e-02
+                    0x3c1d9422,  //    9.6178371e-03
+                    0x3ab01218,  //    1.3433127e-03
+                    0x3922c8c4   //    1.5524315e-04
+                >(x);
+                return ++fma(y, x * x, x * log_2<B>());
+            }
+
+            static inline B reduce(const B& a, B& x)
+            {
+                B k = nearbyint(a);
+                x = (a - k);
+                return k;
+            }
+        };
+
+        template <class B>
+        struct exp_reduction<B, exp10_tag, float> : exp_reduction_base<B, exp10_tag>
+        {
+            static inline B approx(const B& x)
+            {
+                return ++(horner<B,
+                    0x40135d8e, //    2.3025851e+00
+                    0x4029a926, //    2.6509490e+00
+                    0x400237da, //    2.0346589e+00
+                    0x3f95eb4c, //    1.1712432e+00
+                    0x3f0aacef, //    5.4170126e-01
+                    0x3e54dff1  //    2.0788552e-01
+                >(x) * x);
+            }
+
+            static inline B reduce(const B& a, B& x)
+            {
+                B k = nearbyint(invlog10_2<B>() * a);
+                x = fnma(k, log10_2hi<B>(), a);
+                x -= k * log10_2lo<B>();
+                return k;
+            }
+        };
+
+        template <class B>
+        struct exp_reduction<B, exp_tag, double> : exp_reduction_base<B, exp_tag>
+        {
+            static inline B approx(const B& x)
+            {
+                B t = x * x;
+                return fnma(t,
+                    horner<B,
+                    0x3fc555555555553eull,
+                    0xbf66c16c16bebd93ull,
+                    0x3f11566aaf25de2cull,
+                    0xbebbbd41c5d26bf1ull,
+                    0x3e66376972bea4d0ull>(t), x);
+            }
+
+            static inline B reduce(const B& a, B& hi, B& lo, B& x)
+            {
+                B k = nearbyint(invlog_2<B>() * a);
+                hi = fnma(k, log_2hi<B>(), a);
+                lo = k * log_2lo<B>();
+                x = hi - lo;
+                return k;
+            }
+
+            static inline B finalize(const B& x, const B& c, const B& hi, const B& lo)
+            {
+                return B(1.) - (((lo - (x * c) / (B(2.) - c)) - hi));
+            }
+        };
+
+        template <class B>
+        struct exp_reduction<B, exp2_tag, double> : exp_reduction_base<B, exp2_tag>
+        {
+            static inline B approx(const B& x)
+            {
+                B t = x * x;
+                return fnma(t,
+                    horner<B,
+                    0x3fc555555555553eull,
+                    0xbf66c16c16bebd93ull,
+                    0x3f11566aaf25de2cull,
+                    0xbebbbd41c5d26bf1ull,
+                    0x3e66376972bea4d0ull
+                    >(t), x);
+            }
+
+            static inline B reduce(const B& a, B&, B&, B& x)
+            {
+                B k = nearbyint(a);
+                x = (a - k) * log_2<B>();
+                return k;
+            }
+
+            static inline B finalize(const B& x, const B& c, const B&, const B&)
+            {
+                return B(1.) + x +  x * c / (B(2.) - c);
+            }
+        };
+
+        template <class B>
+        struct exp_reduction<B, exp10_tag, double> : exp_reduction_base<B, exp10_tag>
+        {
+            static inline B approx(const B& x)
+            {
+                B xx = x * x;
+                B px = x * horner<B,
+                    0x40a2b4798e134a01ull,
+                    0x40796b7a050349e4ull,
+                    0x40277d9474c55934ull,
+                    0x3fa4fd75f3062dd4ull
+                >(xx);
+                B x2 = px / (horner1<B,
+                    0x40a03f37650df6e2ull,
+                    0x4093e05eefd67782ull,
+                    0x405545fdce51ca08ull
+                >(xx) - px);
+                return ++(x2 + x2);
+            }
+
+            static inline B reduce(const B& a, B&, B&, B& x)
+            {
+                B k = nearbyint(invlog10_2<B>() * a);
+                x = fnma(k, log10_2hi<B>(), a);
+                x = fnma(k, log10_2lo<B>(), x);
+                return k;
+            }
+
+            static inline B finalize(const B&, const B& c, const B&, const B&)
+            {
+                return c;
+            }
+        };
+    }
+}
+
+#endif
diff --git a/include/xsimd/math/xsimd_exponential.hpp b/include/xsimd/math/xsimd_exponential.hpp
@@ -0,0 +1,87 @@
+/***************************************************************************
+* Copyright (c) 2016, Johan Mabille and Sylvain Corlay                     *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XSIMD_EXPONENTIAL_HPP
+#define XSIMD_EXPONENTIAL_HPP
+
+#include "xsimd_exp_reduction.hpp"
+#include "xsimd_fp_manipulation.hpp"
+
+namespace xsimd
+{
+    template <class T, std::size_t N>
+    batch<T, N> exp(const batch<T, N>& x);
+
+    template <class T, std::size_t N>
+    batch<T, N> exp2(const batch<T, N>& x);
+
+    template <class T, std::size_t N>
+    batch<T, N> exp10(const batch<T, N>& x);
+
+    /******************************
+     * exponential implementation *
+     ******************************/
+
+    namespace detail
+    {
+        template <class B, class Tag, class T = typename B::value_type>
+        struct exponential;
+
+        template <class B, class Tag>
+        struct exponential<B, Tag, float>
+        {
+            static inline B compute(const B& a)
+            {
+                using reducer_t = exp_reduction<B, Tag>;
+                B x;
+                B k = reducer_t::reduce(a, x);
+                x = reducer_t::approx(x);
+                x = select(a <= reducer_t::minlog(), B(0.), ldexp(x, to_int(k)));
+                x = select(a >= reducer_t::maxlog(), infinity<B>(), x);
+                return x;
+            }
+        };
+
+        template <class B, class Tag>
+        struct exponential<B, Tag, double>
+        {
+            static inline B compute(const B& a)
+            {
+                using reducer_t = exp_reduction<B, Tag>;
+                B hi, lo, x;
+                B k = reducer_t::reduce(a, hi, lo, x);
+                B c = reducer_t::approx(x);
+                c = reducer_t::finalize(x, c, hi, lo);
+                c = select(a <= reducer_t::minlog(), B(0.), ldexp(c, to_int(k)));
+                c = select(a >= reducer_t::maxlog(), infinity<B>(), c);
+                return c;
+            }
+        };
+    }
+
+    template <class T, std::size_t N>
+    inline batch<T, N> exp(const batch<T, N>& x)
+    {
+        return detail::exponential<batch<T, N>, exp_tag>::compute(x);
+    }
+
+    template <class T, std::size_t N>
+    inline batch<T, N> exp2(const batch<T, N>& x)
+    {
+        return detail::exponential<batch<T, N>, exp2_tag>::compute(x);
+    }
+
+    template <class T, std::size_t N>
+    inline batch<T, N> exp10(const batch<T, N>& x)
+    {
+        return detail::exponential<batch<T, N>, exp10_tag>::compute(x);
+    }
+
+}
+
+#endif