Merge pull request #33 from JohanMabille/expm1

expm1 implementation
xtensor-stack · Jun 2, 2017 · 30101f2 · 30101f2
2 parents da1fa0b + d6fb74b
commit 30101f2
Show file tree

Hide file tree

Showing 5 changed files with 165 additions and 12 deletions.
diff --git a/include/xsimd/math/xsimd_exponential.hpp b/include/xsimd/math/xsimd_exponential.hpp
@@ -23,17 +23,20 @@ namespace xsimd
     template <class T, std::size_t N>
     batch<T, N> exp10(const batch<T, N>& x);
 
+    template <class T, std::size_t N>
+    batch<T, N> expm1(const batch<T, N>& x);
+
     /******************************
      * exponential implementation *
      ******************************/
 
     namespace detail
     {
         template <class B, class Tag, class T = typename B::value_type>
-        struct exponential;
+        struct exp_kernel;
 
         template <class B, class Tag>
-        struct exponential<B, Tag, float>
+        struct exp_kernel<B, Tag, float>
         {
             static inline B compute(const B& a)
             {
@@ -48,7 +51,7 @@ namespace xsimd
         };
 
         template <class B, class Tag>
-        struct exponential<B, Tag, double>
+        struct exp_kernel<B, Tag, double>
         {
             static inline B compute(const B& a)
             {
@@ -67,19 +70,98 @@ namespace xsimd
     template <class T, std::size_t N>
     inline batch<T, N> exp(const batch<T, N>& x)
     {
-        return detail::exponential<batch<T, N>, exp_tag>::compute(x);
+        return detail::exp_kernel<batch<T, N>, exp_tag>::compute(x);
     }
 
     template <class T, std::size_t N>
     inline batch<T, N> exp2(const batch<T, N>& x)
     {
-        return detail::exponential<batch<T, N>, exp2_tag>::compute(x);
+        return detail::exp_kernel<batch<T, N>, exp2_tag>::compute(x);
     }
 
     template <class T, std::size_t N>
     inline batch<T, N> exp10(const batch<T, N>& x)
     {
-        return detail::exponential<batch<T, N>, exp10_tag>::compute(x);
+        return detail::exp_kernel<batch<T, N>, exp10_tag>::compute(x);
+    }
+
+    /************************
+     * expm1 implementation *
+     ************************/
+
+    namespace detail
+    {
+        template <class B, class T = typename B::value_type>
+        struct expm1_kernel;
+
+        template <class B>
+        struct expm1_kernel<B, float>
+        {
+            static inline B compute(const B& a)
+            {
+                B k = nearbyint(invlog_2<B>() * a);
+                B x = fnma(k, log_2hi<B>(), a);
+                x = fnma(k, log_2lo<B>(), x);
+                B hx = x * B(0.5);
+                B hxs = x * hx;
+                B r = horner<B,
+                    0X3F800000UL, // 1
+                    0XBD08887FUL, // -3.3333298E-02
+                    0X3ACF6DB4UL  // 1.582554
+                >(hxs);
+                B t = fnma(r, hx, B(3.));
+                B e = hxs * ((r - t) / (B(6.) - x * t));
+                e = fms(x, e, hxs);
+                using i_type = as_integer_t<B>;
+                i_type ik = to_int(k);
+                B two2mk = bitwise_cast<B>((maxexponent<B>() - ik) << nmb<B>());
+                B y = B(1.) - two2mk - (e - x);
+                return ldexp(y, ik);
+            }
+        };
+
+        template <class B>
+        struct expm1_kernel<B, double>
+        {
+            static inline B compute(const B& a)
+            {
+                B k = nearbyint(invlog_2<B>() * a);
+                B hi = fnma(k, log_2hi<B>(), a);
+                B lo = k * log_2lo<B>();
+                B x = hi - lo;
+                B hxs = x * x * B(0.5);
+                B r = horner<B,
+                    0X3FF0000000000000ULL,
+                    0XBFA11111111110F4ULL,
+                    0X3F5A01A019FE5585ULL,
+                    0XBF14CE199EAADBB7ULL,
+                    0X3ED0CFCA86E65239ULL,
+                    0XBE8AFDB76E09C32DULL
+                >(hxs);
+                B t = B(3.) - r * B(0.5) * x;
+                B e = hxs * ((r - t) / (B(6) - x * t));
+                B c = (hi - x) - lo;
+                e = (x * (e - c) - c) - hxs;
+                using i_type = as_integer_t<B>;
+                i_type ik = to_int(k);
+                B two2mk = bitwise_cast<B>((maxexponent<B>() - ik) << nmb<B>());
+                B ct1 = B(1.) - two2mk - (e - x);
+                B ct2 = ++(x - (e + two2mk));
+                B y = select(k < B(20.), ct1, ct2);
+                return ldexp(y, ik);
+            }
+        };
+    }
+
+    template <class T, std::size_t N>
+    inline batch<T, N> expm1(const batch<T, N>& x)
+    {
+        using b_type = batch<T, N>;
+        return select(x < logeps<b_type>(),
+                      b_type(-1.),
+                      select(x > maxlog<b_type>(),
+                             infinity<b_type>(),
+                             detail::expm1_kernel<b_type, T>::compute(x)));
     }
 
 }

diff --git a/include/xsimd/math/xsimd_fp_manipulation.hpp b/include/xsimd/math/xsimd_fp_manipulation.hpp
@@ -25,8 +25,8 @@ namespace xsimd
     inline batch<T, N> ldexp(const batch<T, N>& x, const batch<as_integer_t<T>, N>& e)
     {
         using btype = batch<T, N>;
-        using itype = batch<as_integer_t<T>, N>;
-        itype ik = e + itype(maxexponent<T>());
+        using itype = as_integer_t<btype>;
+        itype ik = e + maxexponent<T>();
         ik = ik << nmb<T>();
         return x * bitwise_cast<btype>(ik);
     }

diff --git a/include/xsimd/math/xsimd_numerical_constant.hpp b/include/xsimd/math/xsimd_numerical_constant.hpp
@@ -39,7 +39,10 @@ namespace xsimd
     constexpr T log10_2lo() noexcept;
 
     template <class T>
-    constexpr int32_t maxexponent() noexcept;
+    constexpr T logeps() noexcept;
+
+    template <class T>
+    constexpr as_integer_t<T> maxexponent() noexcept;
 
     template <class T>
     constexpr T maxflint() noexcept;
@@ -247,18 +250,46 @@ namespace xsimd
         return detail::caster64_t(uint64_t(0x3ed3509f79fef312U)).f;
     }
 
+    /*************************
+     * logeps implementation *
+     *************************/
+
+    template <class T>
+    constexpr T logeps() noexcept
+    {
+        return T(logeps<typename T::value_type>());
+    }
+
+    template <>
+    constexpr float logeps<float>() noexcept
+    {
+        return detail::caster32_t(0xc17f1402U).f;
+    }
+
+    template <>
+    constexpr double logeps<double>() noexcept
+    {
+        return detail::caster64_t(uint64_t(0xc04205966f2b4f12U)).f;
+    }
+
     /******************************
      * maxexponent implementation *
      ******************************/
 
+    template <class T>
+    constexpr as_integer_t<T> maxexponent() noexcept
+    {
+        return as_integer_t<T>(maxexponent<typename T::value_type>());
+    }
+
     template<>
     constexpr int32_t maxexponent<float>() noexcept
     {
         return 127;
     }
 
     template <>
-    constexpr int32_t maxexponent<double>() noexcept
+    constexpr int64_t maxexponent<double>() noexcept
     {
         return 1023;
     }
@@ -443,6 +474,12 @@ namespace xsimd
      * nmb implementation *
      **********************/
 
+    template <class T>
+    constexpr int32_t nmb() noexcept
+    {
+        return nmb<typename T::value_type>();
+    }
+
     template <>
     constexpr int32_t nmb<float>() noexcept
     {

diff --git a/include/xsimd/types/xsimd_utils.hpp b/include/xsimd/types/xsimd_utils.hpp
@@ -14,6 +14,9 @@
 namespace xsimd
 {
 
+    template <class T, size_t N>
+    class batch;
+
     /**************
      * as_integer *
      **************/
@@ -33,6 +36,12 @@ namespace xsimd
         using type = int64_t;
     };
 
+    template <class T, std::size_t N>
+    struct as_integer<batch<T, N>>
+    {
+        using type = batch<typename as_integer<T>::type, N>;
+    };
+
     template <class T>
     using as_integer_t = typename as_integer<T>::type;
 
@@ -55,6 +64,12 @@ namespace xsimd
         using type = uint64_t;
     };
 
+    template <class T, std::size_t N>
+    struct as_unsigned_integer<batch<T, N>>
+    {
+        using type = batch<typename as_unsigned_integer<T>::type, N>;
+    };
+
     template <class T>
     using as_unsigned_integer_t = typename as_unsigned_integer<T>::type;
 
@@ -77,6 +92,12 @@ namespace xsimd
         using type = double;
     };
 
+    template <class T, std::size_t N>
+    struct as_float<batch<T, N>>
+    {
+        using type = batch<typename as_float<T>::type, N>;
+    };
+
     template <class T>
     using as_float_t = typename as_float<T>::type;
 

diff --git a/test/xsimd_exponential_test.hpp b/test/xsimd_exponential_test.hpp
@@ -29,6 +29,7 @@ namespace xsimd
         res_type input_exp;
         res_type exp_res;
         res_type exp2_res;
+        res_type expm1_res;
 
         simd_exponential_tester(const std::string& n);
     };
@@ -41,12 +42,14 @@ namespace xsimd
         input_exp.resize(nb_input);
         exp_res.resize(nb_input);
         exp2_res.resize(nb_input);
+        expm1_res.resize(nb_input);
 
         for (size_t i = 0; i < nb_input; ++i)
         {
             input_exp[i] = value_type(-1.5) + i * value_type(3) / nb_input;
             exp_res[i] = std::exp(input_exp[i]);
             exp2_res[i] = std::exp2(input_exp[i]);
+            expm1_res[i] = std::expm1(input_exp[i]);
         }
     }
 
@@ -76,7 +79,7 @@ namespace xsimd
         out << space << name << " " << val_type << std::endl;
         out << dash << name_shift << '-' << shift << dash << std::endl << std::endl;
 
-        out << "exp  : ";
+        out << "exp   : ";
         for (size_t i = 0; i < tester.input_exp.size(); i += tester.size)
         {
             detail::load_vec(input_exp, tester.input_exp, i);
@@ -86,7 +89,7 @@ namespace xsimd
         tmp_success = check_almost_equal(res, tester.exp_res, out);
         success = success && tmp_success;
 
-        out << "exp2 : ";
+        out << "exp2  : ";
         for (size_t i = 0; i < tester.input_exp.size(); i += tester.size)
         {
             detail::load_vec(input_exp, tester.input_exp, i);
@@ -96,6 +99,16 @@ namespace xsimd
         tmp_success = check_almost_equal(res, tester.exp2_res, out);
         success = success && tmp_success;
 
+        out << "expm1 : ";
+        for (size_t i = 0; i < tester.input_exp.size(); i += tester.size)
+        {
+            detail::load_vec(input_exp, tester.input_exp, i);
+            vres = expm1(input_exp);
+            detail::store_vec(vres, res, i);
+        }
+        tmp_success = check_almost_equal(res, tester.expm1_res, out);
+        success = success && tmp_success;
+
         return success;
     }
 }