Merge pull request #909 from xtensor-stack/feature/incr-decr

Feature/incr decr
xtensor-stack · Mar 27, 2023 · d4f4843 · d4f4843
2 parents 5c15ff1 + fcd2dbb
commit d4f4843
Show file tree

Hide file tree

Showing 8 changed files with 292 additions and 64 deletions.
diff --git a/docs/source/api/batch_manip.rst b/docs/source/api/batch_manip.rst
@@ -24,3 +24,7 @@ Conditional expression
 .. doxygenfunction:: select(batch_bool_constant<batch<T, A>, Values...> const &cond, batch<T, A> const &true_br, batch<T, A> const &false_br) noexcept
    :project: xsimd
 
+
+In the specific case when one needs to conditionnaly increment or decrement a
+batch based on a mask, :cpp:func:`incr_if` and
+:cpp:func:`decr_if` provide specialized version.
diff --git a/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp b/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp
@@ -43,6 +43,20 @@ namespace xsimd
                                  self, other);
         }
 
+        // decr
+        template <class A, class T>
+        inline batch<T, A> decr(batch<T, A> const& self, requires_arch<generic>) noexcept
+        {
+            return self - T(1);
+        }
+
+        // decr_if
+        template <class A, class T, class Mask>
+        inline batch<T, A> decr_if(batch<T, A> const& self, Mask const& mask, requires_arch<generic>) noexcept
+        {
+            return select(mask, decr(self), self);
+        }
+
         // div
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
         inline batch<T, A> div(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
@@ -112,6 +126,20 @@ namespace xsimd
             return { res_r, res_i };
         }
 
+        // incr
+        template <class A, class T>
+        inline batch<T, A> incr(batch<T, A> const& self, requires_arch<generic>) noexcept
+        {
+            return self + T(1);
+        }
+
+        // incr_if
+        template <class A, class T, class Mask>
+        inline batch<T, A> incr_if(batch<T, A> const& self, Mask const& mask, requires_arch<generic>) noexcept
+        {
+            return select(mask, incr(self), self);
+        }
+
         // mul
         template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
         inline batch<T, A> mul(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept

diff --git a/include/xsimd/arch/xsimd_avx.hpp b/include/xsimd/arch/xsimd_avx.hpp
@@ -549,6 +549,13 @@ namespace xsimd
             }
         }
 
+        // decr_if
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        inline batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<avx>) noexcept
+        {
+            return self + batch<T, A>(mask.data);
+        }
+
         // div
         template <class A>
         inline batch<float, A> div(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>) noexcept
@@ -749,6 +756,13 @@ namespace xsimd
             return _mm256_add_pd(tmp1, tmp2);
         }
 
+        // incr_if
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        inline batch<T, A> incr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<avx>) noexcept
+        {
+            return self - batch<T, A>(mask.data);
+        }
+
         // insert
         template <class A, class T, size_t I, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
         inline batch<T, A> insert(batch<T, A> const& self, T val, index<I> pos, requires_arch<avx>) noexcept

diff --git a/include/xsimd/arch/xsimd_scalar.hpp b/include/xsimd/arch/xsimd_scalar.hpp
@@ -138,6 +138,18 @@ namespace xsimd
         return x + y;
     }
 
+    template <class T>
+    inline T incr(T const& x) noexcept
+    {
+        return x + T(1);
+    }
+
+    template <class T>
+    inline T incr_if(T const& x, bool mask) noexcept
+    {
+        return x + T(mask ? 1 : 0);
+    }
+
     inline bool all(bool mask)
     {
         return mask;
@@ -753,6 +765,18 @@ namespace xsimd
         return x - y;
     }
 
+    template <class T>
+    inline T decr(T const& x) noexcept
+    {
+        return x - T(1);
+    }
+
+    template <class T>
+    inline T decr_if(T const& x, bool mask) noexcept
+    {
+        return x - T(mask ? 1 : 0);
+    }
+
 #ifdef XSIMD_ENABLE_XTL_COMPLEX
     template <class T, bool i3ec>
     inline xtl::xcomplex<T, T, i3ec> log2(const xtl::xcomplex<T, T, i3ec>& val) noexcept

diff --git a/include/xsimd/arch/xsimd_sse2.hpp b/include/xsimd/arch/xsimd_sse2.hpp
@@ -501,6 +501,13 @@ namespace xsimd
             }
         }
 
+        // decr_if
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        inline batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<sse2>) noexcept
+        {
+            return self + batch<T, A>(mask.data);
+        }
+
         // div
         template <class A>
         inline batch<float, A> div(batch<float, A> const& self, batch<float, A> const& other, requires_arch<sse2>) noexcept
@@ -808,6 +815,13 @@ namespace xsimd
                               _mm_unpackhi_pd(row[0], row[1]));
         }
 
+        // incr_if
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        inline batch<T, A> incr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<sse2>) noexcept
+        {
+            return self - batch<T, A>(mask.data);
+        }
+
         // insert
         template <class A, class T, size_t I, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
         inline batch<T, A> insert(batch<T, A> const& self, T val, index<I> pos, requires_arch<sse2>) noexcept

diff --git a/include/xsimd/types/xsimd_api.hpp b/include/xsimd/types/xsimd_api.hpp
@@ -82,7 +82,7 @@ namespace xsimd
      * @return the sum of \c x and \c y
      */
     template <class T, class A>
-    inline auto add(batch<T> const& x, batch<T, A> const& y) noexcept -> decltype(x + y)
+    inline auto add(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x + y)
     {
         detail::static_check_supported_config<T, A>();
         return x + y;
@@ -546,6 +546,36 @@ namespace xsimd
         return kernel::cosh<A>(x, A {});
     }
 
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Subtract 1 to batch \c x.
+     * @param x batch involved in the decrement.
+     * @return the subtraction of \c x and 1.
+     */
+    template <class T, class A>
+    inline batch<T, A> decr(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::decr<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Subtract 1 to batch \c x for each element where \c mask is true.
+     * @param x batch involved in the increment.
+     * @param mask whether to perform the increment or not. Can be a \c
+     *             batch_bool or a \c batch_bool_constant.
+     * @return the subtraction of \c x and 1 when \c mask is true.
+     */
+    template <class T, class A, class Mask>
+    inline batch<T, A> decr_if(batch<T, A> const& x, Mask const& mask) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::decr_if<A>(x, mask, A {});
+    }
+
     /**
      * @ingroup batch_arithmetic
      *
@@ -878,63 +908,6 @@ namespace xsimd
         return x > y;
     }
 
-    /**
-     * @ingroup batch_reducers
-     *
-     * Generic reducer using only batch operations
-     * @param f reducing function, accepting `batch ()(batch, batch)`
-     * @param x batch involved in the reduction
-     * @return the result of the reduction, as a scalar.
-     */
-    template <class T, class A, class F>
-    inline T reduce(F&& f, batch<T, A> const& x) noexcept
-    {
-        detail::static_check_supported_config<T, A>();
-        return kernel::detail::reduce(std::forward<F>(f), x, std::integral_constant<unsigned, batch<T, A>::size>());
-    }
-
-    /**
-     * @ingroup batch_reducers
-     *
-     * Adds all the scalars of the batch \c x.
-     * @param x batch involved in the reduction
-     * @return the result of the reduction.
-     */
-    template <class T, class A>
-    inline T reduce_add(batch<T, A> const& x) noexcept
-    {
-        detail::static_check_supported_config<T, A>();
-        return kernel::reduce_add<A>(x, A {});
-    }
-
-    /**
-     * @ingroup batch_reducers
-     *
-     * Max of all the scalars of the batch \c x.
-     * @param x batch involved in the reduction
-     * @return the result of the reduction.
-     */
-    template <class T, class A>
-    inline T reduce_max(batch<T, A> const& x) noexcept
-    {
-        detail::static_check_supported_config<T, A>();
-        return kernel::reduce_max<A>(x, A {});
-    }
-
-    /**
-     * @ingroup batch_reducers
-     *
-     * Min of all the scalars of the batch \c x.
-     * @param x batch involved in the reduction
-     * @return the result of the reduction.
-     */
-    template <class T, class A>
-    inline T reduce_min(batch<T, A> const& x) noexcept
-    {
-        detail::static_check_supported_config<T, A>();
-        return kernel::reduce_min<A>(x, A {});
-    }
-
     /**
      * @ingroup batch_reducers
      *
@@ -981,6 +954,36 @@ namespace xsimd
         return kernel::imag<A>(x, A {});
     }
 
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Add 1 to batch \c x.
+     * @param x batch involved in the increment.
+     * @return the sum of \c x and 1.
+     */
+    template <class T, class A>
+    inline batch<T, A> incr(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::incr<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Add 1 to batch \c x for each element where \c mask is true.
+     * @param x batch involved in the increment.
+     * @param mask whether to perform the increment or not. Can be a \c
+     *             batch_bool or a \c batch_bool_constant.
+     * @return the sum of \c x and 1 when \c mask is true.
+     */
+    template <class T, class A, class Mask>
+    inline batch<T, A> incr_if(batch<T, A> const& x, Mask const& mask) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::incr_if<A>(x, mask, A {});
+    }
+
     /**
      * @ingroup batch_constant
      *
@@ -1595,6 +1598,20 @@ namespace xsimd
         return kernel::proj(z, A {});
     }
 
+    /**
+     * @ingroup batch_complex
+     *
+     * Computes the real part of the batch \c z.
+     * @param z batch of complex or real values.
+     * @return the argument of \c z.
+     */
+    template <class T, class A>
+    inline real_batch_type_t<batch<T, A>> real(batch<T, A> const& z) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::real<A>(z, A {});
+    }
+
     /**
      * @ingroup batch_arithmetic
      *
@@ -1612,17 +1629,60 @@ namespace xsimd
     }
 
     /**
-     * @ingroup batch_complex
+     * @ingroup batch_reducers
      *
-     * Computes the real part of the batch \c z.
-     * @param z batch of complex or real values.
-     * @return the argument of \c z.
+     * Generic reducer using only batch operations
+     * @param f reducing function, accepting `batch ()(batch, batch)`
+     * @param x batch involved in the reduction
+     * @return the result of the reduction, as a scalar.
+     */
+    template <class T, class A, class F>
+    inline T reduce(F&& f, batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::detail::reduce(std::forward<F>(f), x, std::integral_constant<unsigned, batch<T, A>::size>());
+    }
+
+    /**
+     * @ingroup batch_reducers
+     *
+     * Adds all the scalars of the batch \c x.
+     * @param x batch involved in the reduction
+     * @return the result of the reduction.
      */
     template <class T, class A>
-    inline real_batch_type_t<batch<T, A>> real(batch<T, A> const& z) noexcept
+    inline T reduce_add(batch<T, A> const& x) noexcept
     {
         detail::static_check_supported_config<T, A>();
-        return kernel::real<A>(z, A {});
+        return kernel::reduce_add<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_reducers
+     *
+     * Max of all the scalars of the batch \c x.
+     * @param x batch involved in the reduction
+     * @return the result of the reduction.
+     */
+    template <class T, class A>
+    inline T reduce_max(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::reduce_max<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_reducers
+     *
+     * Min of all the scalars of the batch \c x.
+     * @param x batch involved in the reduction
+     * @return the result of the reduction.
+     */
+    template <class T, class A>
+    inline T reduce_min(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::reduce_min<A>(x, A {});
     }
 
     /**