diff --git a/docs/source/api/reducer_index.rst b/docs/source/api/reducer_index.rst
index 09f769be9..f99c7f1a7 100644
--- a/docs/source/api/reducer_index.rst
+++ b/docs/source/api/reducer_index.rst
@@ -38,6 +38,8 @@ Reduction operators
 +---------------------------------------+----------------------------------------------------+
 | :cpp:func:`reduce_min`                | min of the batch elements                          |
 +---------------------------------------+----------------------------------------------------+
+| :cpp:func:`reduce_mul`                | product of the batch elements                      |
++---------------------------------------+----------------------------------------------------+
 | :cpp:func:`haddp`                     | horizontal sum across batches                      |
 +---------------------------------------+----------------------------------------------------+
diff --git a/include/xsimd/arch/common/xsimd_common_arithmetic.hpp b/include/xsimd/arch/common/xsimd_common_arithmetic.hpp
index 2e83d33de..8a0d2e428 100644
--- a/include/xsimd/arch/common/xsimd_common_arithmetic.hpp
+++ b/include/xsimd/arch/common/xsimd_common_arithmetic.hpp
@@ -139,20 +139,6 @@ namespace xsimd
             return fma(x, y, select(mask, neg(z), z));
         }
 
-        // hadd
-        template <class A, class T, class /*= typename std::enable_if<std::is_scalar<T>::value, void>::type*/>
-        XSIMD_INLINE T hadd(batch<T, A> const& self, requires_arch<common>) noexcept
-        {
-            alignas(A::alignment()) T buffer[batch<T, A>::size];
-            self.store_aligned(buffer);
-            T res = 0;
-            for (T val : buffer)
-            {
-                res += val;
-            }
-            return res;
-        }
-
         // incr
         template <class A, class T>
         XSIMD_INLINE batch<T, A> incr(batch<T, A> const& self, requires_arch<common>) noexcept
diff --git a/include/xsimd/arch/common/xsimd_common_details.hpp b/include/xsimd/arch/common/xsimd_common_details.hpp
index 7103589b7..28590ce99 100644
--- a/include/xsimd/arch/common/xsimd_common_details.hpp
+++ b/include/xsimd/arch/common/xsimd_common_details.hpp
@@ -77,6 +77,8 @@ namespace xsimd
     template <class T, class A>
     XSIMD_INLINE T reduce_add(batch<T, A> const&) noexcept;
     template <class T, class A>
+    XSIMD_INLINE T reduce_mul(batch<T, A> const&) noexcept;
+    template <class T, class A>
     XSIMD_INLINE batch<T, A> select(batch_bool<T, A> const&, batch<T, A> const&, batch<T, A> const&) noexcept;
     template <class T, class A>
     XSIMD_INLINE batch<std::complex<T>, A> select(batch_bool<T, A> const&, batch<std::complex<T>, A> const&, batch<std::complex<T>, A> const&) noexcept;
diff --git a/include/xsimd/arch/common/xsimd_common_math.hpp b/include/xsimd/arch/common/xsimd_common_math.hpp
index 10db980e2..e5bc57db9 100644
--- a/include/xsimd/arch/common/xsimd_common_math.hpp
+++ b/include/xsimd/arch/common/xsimd_common_math.hpp
@@ -2103,6 +2103,19 @@ namespace xsimd
             return { reduce_add(self.real()), reduce_add(self.imag()) };
         }
 
+        template <class A, class T, class /*= typename std::enable_if<std::is_scalar<T>::value, void>::type*/>
+        XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<common>) noexcept
+        {
+            alignas(A::alignment()) T buffer[batch<T, A>::size];
+            self.store_aligned(buffer);
+            T res = 0;
+            for (T val : buffer)
+            {
+                res += val;
+            }
+            return res;
+        }
+
         namespace detail
         {
             template <class T, class A>
@@ -2147,6 +2160,34 @@ namespace xsimd
                 self, std::integral_constant<unsigned, batch<T, A>::size>());
         }
 
+        // reduce_mul
+        template <class A, class T>
+        XSIMD_INLINE std::complex<T> reduce_mul(batch<std::complex<T>, A> const& self, requires_arch<common>) noexcept
+        {
+            // FIXME: could do better
+            alignas(A::alignment()) std::complex<T> buffer[batch<std::complex<T>, A>::size];
+            self.store_aligned(buffer);
+            std::complex<T> res = 1;
+            for (auto val : buffer)
+            {
+                res *= val;
+            }
+            return res;
+        }
+
+        template <class A, class T, class /*= typename std::enable_if<std::is_scalar<T>::value, void>::type*/>
+        XSIMD_INLINE T reduce_mul(batch<T, A> const& self, requires_arch<common>) noexcept
+        {
+            alignas(A::alignment()) T buffer[batch<T, A>::size];
+            self.store_aligned(buffer);
+            T res = 1;
+            for (T val : buffer)
+            {
+                res *= val;
+            }
+            return res;
+        }
+
         // remainder
         template <class A, class T>
         XSIMD_INLINE batch<T, A> remainder(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept
diff --git a/include/xsimd/arch/xsimd_avx.hpp b/include/xsimd/arch/xsimd_avx.hpp
index ed3319d4e..20bbf600f 100644
--- a/include/xsimd/arch/xsimd_avx.hpp
+++ b/include/xsimd/arch/xsimd_avx.hpp
@@ -1046,7 +1046,7 @@ namespace xsimd
         }
 
         // reduce_add
-        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value || std::is_same<T, float>::value || std::is_same<T, double>::value, void>::type>
+        template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
         XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<avx>) noexcept
         {
             typename batch<T, sse4_2>::register_type low, high;
@@ -1077,6 +1077,16 @@ namespace xsimd
             return reduce_min(batch<T, sse4_2>(low));
         }
 
+        // reduce_mul
+        template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
+        XSIMD_INLINE T reduce_mul(batch<T, A> const& self, requires_arch<avx>) noexcept
+        {
+            typename batch<T, sse4_2>::register_type low, high;
+            detail::split_avx(self, low, high);
+            batch<T, sse4_2> blow(low), bhigh(high);
+            return reduce_mul(blow * bhigh);
+        }
+
         // rsqrt
         template <class A>
         XSIMD_INLINE batch<float, A> rsqrt(batch<float, A> const& val, requires_arch<avx>) noexcept
@@ -1911,4 +1921,4 @@ namespace xsimd
     }
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/include/xsimd/arch/xsimd_avx512dq.hpp b/include/xsimd/arch/xsimd_avx512dq.hpp
index 9b234587f..063affa4c 100644
--- a/include/xsimd/arch/xsimd_avx512dq.hpp
+++ b/include/xsimd/arch/xsimd_avx512dq.hpp
@@ -188,6 +188,16 @@ namespace xsimd
             return reduce_add(batch<float, avx2>(res1), avx2 {});
         }
 
+        // reduce_mul
+        template <class A>
+        XSIMD_INLINE float reduce_mul(batch<float, A> const& rhs, requires_arch<avx512dq>) noexcept
+        {
+            __m256 tmp1 = _mm512_extractf32x8_ps(rhs, 1);
+            __m256 tmp2 = _mm512_extractf32x8_ps(rhs, 0);
+            __m256 res1 = _mm256_mul_ps(tmp1, tmp2);
+            return reduce_mul(batch<float, avx2>(res1), avx2 {});
+        }
+
         // swizzle constant mask
         template <class A, uint32_t... Vs>
diff --git a/include/xsimd/arch/xsimd_avx512f.hpp b/include/xsimd/arch/xsimd_avx512f.hpp
index d770bde28..7073d9d4f 100644
--- a/include/xsimd/arch/xsimd_avx512f.hpp
+++ b/include/xsimd/arch/xsimd_avx512f.hpp
@@ -1558,6 +1558,37 @@ namespace xsimd
             return reduce_min(batch<T, avx2>(low));
         }
 
+        // reduce_mul
+        template <class A>
+        XSIMD_INLINE float reduce_mul(batch<float, A> const& rhs, requires_arch<avx512f>) noexcept
+        {
+            return _mm512_reduce_mul_ps(rhs);
+        }
+        template <class A>
+        XSIMD_INLINE double reduce_mul(batch<double, A> const& rhs, requires_arch<avx512f>) noexcept
+        {
+            return _mm512_reduce_mul_pd(rhs);
+        }
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE T reduce_mul(batch<T, A> const& self, requires_arch<avx512f>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                return _mm512_reduce_mul_epi32(self);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                return _mm512_reduce_mul_epi64(self);
+            }
+            else
+            {
+                __m256i low, high;
+                detail::split_avx512(self, low, high);
+                batch<T, avx2> blow(low), bhigh(high);
+                return reduce_mul(blow, avx2 {}) * reduce_mul(bhigh, avx2 {});
+            }
+        }
+
         // rsqrt
         template <class A>
         XSIMD_INLINE batch<float, A> rsqrt(batch<float, A> const& val, requires_arch<avx512f>) noexcept
diff --git a/include/xsimd/arch/xsimd_common_fwd.hpp b/include/xsimd/arch/xsimd_common_fwd.hpp
index aa9a0c163..0c3cb362e 100644
--- a/include/xsimd/arch/xsimd_common_fwd.hpp
+++ b/include/xsimd/arch/xsimd_common_fwd.hpp
@@ -36,8 +36,10 @@ namespace xsimd
         XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept;
         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
         XSIMD_INLINE batch<T, A> ssub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept;
-        template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
-        XSIMD_INLINE T hadd(batch<T, A> const& self, requires_arch<common>) noexcept;
+        template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
+        XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<common>) noexcept;
+        template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
+        XSIMD_INLINE T reduce_mul(batch<T, A> const& self, requires_arch<common>) noexcept;
 
         // Forward declarations for pack-level helpers
         namespace detail
         {
diff --git a/include/xsimd/arch/xsimd_emulated.hpp b/include/xsimd/arch/xsimd_emulated.hpp
index bf169cd9a..77cf03a8a 100644
--- a/include/xsimd/arch/xsimd_emulated.hpp
+++ b/include/xsimd/arch/xsimd_emulated.hpp
@@ -601,6 +601,16 @@ namespace xsimd
                                      { return xsimd::min(x, y); });
         }
 
+        // reduce_mul
+        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
+        XSIMD_INLINE T reduce_mul(batch<T, A> const& self, requires_arch<emulated<N>>) noexcept
+        {
+            constexpr size_t size = batch<T, A>::size;
+            std::array<T, size> buffer;
+            self.store_unaligned(buffer.data());
+            return std::accumulate(buffer.begin() + 1, buffer.end(), *buffer.begin(), std::multiplies<T>());
+        }
+
         // rsqrt
         template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
         XSIMD_INLINE batch<T, A> rsqrt(batch<T, A> const& self, requires_arch<emulated<N>>) noexcept
diff --git a/include/xsimd/arch/xsimd_neon.hpp b/include/xsimd/arch/xsimd_neon.hpp
index d6c800de1..5bd7d9206 100644
--- a/include/xsimd/arch/xsimd_neon.hpp
+++ b/include/xsimd/arch/xsimd_neon.hpp
@@ -1705,14 +1705,21 @@ namespace xsimd
          * reduce_max *
          **************/
 
-        // Using common implementation because ARM doe snot provide intrinsics
+        // Using common implementation because ARM does not provide intrinsics
         // for this operation
 
         /**************
          * reduce_min *
          **************/
 
-        // Using common implementation because ARM doe snot provide intrinsics
+        // Using common implementation because ARM does not provide intrinsics
+        // for this operation
+
+        /**************
+         * reduce_mul *
+         **************/
+
+        // Using common implementation because ARM does not provide intrinsics
         // for this operation
 
         /**********
diff --git a/include/xsimd/arch/xsimd_sse2.hpp b/include/xsimd/arch/xsimd_sse2.hpp
index b1e447103..e8ad82875 100644
--- a/include/xsimd/arch/xsimd_sse2.hpp
+++ b/include/xsimd/arch/xsimd_sse2.hpp
@@ -1290,7 +1290,7 @@ namespace xsimd
             }
             else
             {
-                return hadd(self, common {});
+                return reduce_add(self, common {});
             }
         }
 
@@ -1344,6 +1344,52 @@ namespace xsimd
             return first(acc3, A {});
         }
 
+        // reduce_mul
+        template <class A>
+        XSIMD_INLINE float reduce_mul(batch<float, A> const& self, requires_arch<sse2>) noexcept
+        {
+            __m128 tmp0 = _mm_mul_ps(self, _mm_movehl_ps(self, self));
+            __m128 tmp1 = _mm_mul_ss(tmp0, _mm_shuffle_ps(tmp0, tmp0, 1));
+            return _mm_cvtss_f32(tmp1);
+        }
+
+        template <class A>
+        XSIMD_INLINE double reduce_mul(batch<double, A> const& self, requires_arch<sse2>) noexcept
+        {
+            return _mm_cvtsd_f64(_mm_mul_sd(self, _mm_unpackhi_pd(self, self)));
+        }
+
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE T reduce_mul(batch<T, A> const& self, requires_arch<sse2>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                batch<T, A> tmp1 = _mm_shuffle_epi32(self, _MM_SHUFFLE(0, 1, 2, 3));
+                tmp1 = tmp1 * self;
+                batch<T, A> tmp2 = _mm_unpackhi_epi32(tmp1, tmp1);
+                tmp2 = tmp2 * tmp1;
+                return _mm_cvtsi128_si32(tmp2);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                batch<T, A> tmp1 = _mm_unpackhi_epi64(self, self);
+                auto tmp2 = tmp1 * self;
+#if defined(__x86_64__)
+                return _mm_cvtsi128_si64(tmp2);
+#else
+                __m128i m;
+                _mm_storel_epi64(&m, tmp2);
+                int64_t i;
+                std::memcpy(&i, &m, sizeof(i));
+                return i;
+#endif
+            }
+            else
+            {
+                return reduce_mul(self, common {});
+            }
+        }
+
         // rsqrt
         template <class A>
         XSIMD_INLINE batch<float, A> rsqrt(batch<float, A> const& val, requires_arch<sse2>) noexcept
diff --git a/include/xsimd/arch/xsimd_sse3.hpp b/include/xsimd/arch/xsimd_sse3.hpp
index 9dbc4b343..a88d693cd 100644
--- a/include/xsimd/arch/xsimd_sse3.hpp
+++ b/include/xsimd/arch/xsimd_sse3.hpp
@@ -51,6 +51,15 @@ namespace xsimd
             return _mm_cvtss_f32(tmp1);
         }
 
+        // reduce_mul
+        template <class A>
+        XSIMD_INLINE float reduce_mul(batch<float, A> const& self, requires_arch<sse3>) noexcept
+        {
+            __m128 tmp1 = _mm_mul_ps(self, _mm_movehl_ps(self, self));
+            __m128 tmp2 = _mm_mul_ps(tmp1, _mm_movehdup_ps(tmp1));
+            return _mm_cvtss_f32(tmp2);
+        }
+
     }
 }
 
diff --git a/include/xsimd/arch/xsimd_vsx.hpp b/include/xsimd/arch/xsimd_vsx.hpp
index 953d67faf..0e7d74a3b 100644
--- a/include/xsimd/arch/xsimd_vsx.hpp
+++ b/include/xsimd/arch/xsimd_vsx.hpp
@@ -559,7 +559,49 @@ namespace xsimd
        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
        XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<vsx>) noexcept
        {
-           return hadd(self, common {});
+           return reduce_add(self, common {});
+       }
+
+       // reduce_mul
+       template <class A>
+       XSIMD_INLINE signed reduce_mul(batch<signed, A> const& self, requires_arch<vsx>) noexcept
+       {
+           auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0
+           auto tmp1 = vec_mul(self.data, tmp0); // v0 * v3, v1 * v2, v2 * v1, v3 * v0
+           auto tmp2 = vec_mergel(tmp1, tmp1); // v2 * v1, v2 * v1, v3 * v0, v3 * v0
+           auto tmp3 = vec_mul(tmp1, tmp2);
+           return vec_extract(tmp3, 0);
+       }
+       template <class A>
+       XSIMD_INLINE unsigned reduce_mul(batch<unsigned, A> const& self, requires_arch<vsx>) noexcept
+       {
+           auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0
+           auto tmp1 = vec_mul(self.data, tmp0); // v0 * v3, v1 * v2, v2 * v1, v3 * v0
+           auto tmp2 = vec_mergel(tmp1, tmp1); // v2 * v1, v2 * v1, v3 * v0, v3 * v0
+           auto tmp3 = vec_mul(tmp1, tmp2);
+           return vec_extract(tmp3, 0);
+       }
+       template <class A>
+       XSIMD_INLINE float reduce_mul(batch<float, A> const& self, requires_arch<vsx>) noexcept
+       {
+           // FIXME: find an in-order approach
+           auto tmp0 = vec_reve(self.data); // v3, v2, v1, v0
+           auto tmp1 = vec_mul(self.data, tmp0); // v0 * v3, v1 * v2, v2 * v1, v3 * v0
+           auto tmp2 = vec_mergel(tmp1, tmp1); // v2 * v1, v2 * v1, v3 * v0, v3 * v0
+           auto tmp3 = vec_mul(tmp1, tmp2);
+           return vec_extract(tmp3, 0);
+       }
+       template <class A>
+       XSIMD_INLINE double reduce_mul(batch<double, A> const& self, requires_arch<vsx>) noexcept
+       {
+           auto tmp0 = vec_reve(self.data); // v1, v0
+           auto tmp1 = vec_mul(self.data, tmp0); // v0 * v1, v1 * v0
+           return vec_extract(tmp1, 0);
+       }
+       template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+       XSIMD_INLINE T reduce_mul(batch<T, A> const& self, requires_arch<vsx>) noexcept
+       {
+           return reduce_mul(self, common {});
        }
 
        // round
diff --git a/include/xsimd/arch/xsimd_wasm.hpp b/include/xsimd/arch/xsimd_wasm.hpp
index 29d9aed0a..ad0867c5b 100644
--- a/include/xsimd/arch/xsimd_wasm.hpp
+++ b/include/xsimd/arch/xsimd_wasm.hpp
@@ -1206,7 +1206,7 @@ namespace xsimd
             }
             else
             {
-                return hadd(self, common {});
+                return reduce_add(self, common {});
             }
         }
         template <class A>
@@ -1218,6 +1218,47 @@ namespace xsimd
             return wasm_f64x2_extract_lane(tmp2, 0);
         }
 
+        // reduce_mul
+        template <class A>
+        XSIMD_INLINE float reduce_mul(batch<float, A> const& self, requires_arch<wasm>) noexcept
+        {
+            v128_t tmp0 = wasm_f32x4_mul(self, wasm_i32x4_shuffle(self, self, 6, 7, 2, 3));
+            v128_t tmp1 = wasm_i32x4_shuffle(tmp0, tmp0, 1, 0, 4, 4);
+            v128_t tmp2 = wasm_f32x4_mul(tmp0, tmp1);
+            v128_t tmp3 = wasm_i32x4_shuffle(tmp0, tmp2, 4, 1, 2, 3);
+            return wasm_f32x4_extract_lane(tmp3, 0);
+        }
+        template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+        XSIMD_INLINE T reduce_mul(batch<T, A> const& self, requires_arch<wasm>) noexcept
+        {
+            XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
+            {
+                v128_t tmp0 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2, 3, 0, 0);
+                v128_t tmp1 = wasm_i32x4_mul(self, tmp0);
+                v128_t tmp2 = wasm_i32x4_shuffle(tmp1, wasm_i32x4_splat(0), 1, 0, 0, 0);
+                v128_t tmp3 = wasm_i32x4_mul(tmp1, tmp2);
+                return wasm_i32x4_extract_lane(tmp3, 0);
+            }
+            else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
+            {
+                v128_t tmp0 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2, 3, 0, 0);
+                v128_t tmp1 = wasm_i64x2_mul(self, tmp0);
+                return wasm_i64x2_extract_lane(tmp1, 0);
+            }
+            else
+            {
+                return reduce_mul(self, common {});
+            }
+        }
+        template <class A>
+        XSIMD_INLINE double reduce_mul(batch<double, A> const& self, requires_arch<wasm>) noexcept
+        {
+            v128_t tmp0 = wasm_i64x2_shuffle(self, self, 1, 3);
+            v128_t tmp1 = wasm_f64x2_mul(self, tmp0);
+            v128_t tmp2 = wasm_i64x2_shuffle(tmp0, tmp1, 2, 1);
+            return wasm_f64x2_extract_lane(tmp2, 0);
+        }
+
         // rsqrt
         template <class A>
         XSIMD_INLINE batch<float, A> rsqrt(batch<float, A> const& self, requires_arch<wasm>) noexcept
diff --git a/include/xsimd/types/xsimd_api.hpp b/include/xsimd/types/xsimd_api.hpp
index eeee5b267..875134766 100644
--- a/include/xsimd/types/xsimd_api.hpp
+++ b/include/xsimd/types/xsimd_api.hpp
@@ -1879,6 +1879,20 @@ namespace xsimd
         return kernel::reduce_min<A>(x, A {});
     }
 
+    /**
+     * @ingroup batch_reducers
+     *
+     * Multiplies all the scalars of the batch \c x.
+     * @param x batch involved in the reduction
+     * @return the result of the reduction.
+     */
+    template <class T, class A>
+    XSIMD_INLINE T reduce_mul(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::reduce_mul<A>(x, A {});
+    }
+
     /**
      * @ingroup batch_math
      *
diff --git a/test/test_batch.cpp b/test/test_batch.cpp
index d23b53729..b301a5bea 100644
--- a/test/test_batch.cpp
+++ b/test/test_batch.cpp
@@ -840,6 +840,13 @@ struct batch_test
             INFO("reduce_min");
             CHECK_SCALAR_EQ(res, expected);
         }
+        // reduce_mul
+        {
+            value_type expected = std::accumulate(lhs.cbegin(), lhs.cend(), value_type(1), std::multiplies<value_type>());
+            value_type res = reduce_mul(batch_lhs());
+            INFO("reduce_mul");
+            CHECK_SCALAR_EQ(res, expected);
+        }
     }
 
     template <class T = value_type>
diff --git a/test/test_batch_complex.cpp b/test/test_batch_complex.cpp
index 47ed9ca5b..e06ad83fb 100644
--- a/test/test_batch_complex.cpp
+++ b/test/test_batch_complex.cpp
@@ -578,6 +578,12 @@ struct batch_complex_test
             value_type res = reduce_add(batch_lhs());
             CHECK_SCALAR_EQ(res, expected);
         }
+        // reduce_mul
+        {
+            value_type expected = std::accumulate(lhs.cbegin(), lhs.cend(), value_type(1), std::multiplies<value_type>());
+            value_type res = reduce_mul(batch_lhs());
+            CHECK_SCALAR_EQ(res, expected);
+        }
     }
 
     void test_fused_operations() const
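A minimal usage sketch of the reducer introduced by this patch, assuming an xsimd-enabled build; the element type, the broadcast value, and the printed result are illustrative only, and the lane count depends on the architecture selected at compile time:

    #include <iostream>
    #include "xsimd/xsimd.hpp"

    int main()
    {
        // Broadcast 2.0f across every lane of the default-architecture batch.
        xsimd::batch<float> b(2.0f);
        // reduce_mul multiplies all lanes together, so this prints 2^batch::size
        // (e.g. 256 for an 8-lane AVX float batch).
        std::cout << xsimd::reduce_mul(b) << '\n';
        return 0;
    }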