Merge pull request #22 from JohanMabille/basic

basic operations
xtensor-stack · May 24, 2017 · bfd5b04 · bfd5b04
2 parents 0153203 + 4517d43
commit bfd5b04
Show file tree

Hide file tree

Showing 7 changed files with 134 additions and 4 deletions.
diff --git a/include/xsimd/types/xsimd_avx_double.hpp b/include/xsimd/types/xsimd_avx_double.hpp
@@ -77,6 +77,8 @@ namespace xsimd
         void store_aligned(double* dst) const;
         void store_unaligned(double* dst) const;
 
+        double operator[](std::size_t index) const;
+
     private:
 
         __m256d m_value;
@@ -100,6 +102,8 @@ namespace xsimd
 
     batch<double, 4> min(const batch<double, 4>& lhs, const batch<double, 4>& rhs);
     batch<double, 4> max(const batch<double, 4>& lhs, const batch<double, 4>& rhs);
+    batch<double, 4> fmin(const batch<double, 4>& lhs, const batch<double, 4>& rhs);
+    batch<double, 4> fmax(const batch<double, 4>& lhs, const batch<double, 4>& rhs);
 
     batch<double, 4> abs(const batch<double, 4>& rhs);
     batch<double, 4> sqrt(const batch<double, 4>& rhs);
@@ -236,6 +240,13 @@ namespace xsimd
         _mm256_storeu_pd(dst, m_value);
     }
 
+    // Scalar read of a single lane. The batch is first written to a
+    // 32-byte-aligned scratch array through store_aligned; the index is
+    // reduced modulo 4, so an out-of-range index wraps around instead of
+    // reading past the end of the array.
+    inline double batch<double, 4>::operator[](std::size_t index) const
+    {
+        alignas(32) double lanes[4];
+        store_aligned(lanes);
+        const std::size_t lane = index % 4;
+        return lanes[lane];
+    }
+
     inline batch<double, 4> operator-(const batch<double, 4>& rhs)
     {
         return _mm256_xor_pd(rhs, _mm256_castsi256_pd(_mm256_set1_epi64x(0x8000000000000000)));
@@ -311,6 +322,16 @@ namespace xsimd
         return _mm256_max_pd(lhs, rhs);
     }
 
+    // Lane-wise minimum with std::fmin NaN handling: when exactly one operand
+    // of a lane is NaN, the other operand is returned for that lane.
+    // _mm256_min_pd alone yields its second operand whenever either input is
+    // NaN, so a NaN in rhs would leak into the result; those lanes are
+    // replaced by lhs. If both operands are NaN the lane stays NaN.
+    inline batch<double, 4> fmin(const batch<double, 4>& lhs, const batch<double, 4>& rhs)
+    {
+        const __m256d a = lhs;
+        const __m256d b = rhs;
+        // All bits set in every lane where rhs is NaN (unordered with itself).
+        const __m256d b_is_nan = _mm256_cmp_pd(b, b, _CMP_UNORD_Q);
+        // blendv picks its second argument where the mask's sign bit is set.
+        return _mm256_blendv_pd(_mm256_min_pd(a, b), a, b_is_nan);
+    }
+
+    // Lane-wise maximum with std::fmax NaN handling: when exactly one operand
+    // of a lane is NaN, the other operand is returned for that lane.
+    // _mm256_max_pd alone yields its second operand whenever either input is
+    // NaN, so a NaN in rhs would leak into the result; those lanes are
+    // replaced by lhs. If both operands are NaN the lane stays NaN.
+    inline batch<double, 4> fmax(const batch<double, 4>& lhs, const batch<double, 4>& rhs)
+    {
+        const __m256d a = lhs;
+        const __m256d b = rhs;
+        // All bits set in every lane where rhs is NaN (unordered with itself).
+        const __m256d b_is_nan = _mm256_cmp_pd(b, b, _CMP_UNORD_Q);
+        // blendv picks its second argument where the mask's sign bit is set.
+        return _mm256_blendv_pd(_mm256_max_pd(a, b), a, b_is_nan);
+    }
+
     inline batch<double, 4> abs(const batch<double, 4>& rhs)
     {
         __m256d sign_mask = _mm256_set1_pd(-0.); // -0. = 1 << 63

diff --git a/include/xsimd/types/xsimd_avx_float.hpp b/include/xsimd/types/xsimd_avx_float.hpp
@@ -79,6 +79,8 @@ namespace xsimd
         void store_aligned(float* dst) const;
         void store_unaligned(float* dst) const;
 
+        float operator[](std::size_t index) const;
+
     private:
 
         __m256 m_value;
@@ -102,6 +104,8 @@ namespace xsimd
 
     batch<float, 8> min(const batch<float, 8>& lhs, const batch<float, 8>& rhs);
     batch<float, 8> max(const batch<float, 8>& lhs, const batch<float, 8>& rhs);
+    batch<float, 8> fmin(const batch<float, 8>& lhs, const batch<float, 8>& rhs);
+    batch<float, 8> fmax(const batch<float, 8>& lhs, const batch<float, 8>& rhs);
 
     batch<float, 8> abs(const batch<float, 8>& rhs);
     batch<float, 8> sqrt(const batch<float, 8>& rhs);
@@ -240,6 +244,13 @@ namespace xsimd
         _mm256_store_ps(dst, m_value);
     }
 
+    // Scalar read of a single lane. The batch is first written to a
+    // 32-byte-aligned scratch array through store_aligned; the index is
+    // reduced modulo 8, so an out-of-range index wraps around instead of
+    // reading past the end of the array.
+    inline float batch<float, 8>::operator[](std::size_t index) const
+    {
+        alignas(32) float lanes[8];
+        store_aligned(lanes);
+        const std::size_t lane = index % 8;
+        return lanes[lane];
+    }
+
     inline batch<float, 8> operator-(const batch<float, 8>& rhs)
     {
         return _mm256_xor_ps(rhs, _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)));
@@ -315,6 +326,16 @@ namespace xsimd
         return _mm256_max_ps(lhs, rhs);
     }
 
+    // Lane-wise minimum with std::fmin NaN handling: when exactly one operand
+    // of a lane is NaN, the other operand is returned for that lane.
+    // _mm256_min_ps alone yields its second operand whenever either input is
+    // NaN, so a NaN in rhs would leak into the result; those lanes are
+    // replaced by lhs. If both operands are NaN the lane stays NaN.
+    inline batch<float, 8> fmin(const batch<float, 8>& lhs, const batch<float, 8>& rhs)
+    {
+        const __m256 a = lhs;
+        const __m256 b = rhs;
+        // All bits set in every lane where rhs is NaN (unordered with itself).
+        const __m256 b_is_nan = _mm256_cmp_ps(b, b, _CMP_UNORD_Q);
+        // blendv picks its second argument where the mask's sign bit is set.
+        return _mm256_blendv_ps(_mm256_min_ps(a, b), a, b_is_nan);
+    }
+
+    // Lane-wise maximum with std::fmax NaN handling: when exactly one operand
+    // of a lane is NaN, the other operand is returned for that lane.
+    // _mm256_max_ps alone yields its second operand whenever either input is
+    // NaN, so a NaN in rhs would leak into the result; those lanes are
+    // replaced by lhs. If both operands are NaN the lane stays NaN.
+    inline batch<float, 8> fmax(const batch<float, 8>& lhs, const batch<float, 8>& rhs)
+    {
+        const __m256 a = lhs;
+        const __m256 b = rhs;
+        // All bits set in every lane where rhs is NaN (unordered with itself).
+        const __m256 b_is_nan = _mm256_cmp_ps(b, b, _CMP_UNORD_Q);
+        // blendv picks its second argument where the mask's sign bit is set.
+        return _mm256_blendv_ps(_mm256_max_ps(a, b), a, b_is_nan);
+    }
+
     inline batch<float, 8> abs(const batch<float, 8>& rhs)
     {
         __m256 sign_mask = _mm256_set1_ps(-0.f); // -0.f = 1 << 31

diff --git a/include/xsimd/types/xsimd_avx_int.hpp b/include/xsimd/types/xsimd_avx_int.hpp
@@ -76,6 +76,8 @@ namespace xsimd
         void store_aligned(int* dst) const;
         void store_unaligned(int* dst) const;
 
+        int operator[](std::size_t index) const;
+
     private:
 
         __m256i m_value;
@@ -273,6 +275,13 @@ namespace xsimd
         _mm256_storeu_si256((__m256i*)dst, m_value);
     }
 
+    // Scalar read of a single lane. The batch is first written to a
+    // 32-byte-aligned scratch array through store_aligned; the index is
+    // reduced modulo 8, so an out-of-range index wraps around instead of
+    // reading past the end of the array.
+    inline int batch<int, 8>::operator[](std::size_t index) const
+    {
+        alignas(32) int lanes[8];
+        store_aligned(lanes);
+        const std::size_t lane = index % 8;
+        return lanes[lane];
+    }
+
     inline batch<int, 8> operator-(const batch<int, 8>& rhs)
     {
 #if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX2_VERSION

diff --git a/include/xsimd/types/xsimd_sse_double.hpp b/include/xsimd/types/xsimd_sse_double.hpp
@@ -77,6 +77,8 @@ namespace xsimd
         void store_aligned(double* dst) const;
         void store_unaligned(double* dst) const;
 
+        double operator[](std::size_t index) const;
+
     private:
 
         __m128d m_value;
@@ -100,6 +102,8 @@ namespace xsimd
 
     batch<double, 2> min(const batch<double, 2>& lhs, const batch<double, 2>& rhs);
     batch<double, 2> max(const batch<double, 2>& lhs, const batch<double, 2>& rhs);
+    batch<double, 2> fmin(const batch<double, 2>& lhs, const batch<double, 2>& rhs);
+    batch<double, 2> fmax(const batch<double, 2>& lhs, const batch<double, 2>& rhs);
 
     batch<double, 2> abs(const batch<double, 2>& rhs);
     batch<double, 2> sqrt(const batch<double, 2>& rhs);
@@ -235,6 +239,13 @@ namespace xsimd
         _mm_storeu_pd(dst, m_value);
     }
 
+    // Scalar read of a single lane. The batch is first written to a
+    // 16-byte-aligned scratch array through store_aligned; the index is
+    // reduced modulo 2, so an out-of-range index wraps around instead of
+    // reading past the end of the array.
+    inline double batch<double, 2>::operator[](std::size_t index) const
+    {
+        alignas(16) double lanes[2];
+        store_aligned(lanes);
+        const std::size_t lane = index % 2;
+        return lanes[lane];
+    }
+
     inline batch<double, 2> operator-(const batch<double, 2>& rhs)
     {
         return _mm_xor_pd(rhs, _mm_castsi128_pd(_mm_setr_epi32(0, 0x80000000,
@@ -311,6 +322,16 @@ namespace xsimd
         return _mm_max_pd(lhs, rhs);
     }
 
+    // Lane-wise minimum with std::fmin NaN handling: when exactly one operand
+    // of a lane is NaN, the other operand is returned for that lane.
+    // _mm_min_pd alone yields its second operand whenever either input is
+    // NaN, so a NaN in rhs would leak into the result; those lanes are
+    // replaced by lhs. If both operands are NaN the lane stays NaN.
+    inline batch<double, 2> fmin(const batch<double, 2>& lhs, const batch<double, 2>& rhs)
+    {
+        const __m128d a = lhs;
+        const __m128d b = rhs;
+        // All bits set in every lane where rhs is NaN (unordered with itself).
+        const __m128d b_is_nan = _mm_cmpunord_pd(b, b);
+        // SSE2-only select: lhs where rhs is NaN, min(lhs, rhs) elsewhere.
+        return _mm_or_pd(_mm_and_pd(b_is_nan, a),
+                         _mm_andnot_pd(b_is_nan, _mm_min_pd(a, b)));
+    }
+
+    // Lane-wise maximum with std::fmax NaN handling: when exactly one operand
+    // of a lane is NaN, the other operand is returned for that lane.
+    // _mm_max_pd alone yields its second operand whenever either input is
+    // NaN, so a NaN in rhs would leak into the result; those lanes are
+    // replaced by lhs. If both operands are NaN the lane stays NaN.
+    inline batch<double, 2> fmax(const batch<double, 2>& lhs, const batch<double, 2>& rhs)
+    {
+        const __m128d a = lhs;
+        const __m128d b = rhs;
+        // All bits set in every lane where rhs is NaN (unordered with itself).
+        const __m128d b_is_nan = _mm_cmpunord_pd(b, b);
+        // SSE2-only select: lhs where rhs is NaN, max(lhs, rhs) elsewhere.
+        return _mm_or_pd(_mm_and_pd(b_is_nan, a),
+                         _mm_andnot_pd(b_is_nan, _mm_max_pd(a, b)));
+    }
+
     inline batch<double, 2> abs(const batch<double, 2>& rhs)
     {
         __m128d sign_mask = _mm_set1_pd(-0.); // -0. = 1 << 63

diff --git a/include/xsimd/types/xsimd_sse_float.hpp b/include/xsimd/types/xsimd_sse_float.hpp
@@ -77,6 +77,8 @@ namespace xsimd
         void store_aligned(float* dst) const;
         void store_unaligned(float* dst) const;
 
+        float operator[](std::size_t index) const;
+
     private:
 
         __m128 m_value;
@@ -100,6 +102,8 @@ namespace xsimd
 
     batch<float, 4> min(const batch<float, 4>& lhs, const batch<float, 4>& rhs);
     batch<float, 4> max(const batch<float, 4>& lhs, const batch<float, 4>& rhs);
+    batch<float, 4> fmin(const batch<float, 4>& lhs, const batch<float, 4>& rhs);
+    batch<float, 4> fmax(const batch<float, 4>& lhs, const batch<float, 4>& rhs);
 
     batch<float, 4> abs(const batch<float, 4>& rhs);
     batch<float, 4> sqrt(const batch<float, 4>& rhs);
@@ -234,6 +238,13 @@ namespace xsimd
         _mm_storeu_ps(dst, m_value);
     }
 
+    // Scalar read of a single lane. The batch is first written to a
+    // 16-byte-aligned scratch array through store_aligned; the index is
+    // reduced modulo 4, so an out-of-range index wraps around instead of
+    // reading past the end of the array.
+    inline float batch<float, 4>::operator[](std::size_t index) const
+    {
+        alignas(16) float lanes[4];
+        store_aligned(lanes);
+        const std::size_t lane = index % 4;
+        return lanes[lane];
+    }
+
     inline batch<float, 4> operator-(const batch<float, 4>& rhs)
     {
         return _mm_xor_ps(rhs, _mm_castsi128_ps(_mm_set1_epi32(0x80000000)));
@@ -309,6 +320,16 @@ namespace xsimd
         return _mm_max_ps(lhs, rhs);
     }
 
+    // Lane-wise minimum with std::fmin NaN handling: when exactly one operand
+    // of a lane is NaN, the other operand is returned for that lane.
+    // _mm_min_ps alone yields its second operand whenever either input is
+    // NaN, so a NaN in rhs would leak into the result; those lanes are
+    // replaced by lhs. If both operands are NaN the lane stays NaN.
+    inline batch<float, 4> fmin(const batch<float, 4>& lhs, const batch<float, 4>& rhs)
+    {
+        const __m128 a = lhs;
+        const __m128 b = rhs;
+        // All bits set in every lane where rhs is NaN (unordered with itself).
+        const __m128 b_is_nan = _mm_cmpunord_ps(b, b);
+        // SSE-only select: lhs where rhs is NaN, min(lhs, rhs) elsewhere.
+        return _mm_or_ps(_mm_and_ps(b_is_nan, a),
+                         _mm_andnot_ps(b_is_nan, _mm_min_ps(a, b)));
+    }
+
+    // Lane-wise maximum with std::fmax NaN handling: when exactly one operand
+    // of a lane is NaN, the other operand is returned for that lane.
+    // _mm_max_ps alone yields its second operand whenever either input is
+    // NaN, so a NaN in rhs would leak into the result; those lanes are
+    // replaced by lhs. If both operands are NaN the lane stays NaN.
+    inline batch<float, 4> fmax(const batch<float, 4>& lhs, const batch<float, 4>& rhs)
+    {
+        const __m128 a = lhs;
+        const __m128 b = rhs;
+        // All bits set in every lane where rhs is NaN (unordered with itself).
+        const __m128 b_is_nan = _mm_cmpunord_ps(b, b);
+        // SSE-only select: lhs where rhs is NaN, max(lhs, rhs) elsewhere.
+        return _mm_or_ps(_mm_and_ps(b_is_nan, a),
+                         _mm_andnot_ps(b_is_nan, _mm_max_ps(a, b)));
+    }
+
     inline batch<float, 4> abs(const batch<float, 4>& rhs)
     {
         __m128 sign_mask = _mm_set1_ps(-0.f); // -0.f = 1 << 31

diff --git a/include/xsimd/types/xsimd_sse_int.hpp b/include/xsimd/types/xsimd_sse_int.hpp
@@ -76,6 +76,8 @@ namespace xsimd
         void store_aligned(int* dst) const;
         void store_unaligned(int* dst) const;
 
+        int operator[](std::size_t index) const;
+
     private:
 
         __m128i m_value;
@@ -232,6 +234,13 @@ namespace xsimd
         _mm_storeu_si128((__m128i*)dst, m_value);
     }
 
+    // Scalar read of a single lane. The batch is first written to a
+    // 16-byte-aligned scratch array through store_aligned; the index is
+    // reduced modulo 4, so an out-of-range index wraps around instead of
+    // reading past the end of the array.
+    inline int batch<int, 4>::operator[](std::size_t index) const
+    {
+        alignas(16) int lanes[4];
+        store_aligned(lanes);
+        const std::size_t lane = index % 4;
+        return lanes[lane];
+    }
+
     inline batch<int, 4> operator-(const batch<int, 4>& rhs)
     {
         return _mm_sub_epi32(_mm_setzero_si128(), rhs);

diff --git a/test/xsimd_basic_test.hpp b/test/xsimd_basic_test.hpp
@@ -29,6 +29,7 @@ namespace xsimd
         res_type lhs;
         res_type rhs;
 
+        value_type extract_res;
         res_type minus_res;
         res_type add_vv_res;
         res_type add_vs_res;
@@ -106,6 +107,7 @@ namespace xsimd
         {
             lhs[i] = value_type(i) / 4 + value_type(1.2) * std::sqrt(value_type(i + 0.25));
             rhs[i] = value_type(10.2) / (i+2) + value_type(0.25);
+            extract_res = lhs[1];
             minus_res[i] = -lhs[i];
             add_vv_res[i] = lhs[i] + rhs[i];
             add_vs_res[i] = lhs[i] + s;
@@ -154,6 +156,7 @@ namespace xsimd
         res_type lhs;
         res_type rhs;
 
+        value_type extract_res;
         res_type minus_res;
         res_type add_vv_res;
         res_type add_vs_res;
@@ -221,6 +224,7 @@ namespace xsimd
         {
             lhs[i] = value_type(i) * 10;
             rhs[i] = value_type(4) + value_type(i);
+            extract_res = lhs[1];
             minus_res[i] = -lhs[i];
             add_vv_res[i] = lhs[i] + rhs[i];
             add_vs_res[i] = lhs[i] + s;
@@ -274,6 +278,12 @@ namespace xsimd
         out << space << name << " " << val_type << std::endl;
         out << dash << name_shift << '-' << shift << dash << std::endl << std::endl;
 
+        out << "operator[]               : ";
+        detail::load_vec(lhs, tester.lhs);
+        value_type es = lhs[1];
+        tmp_success = check_almost_equal(es, tester.extract_res, out);
+        success = success && tmp_success;
+
         out << "load/store aligned       : ";
         detail::load_vec(lhs, tester.lhs);
         detail::store_vec(lhs, res);
@@ -435,6 +445,18 @@ namespace xsimd
         tmp_success = check_almost_equal(res, tester.max_res, out);
         success = success && tmp_success;
 
+        out << "fmin(simd, simd)         : ";
+        vres = fmin(lhs, rhs);
+        detail::store_vec(vres, res);
+        tmp_success = check_almost_equal(res, tester.min_res, out);
+        success = success && tmp_success;
+
+        out << "fmax(simd, simd)         : ";
+        vres = fmax(lhs, rhs);
+        detail::store_vec(vres, res);
+        tmp_success = check_almost_equal(res, tester.max_res, out);
+        success = success && tmp_success;
+
         out << "abs(simd)                : ";
         vres = abs(lhs);
         detail::store_vec(vres, res);
@@ -459,13 +481,13 @@ namespace xsimd
         tmp_success = check_almost_equal(res, tester.fms_res, out);
         success = success && tmp_success;
 
-        out << "fnma(simd, simd, simd)    : ";
+        out << "fnma(simd, simd, simd)   : ";
         vres = fnma(lhs, rhs, rhs);
         detail::store_vec(vres, res);
         tmp_success = check_almost_equal(res, tester.fnma_res, out);
         success = success && tmp_success;
 
-        out << "fnms(simd, simd, simd)    : ";
+        out << "fnms(simd, simd, simd)   : ";
         vres = fnms(lhs, rhs, rhs);
         detail::store_vec(vres, res);
         tmp_success = check_almost_equal(res, tester.fnms_res, out);
@@ -506,6 +528,12 @@ namespace xsimd
         out << space << name << " " << val_type << std::endl;
         out << dash << name_shift << '-' << shift << dash << std::endl << std::endl;
 
+        out << "operator[]               : ";
+        detail::load_vec(lhs, tester.lhs);
+        value_type es = lhs[1];
+        tmp_success = check_almost_equal(es, tester.extract_res, out);
+        success = success && tmp_success;
+
         out << "load/store aligned       : ";
         detail::load_vec(lhs, tester.lhs);
         detail::store_vec(lhs, res);
@@ -658,13 +686,13 @@ namespace xsimd
         tmp_success = check_almost_equal(res, tester.fms_res, out);
         success = success && tmp_success;
 
-        out << "fnma(simd, simd, simd)    : ";
+        out << "fnma(simd, simd, simd)   : ";
         vres = fnma(lhs, rhs, rhs);
         detail::store_vec(vres, res);
         tmp_success = check_almost_equal(res, tester.fnma_res, out);
         success = success && tmp_success;
 
-        out << "fnms(simd, simd, simd)    : ";
+        out << "fnms(simd, simd, simd)   : ";
         vres = fnms(lhs, rhs, rhs);
         detail::store_vec(vres, res);
         tmp_success = check_almost_equal(res, tester.fnms_res, out);