Skip to content

Commit

Permalink
Merge pull request #23 from JohanMabille/rounding
Browse files Browse the repository at this point in the history
rounding functions
  • Loading branch information
JohanMabille committed May 29, 2017
2 parents bfd5b04 + 2de9097 commit d8629e9
Show file tree
Hide file tree
Showing 10 changed files with 569 additions and 4 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ set(XSIMD_HEADERS
${XSIMD_INCLUDE_DIR}/xsimd/config/xsimd_config.hpp
${XSIMD_INCLUDE_DIR}/xsimd/config/xsimd_include.hpp
${XSIMD_INCLUDE_DIR}/xsimd/config/xsimd_instruction_set.hpp
${XSIMD_INCLUDE_DIR}/xsimd/math/xsimd_fp_sign.hpp
${XSIMD_INCLUDE_DIR}/xsimd/math/xsimd_numerical_constant.hpp
${XSIMD_INCLUDE_DIR}/xsimd/math/xsimd_rounding.hpp
${XSIMD_INCLUDE_DIR}/xsimd/memory/xsimd_aligned_allocator.hpp
${XSIMD_INCLUDE_DIR}/xsimd/memory/xsimd_aligned_stack_buffer.hpp
${XSIMD_INCLUDE_DIR}/xsimd/types/xsimd_avx_conversion.hpp
Expand Down
43 changes: 43 additions & 0 deletions include/xsimd/math/xsimd_fp_sign.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/***************************************************************************
* Copyright (c) 2016, Johan Mabille and Sylvain Corlay *
* *
* Distributed under the terms of the BSD 3-Clause License. *
* *
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/

#ifndef XSIMD_FP_SIGN_HPP
#define XSIMD_FP_SIGN_HPP

#include "xsimd_numerical_constant.hpp"

namespace xsimd
{

// Returns x masked with the bit pattern of -0.0 (see minuszero), i.e. a
// batch that keeps only the sign information of each element of x.
template <class T, std::size_t N>
batch<T, N> bitofsign(const batch<T, N>& x);

// Returns abs(x1) OR-ed with bitofsign(x2): the magnitude of x1 carrying
// the sign of x2.
template <class T, std::size_t N>
batch<T, N> copysign(const batch<T, N>& x1, const batch<T, N>& x2);

/**************************
* fp_sign implementation *
**************************/

// Isolates the sign of every element of x by AND-ing it with the bit
// pattern of -0.0, as provided by minuszero for the batch type.
template <class T, std::size_t N>
inline batch<T, N> bitofsign(const batch<T, N>& x)
{
    return x & minuszero<batch<T, N>>();
}

// Builds a batch whose magnitude comes from x1 and whose sign comes
// from x2.
template <class T, std::size_t N>
inline batch<T, N> copysign(const batch<T, N>& x1, const batch<T, N>& x2)
{
    const batch<T, N> magnitude = abs(x1);
    const batch<T, N> sign = bitofsign(x2);
    // The magnitude has its sign cleared, so OR-ing installs x2's sign.
    return magnitude | sign;
}

}

#endif

94 changes: 94 additions & 0 deletions include/xsimd/math/xsimd_numerical_constant.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/***************************************************************************
* Copyright (c) 2016, Johan Mabille and Sylvain Corlay *
* *
* Distributed under the terms of the BSD 3-Clause License. *
* *
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/

#ifndef XSIMD_NUMERICAL_CONSTANT_HPP
#define XSIMD_NUMERICAL_CONSTANT_HPP

#include "../types/xsimd_types_include.hpp"

namespace xsimd
{
// Each constant below is specialized for float and double; the generic
// version handles any type T that exposes a value_type and can be
// constructed from a value of that type.

// Threshold used by the rounding functions: 2^24 for float, 2^53 for double.
template <class T>
constexpr T maxflint() noexcept;

// Negative zero (-0.0), used as a sign mask.
template <class T>
constexpr T minuszero() noexcept;

// 2^23 for float, 2^52 for double.
template <class T>
constexpr T twotonmb() noexcept;

/***************************
* maxflint implementation *
***************************/

// Generic version: builds a T from the scalar constant associated with
// T::value_type.
template <class T>
constexpr T maxflint() noexcept
{
return T(maxflint<typename T::value_type>());
}

// float: 16777216 == 2^24.
// NOTE(review): presumably the bound above which every float is already
// an integer-valued number; trunc and round return x unchanged past it.
template <>
constexpr float maxflint<float>() noexcept
{
return 16777216.0f;
}

// double: 9007199254740992 == 2^53.
template <>
constexpr double maxflint<double>() noexcept
{
return 9007199254740992.0;
}

/****************************
* minuszero implementation *
****************************/

// Generic version: builds a T from the scalar negative zero associated
// with T::value_type.
template <class T>
constexpr T minuszero() noexcept
{
return T(minuszero<typename T::value_type>());
}

// float negative zero; bitofsign uses it as an AND mask.
template <>
constexpr float minuszero<float>() noexcept
{
return -0.0f;
}

// double negative zero.
template <>
constexpr double minuszero<double>() noexcept
{
return -0.0;
}

/***************************
* twotonmb implementation *
***************************/

// Generic version: builds a T from the scalar constant associated with
// T::value_type.
template <class T>
constexpr T twotonmb() noexcept
{
return T(twotonmb<typename T::value_type>());
}

// float: 8388608 == 2^23.
// NOTE(review): the name presumably stands for "two to the number of
// mantissa bits" - confirm.
template <>
constexpr float twotonmb<float>() noexcept
{
return 8388608.0f;
}

// double: 4503599627370496 == 2^52.
template <>
constexpr double twotonmb<double>() noexcept
{
return 4503599627370496.0;
}

}

#endif

216 changes: 216 additions & 0 deletions include/xsimd/math/xsimd_rounding.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
/***************************************************************************
* Copyright (c) 2016, Johan Mabille and Sylvain Corlay *
* *
* Distributed under the terms of the BSD 3-Clause License. *
* *
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/

#ifndef XSIMD_ROUNDING_HPP
#define XSIMD_ROUNDING_HPP

#include <cmath>
#include "xsimd_fp_sign.hpp"
#include "xsimd_numerical_constant.hpp"

namespace xsimd
{

// Element-wise rounding toward positive infinity.
template <class T, std::size_t N>
batch<T, N> ceil(const batch<T, N>& x);

// Element-wise rounding toward negative infinity.
template <class T, std::size_t N>
batch<T, N> floor(const batch<T, N>& x);

// Element-wise rounding toward zero.
template <class T, std::size_t N>
batch<T, N> trunc(const batch<T, N>& x);

// Element-wise rounding to the nearest integer value, halfway cases
// being rounded away from zero.
template <class T, std::size_t N>
batch<T, N> round(const batch<T, N>& x);

// Contrary to their std counterparts, these functions
// assume that the rounding mode is FE_TONEAREST.

template <class T, std::size_t N>
batch<T, N> nearbyint(const batch<T, N>& x);

// Implemented as a plain call to nearbyint.
template <class T, std::size_t N>
batch<T, N> rint(const batch<T, N>& x);

/**********************
* SSE implementation *
**********************/

#if XSIMD_X86_INSTR_SET >= XSIMD_X86_SSE4_1_VERSION

// SSE4.1, 4 x float: forwards to the _mm_ceil_ps intrinsic.
template <>
inline batch<float, 4> ceil(const batch<float, 4>& x)
{
return _mm_ceil_ps(x);
}

// SSE4.1, 2 x double: forwards to the _mm_ceil_pd intrinsic.
template <>
inline batch<double, 2> ceil(const batch<double, 2>& x)
{
return _mm_ceil_pd(x);
}

// SSE4.1, 4 x float: forwards to the _mm_floor_ps intrinsic.
template <>
inline batch<float, 4> floor(const batch<float, 4>& x)
{
return _mm_floor_ps(x);
}

// SSE4.1, 2 x double: forwards to the _mm_floor_pd intrinsic.
template <>
inline batch<double, 2> floor(const batch<double, 2>& x)
{
return _mm_floor_pd(x);
}

// SSE4.1, 4 x float: _mm_round_ps with the _MM_FROUND_TO_ZERO mode.
template <>
inline batch<float, 4> trunc(const batch<float, 4>& x)
{
return _mm_round_ps(x, _MM_FROUND_TO_ZERO);
}

// SSE4.1, 2 x double: _mm_round_pd with the _MM_FROUND_TO_ZERO mode.
template <>
inline batch<double, 2> trunc(const batch<double, 2>& x)
{
return _mm_round_pd(x, _MM_FROUND_TO_ZERO);
}

// SSE4.1, 4 x float: _mm_round_ps with the _MM_FROUND_TO_NEAREST_INT mode,
// passed explicitly rather than read from the current rounding mode.
template <>
inline batch<float, 4> nearbyint(const batch<float, 4>& x)
{
return _mm_round_ps(x, _MM_FROUND_TO_NEAREST_INT);
}

// SSE4.1, 2 x double: _mm_round_pd with the _MM_FROUND_TO_NEAREST_INT mode.
template<>
inline batch<double, 2> nearbyint(const batch<double, 2>& x)
{
return _mm_round_pd(x, _MM_FROUND_TO_NEAREST_INT);
}

#elif XSIMD_X86_INSTR_SET >= XSIMD_X86_SSE2_VERSION

// SSE2 fallback: ceil built on top of trunc. Truncation moves toward
// zero, so every element whose truncated value ended up below x must be
// bumped by one; the other elements are already correct.
template <class T, std::size_t N>
inline batch<T, N> ceil(const batch<T, N>& x)
{
    const batch<T, N> truncated = trunc(x);
    const batch<T, N> bumped = truncated + batch<T, N>(1);
    return select(truncated < x, bumped, truncated);
}

// SSE2 fallback: floor built on top of trunc. Truncation moves toward
// zero, so every element whose truncated value ended up above x must be
// lowered by one; the other elements are already correct.
template <class T, std::size_t N>
inline batch<T, N> floor(const batch<T, N>& x)
{
    const batch<T, N> truncated = trunc(x);
    const batch<T, N> lowered = truncated - batch<T, N>(1);
    return select(truncated > x, lowered, truncated);
}

// SSE2 fallback, 4 x float: truncates through a float -> int -> float
// round trip. Elements whose magnitude is not below maxflint are
// returned unchanged instead of going through the conversion.
template <>
inline batch<float, 4> trunc(const batch<float, 4>& x)
{
    using btype = batch<float, 4>;
    const btype via_int = to_float(to_int(x));
    const auto convertible = abs(x) < maxflint<btype>();
    return select(convertible, via_int, x);
}

// SSE2 fallback, 2 x double: no vector path here, each element is
// truncated with std::trunc and the batch is rebuilt from the results.
template <>
inline batch<double, 2> trunc(const batch<double, 2>& x)
{
    const double first = std::trunc(x[0]);
    const double second = std::trunc(x[1]);
    return batch<double, 2>(first, second);
}

// SSE2 fallback: rounds every element of x to an integer value, assuming
// the rounding mode is FE_TONEAREST.
//
// The computation is done on the magnitude of x: adding and then
// subtracting twotonmb (2^23 for float, 2^52 for double) pushes the
// fractional bits out of the mantissa, so the addition itself performs
// the rounding. The sign of x is put back at the end, which also keeps
// the sign of negative zero.
template <class T, std::size_t N>
inline batch<T, N> nearbyint(const batch<T, N>& x)
{
    using btype = batch<T, N>;
    btype s = bitofsign(x);
    // The magnitude is required here: OR-ing x with its own sign bit would
    // leave x unchanged, and negative inputs would then be rounded in a
    // range where the spacing between values is smaller than one.
    btype v = abs(x);
    btype t2n = twotonmb<btype>();
    btype d0 = v + t2n;
    // Magnitudes that are not below t2n are kept unchanged.
    return s | select(v < t2n, d0 - t2n, v);
}

#endif

/**********************
* AVX implementation *
**********************/

#if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX_VERSION

// AVX, 8 x float: _mm256_round_ps with the _MM_FROUND_CEIL mode.
template <>
inline batch<float, 8> ceil(const batch<float, 8>& x)
{
return _mm256_round_ps(x, _MM_FROUND_CEIL);
}

// AVX, 4 x double: _mm256_round_pd with the _MM_FROUND_CEIL mode.
template <>
inline batch<double, 4> ceil(const batch<double, 4>& x)
{
return _mm256_round_pd(x, _MM_FROUND_CEIL);
}

// AVX, 8 x float: _mm256_round_ps with the _MM_FROUND_FLOOR mode.
template <>
inline batch<float, 8> floor(const batch<float, 8>& x)
{
return _mm256_round_ps(x, _MM_FROUND_FLOOR);
}

// AVX, 4 x double: _mm256_round_pd with the _MM_FROUND_FLOOR mode.
template<>
inline batch<double, 4> floor(const batch<double, 4>& x)
{
return _mm256_round_pd(x, _MM_FROUND_FLOOR);
}

// AVX, 8 x float: _mm256_round_ps with the _MM_FROUND_TO_ZERO mode.
template <>
inline batch<float, 8> trunc<float, 8>(const batch<float, 8>& x)
{
return _mm256_round_ps(x, _MM_FROUND_TO_ZERO);
}

// AVX, 4 x double: _mm256_round_pd with the _MM_FROUND_TO_ZERO mode.
template <>
inline batch<double, 4> trunc<double, 4>(const batch<double, 4>& x)
{
return _mm256_round_pd(x, _MM_FROUND_TO_ZERO);
}

// AVX, 8 x float: _mm256_round_ps with the _MM_FROUND_TO_NEAREST_INT mode,
// passed explicitly rather than read from the current rounding mode.
template <>
inline batch<float, 8> nearbyint(const batch<float, 8>& x)
{
return _mm256_round_ps(x, _MM_FROUND_TO_NEAREST_INT);
}

// AVX, 4 x double: _mm256_round_pd with the _MM_FROUND_TO_NEAREST_INT mode.
template<>
inline batch<double, 4> nearbyint(const batch<double, 4>& x)
{
return _mm256_round_pd(x, _MM_FROUND_TO_NEAREST_INT);
}

#endif

/**************************
* Generic implementation *
**************************/

// Rounds every element of x to the nearest integer value, halfway cases
// going away from zero. The work is done on the magnitude of x and the
// sign is restored with copysign. Elements whose magnitude exceeds
// maxflint are returned unchanged.
template <class T, std::size_t N>
inline batch<T, N> round(const batch<T, N>& x)
{
    using btype = batch<T, N>;
    const btype magnitude = abs(x);
    const btype upper = ceil(magnitude);
    const btype lower = upper - btype(1);
    // Keep the upper integer unless the magnitude lies strictly below the
    // midpoint between the two candidates.
    const btype nearest = select(upper - btype(0.5) > magnitude, lower, upper);
    return select(magnitude > maxflint<btype>(), x, copysign(nearest, x));
}

// Same result as nearbyint: the call is forwarded unchanged.
template <class T, std::size_t N>
inline batch<T, N> rint(const batch<T, N>& x)
{
return nearbyint(x);
}
}

#endif

2 changes: 1 addition & 1 deletion include/xsimd/types/xsimd_avx_double.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ namespace xsimd

// Conversion to the raw __m256d value: returns the m_value member.
// Returning *this instead would need this very conversion to produce the
// __m256d result, so the operator would call itself without end.
inline batch_bool<double, 4>::operator __m256d() const
{
    return m_value;
}

inline batch_bool<double, 4> operator&(const batch_bool<double, 4>& lhs, const batch_bool<double, 4>& rhs)
Expand Down
4 changes: 3 additions & 1 deletion test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,10 @@ include_directories(${GTEST_INCLUDE_DIRS})

set(XSIMD_TESTS
main.cpp
test_xsimd.cpp
xsimd_basic_test.hpp
xsimd_basic_test.cpp
xsimd_rounding_test.hpp
xsimd_rounding_test.cpp
xsimd_tester.hpp
xsimd_test_utils.hpp
)
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions test/xsimd_basic_test.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/

#ifndef XSIMD_COMMON_TEST_HPP
#define XSIMD_COMMON_TEST_HPP
#ifndef XSIMD_BASIC_TEST_HPP
#define XSIMD_BASIC_TEST_HPP

#include "xsimd_tester.hpp"
#include "xsimd_test_utils.hpp"
Expand Down

0 comments on commit d8629e9

Please sign in to comment.