Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions include/xsimd/math/xsimd_fp_manipulation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ namespace xsimd
template <class T, std::size_t N>
batch<T, N> frexp(const batch<T, N>& arg, batch<as_integer_t<T>, N>& exp);

template <typename T>
as_float_t<T> fexp_to_float(const T& x);

/********************************************************
* Floating point manipulation functions implementation *
********************************************************/
Expand Down Expand Up @@ -66,6 +69,27 @@ namespace xsimd
exp = select(bool_cast(arg != b_type(0.)), exp, zero<i_type>());
return select((arg != b_type(0.)), x | bitwise_cast<b_type>(mask2frexp<b_type>()), b_type(0.));
}

template <typename T>
inline as_float_t<T> fexp_to_float(const T& x)
{
return to_float(x);
}

// fexps are 8/11-bit numbers. so we can down-cast to int16_t/int32_t then do the job
#if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX_VERSION && XSIMD_X86_INSTR_SET < XSIMD_X86_AVX512_VERSION
inline batch<double, 4> fexp_to_float(const batch<int64_t, 4>& x)
{
return _mm256_cvtepi32_pd(detail::xsimd_cvtepi64_epi32(x));
}
#endif

#if XSIMD_X86_INSTR_SET >= XSIMD_X86_SSE2_VERSION && XSIMD_X86_INSTR_SET < XSIMD_X86_AVX512_VERSION
inline batch<double, 2> fexp_to_float(const batch<int64_t, 2>& x)
{
return _mm_cvtepi32_pd(_mm_shuffle_epi32(x, _MM_SHUFFLE(0, 0, 2, 0)));
}
#endif
}

#endif
16 changes: 8 additions & 8 deletions include/xsimd/math/xsimd_logarithm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ namespace xsimd
B t2 = z * horner<B, 0x3f2aaaaa, 0x3e91e9ee>(w);
B R = t2 + t1;
B hfsq = B(0.5) * f * f;
B dk = to_float(k);
B dk = fexp_to_float(k);
B r = fma(dk, log_2hi<B>(), fma(s, (hfsq + R), dk * log_2lo<B>()) - hfsq + f);
#ifndef XSIMD_NO_INFINITIES
B zz = select(isnez, select(a == infinity<B>(), infinity<B>(), r), minusinfinity<B>());
Expand Down Expand Up @@ -136,7 +136,7 @@ namespace xsimd
#endif
hx += 0x3ff00000 - 0x3fe6a09e;
k += (hx >> 20) - 0x3ff;
B dk = to_float(k);
B dk = fexp_to_float(k);
hx = (hx & i_type(0x000fffff)) + 0x3fe6a09e;
x = bitwise_cast<B>(hx << 32 | (i_type(0xffffffff) & bitwise_cast<i_type>(x)));

Expand Down Expand Up @@ -223,7 +223,7 @@ namespace xsimd
B t2 = z * horner<B, 0x3f2aaaaa, 0x3e91e9ee>(w);
B R = t1 + t2;
B hfsq = B(0.5) * f * f;
B dk = to_float(k);
B dk = fexp_to_float(k);
B r = fma(fms(s, hfsq + R, hfsq) + f, invlog_2<B>(), dk);
#ifndef XSIMD_NO_INFINITIES
B zz = select(isnez, select(a == infinity<B>(), infinity<B>(), r), minusinfinity<B>());
Expand Down Expand Up @@ -287,7 +287,7 @@ namespace xsimd
B lo = fma(s, hfsq + R, f - hi - hfsq);
B val_hi = hi * invlog_2hi<B>();
B val_lo = fma(lo + hi, invlog_2lo<B>(), lo * invlog_2hi<B>());
B dk = to_float(k);
B dk = fexp_to_float(k);
B w1 = dk + val_hi;
val_lo += (dk - w1) + val_hi;
val_hi = w1;
Expand Down Expand Up @@ -362,7 +362,7 @@ namespace xsimd
B t1 = w * horner<B, 0x3eccce13, 0x3e789e26>(w);
B t2 = z * horner<B, 0x3f2aaaaa, 0x3e91e9ee>(w);
B R = t2 + t1;
B dk = to_float(k);
B dk = fexp_to_float(k);
B hfsq = B(0.5) * f * f;
B hibits = f - hfsq;
hibits &= bitwise_cast<B>(i_type(0xfffff000));
Expand Down Expand Up @@ -419,7 +419,7 @@ namespace xsimd
hx = (hx & i_type(0x000fffff)) + 0x3fe6a09e;
x = bitwise_cast<B>(hx << 32 | (i_type(0xffffffff) & bitwise_cast<i_type>(x)));
B f = --x;
B dk = to_float(k);
B dk = fexp_to_float(k);
B s = f / (B(2.) + f);
B z = s * s;
B w = z * z;
Expand Down Expand Up @@ -498,7 +498,7 @@ namespace xsimd
B t2 = z * horner<B, 0x3f2aaaaa, 0x3e91e9ee>(w);
B R = t2 + t1;
B hfsq = B(0.5) * f * f;
B dk = to_float(k);
B dk = fexp_to_float(k);
/* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
B c = select(bool_cast(k >= i_type(2)), B(1.) - (uf - a), a - (uf - B(1.))) / uf;
B r = fma(dk, log_2hi<B>(), fma(s, (hfsq + R), dk * log_2lo<B>() + c) - hfsq + f);
Expand Down Expand Up @@ -550,7 +550,7 @@ namespace xsimd
0x3fc7466496cb03dell,
0x3fc2f112df3e5244ll>(w);
B R = t2 + t1;
B dk = to_float(k);
B dk = fexp_to_float(k);
B r = fma(dk, log_2hi<B>(), fma(s, hfsq + R, dk * log_2lo<B>() + c) - hfsq + f);
#ifndef XSIMD_NO_INFINITIES
B zz = select(isnez, select(a == infinity<B>(), infinity<B>(), r), minusinfinity<B>());
Expand Down
9 changes: 9 additions & 0 deletions include/xsimd/types/xsimd_int_conversion.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ namespace xsimd
__m128i xsimd_cvtepi32_epi16(__m256i a);
__m128i xsimd_cvtepi32_epu16(__m256i a);

__m128i xsimd_cvtepi64_epi32(__m256i a);

/******************
* Implementation *
******************/
Expand Down Expand Up @@ -165,6 +167,13 @@ namespace xsimd
#endif
return res;
}

inline __m128i xsimd_cvtepi64_epi32(__m256i a)
{
__m128 hi = _mm_castsi128_ps(_mm256_extractf128_si256(a, 1));
__m128 lo = _mm_castsi128_ps(_mm256_castsi256_si128(a));
return _mm_castps_si128(_mm_shuffle_ps(lo, hi, _MM_SHUFFLE(2, 0, 2, 0)));
}
}
}

Expand Down