Skip to content

Commit

Permalink
Merge pull request #2748 from bobo1on1/32bitmul
Browse files Browse the repository at this point in the history
Was tested, works.
  • Loading branch information
bobo1on1 committed May 14, 2013
2 parents 6f0bfc7 + edf2e3d commit 27044a4
Showing 1 changed file with 18 additions and 14 deletions.
32 changes: 18 additions & 14 deletions xbmc/cores/AudioEngine/Utils/AEConvert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -922,17 +922,21 @@ unsigned int CAEConvert::Float_S24NE3(float *data, const unsigned int samples, u
return samples * 3;
}

// A 32-bit float cannot represent INT32_MAX exactly; the conversion rounds it
// up to INT32_MAX + 1, which is outside the int32_t range.
// INT32_MAX - 127 is the largest value that is exactly representable both as a
// 32-bit float and as a 32-bit signed integer, so it serves as the scale factor
// when converting float samples to 32-bit integer samples.
#define AE_MUL32 (static_cast<float>(INT32_MAX - 127))

unsigned int CAEConvert::Float_S32LE(float *data, const unsigned int samples, uint8_t *dest)
{
int32_t *dst = (int32_t*)dest;
#ifdef __SSE__
const __m128 mul = _mm_set_ps1((float)INT32_MAX);
const __m128 mul = _mm_set_ps1(AE_MUL32);
unsigned int count = samples;

/* work around invalid alignment */
while ((((uintptr_t)data & 0xF) || ((uintptr_t)dest & 0xF)) && count > 0)
{
dst[0] = safeRound(data[0] * (float)INT32_MAX);
dst[0] = safeRound(data[0] * AE_MUL32);
++data;
++dst;
--count;
Expand All @@ -955,7 +959,7 @@ unsigned int CAEConvert::Float_S32LE(float *data, const unsigned int samples, ui
const uint32_t odd = samples - even;
if (odd == 1)
{
dst[0] = safeRound(data[0] * (float)INT32_MAX);
dst[0] = safeRound(data[0] * AE_MUL32);
dst[0] = Endian_SwapLE32(dst[0]);
}
else
Expand Down Expand Up @@ -988,7 +992,7 @@ unsigned int CAEConvert::Float_S32LE(float *data, const unsigned int samples, ui
/* no SIMD */
for (uint32_t i = 0; i < samples; ++i, ++data, ++dst)
{
dst[0] = safeRound(data[0] * (float)INT32_MAX);
dst[0] = safeRound(data[0] * AE_MUL32);
dst[0] = Endian_SwapLE32(dst[0]);
}
#endif
Expand All @@ -1002,7 +1006,7 @@ unsigned int CAEConvert::Float_S32LE_Neon(float *data, const unsigned int sample
int32_t *dst = (int32_t*)dest;
for (float *end = data + (samples & ~0x3); data < end; data += 4, dst += 4)
{
float32x4_t val = vmulq_n_f32(vld1q_f32((const float32_t *)data), INT32_MAX);
float32x4_t val = vmulq_n_f32(vld1q_f32((const float32_t *)data), AE_MUL32);
int32x4_t ret = vcvtq_s32_f32(val);
#ifdef __BIG_ENDIAN__
ret = vrev64q_s32(ret);
Expand All @@ -1012,7 +1016,7 @@ unsigned int CAEConvert::Float_S32LE_Neon(float *data, const unsigned int sample

if (samples & 0x2)
{
float32x2_t val = vmul_n_f32(vld1_f32((const float32_t *)data), INT32_MAX);
float32x2_t val = vmul_n_f32(vld1_f32((const float32_t *)data), AE_MUL32);
int32x2_t ret = vcvt_s32_f32(val);
#ifdef __BIG_ENDIAN__
ret = vrev64_s32(ret);
Expand All @@ -1024,7 +1028,7 @@ unsigned int CAEConvert::Float_S32LE_Neon(float *data, const unsigned int sample

if (samples & 0x1)
{
dst[0] = safeRound(data[0] * (float)INT32_MAX);
dst[0] = safeRound(data[0] * AE_MUL32);
dst[0] = Endian_SwapLE32(dst[0]);
}
#endif
Expand All @@ -1035,13 +1039,13 @@ unsigned int CAEConvert::Float_S32BE(float *data, const unsigned int samples, ui
{
int32_t *dst = (int32_t*)dest;
#ifdef __SSE__
const __m128 mul = _mm_set_ps1((float)INT32_MAX);
const __m128 mul = _mm_set_ps1(AE_MUL32);
unsigned int count = samples;

/* work around invalid alignment */
while ((((uintptr_t)data & 0xF) || ((uintptr_t)dest & 0xF)) && count > 0)
{
dst[0] = safeRound(data[0] * (float)INT32_MAX);
dst[0] = safeRound(data[0] * AE_MUL32);
++data;
++dst;
--count;
Expand All @@ -1064,7 +1068,7 @@ unsigned int CAEConvert::Float_S32BE(float *data, const unsigned int samples, ui
const uint32_t odd = samples - even;
if (odd == 1)
{
dst[0] = safeRound(data[0] * (float)INT32_MAX);
dst[0] = safeRound(data[0] * AE_MUL32);
dst[0] = Endian_SwapBE32(dst[0]);
}
else
Expand Down Expand Up @@ -1096,7 +1100,7 @@ unsigned int CAEConvert::Float_S32BE(float *data, const unsigned int samples, ui
/* no SIMD */
for (uint32_t i = 0; i < samples; ++i, ++data, ++dst)
{
dst[0] = safeRound(data[0] * (float)INT32_MAX);
dst[0] = safeRound(data[0] * AE_MUL32);
dst[0] = Endian_SwapBE32(dst[0]);
}
#endif
Expand All @@ -1110,7 +1114,7 @@ unsigned int CAEConvert::Float_S32BE_Neon(float *data, const unsigned int sample
int32_t *dst = (int32_t*)dest;
for (float *end = data + (samples & ~0x3); data < end; data += 4, dst += 4)
{
float32x4_t val = vmulq_n_f32(vld1q_f32((const float32_t *)data), INT32_MAX);
float32x4_t val = vmulq_n_f32(vld1q_f32((const float32_t *)data), AE_MUL32);
int32x4_t ret = vcvtq_s32_f32(val);
#ifndef __BIG_ENDIAN__
ret = vrev64q_s32(ret);
Expand All @@ -1120,7 +1124,7 @@ unsigned int CAEConvert::Float_S32BE_Neon(float *data, const unsigned int sample

if (samples & 0x2)
{
float32x2_t val = vmul_n_f32(vld1_f32((const float32_t *)data), INT32_MAX);
float32x2_t val = vmul_n_f32(vld1_f32((const float32_t *)data), AE_MUL32);
int32x2_t ret = vcvt_s32_f32(val);
#ifndef __BIG_ENDIAN__
ret = vrev64_s32(ret);
Expand All @@ -1132,7 +1136,7 @@ unsigned int CAEConvert::Float_S32BE_Neon(float *data, const unsigned int sample

if (samples & 0x1)
{
dst[0] = safeRound(data[0] * (float)INT32_MAX);
dst[0] = safeRound(data[0] * AE_MUL32);
dst[0] = Endian_SwapBE32(dst[0]);
}
#endif
Expand Down

0 comments on commit 27044a4

Please sign in to comment.