Merge pull request #2748 from bobo1on1/32bitmul

Was tested, works.
xbmc · May 14, 2013 · 27044a4 · 27044a4
2 parents 6f0bfc7 + edf2e3d
commit 27044a4
Showing 1 changed file with 18 additions and 14 deletions.
diff --git a/xbmc/cores/AudioEngine/Utils/AEConvert.cpp b/xbmc/cores/AudioEngine/Utils/AEConvert.cpp
@@ -922,17 +922,21 @@ unsigned int CAEConvert::Float_S24NE3(float *data, const unsigned int samples, u
   return samples * 3;
 }
 
+//float can't store INT32_MAX, it gets rounded up to INT32_MAX + 1
+//INT32_MAX - 127 is the maximum value that can exactly be stored in both 32 bit float and int
+#define AE_MUL32 ((float)(INT32_MAX - 127))
+
 unsigned int CAEConvert::Float_S32LE(float *data, const unsigned int samples, uint8_t *dest)
 {
   int32_t *dst = (int32_t*)dest;
   #ifdef __SSE__
-  const __m128 mul = _mm_set_ps1((float)INT32_MAX);
+  const __m128 mul = _mm_set_ps1(AE_MUL32);
   unsigned int count = samples;
 
   /* work around invalid alignment */
   while ((((uintptr_t)data & 0xF) || ((uintptr_t)dest & 0xF)) && count > 0)
   {
-    dst[0] = safeRound(data[0] * (float)INT32_MAX);
+    dst[0] = safeRound(data[0] * AE_MUL32);
     ++data;
     ++dst;
     --count;
@@ -955,7 +959,7 @@ unsigned int CAEConvert::Float_S32LE(float *data, const unsigned int samples, ui
     const uint32_t odd = samples - even;
     if (odd == 1)
     {
-      dst[0] = safeRound(data[0] * (float)INT32_MAX);
+      dst[0] = safeRound(data[0] * AE_MUL32);
       dst[0] = Endian_SwapLE32(dst[0]);
     }
     else
@@ -988,7 +992,7 @@ unsigned int CAEConvert::Float_S32LE(float *data, const unsigned int samples, ui
   /* no SIMD */
   for (uint32_t i = 0; i < samples; ++i, ++data, ++dst)
   {
-    dst[0] = safeRound(data[0] * (float)INT32_MAX);
+    dst[0] = safeRound(data[0] * AE_MUL32);
     dst[0] = Endian_SwapLE32(dst[0]);
   }
   #endif
@@ -1002,7 +1006,7 @@ unsigned int CAEConvert::Float_S32LE_Neon(float *data, const unsigned int sample
   int32_t *dst = (int32_t*)dest;
   for (float *end = data + (samples & ~0x3); data < end; data += 4, dst += 4)
   {
-    float32x4_t val = vmulq_n_f32(vld1q_f32((const float32_t *)data), INT32_MAX);
+    float32x4_t val = vmulq_n_f32(vld1q_f32((const float32_t *)data), AE_MUL32);
     int32x4_t   ret = vcvtq_s32_f32(val);
     #ifdef __BIG_ENDIAN__
     ret = vrev64q_s32(ret);
@@ -1012,7 +1016,7 @@ unsigned int CAEConvert::Float_S32LE_Neon(float *data, const unsigned int sample
 
   if (samples & 0x2)
   {
-    float32x2_t val = vmul_n_f32(vld1_f32((const float32_t *)data), INT32_MAX);
+    float32x2_t val = vmul_n_f32(vld1_f32((const float32_t *)data), AE_MUL32);
     int32x2_t   ret = vcvt_s32_f32(val);
     #ifdef __BIG_ENDIAN__
     ret = vrev64_s32(ret);
@@ -1024,7 +1028,7 @@ unsigned int CAEConvert::Float_S32LE_Neon(float *data, const unsigned int sample
 
   if (samples & 0x1)
   {
-    dst[0] = safeRound(data[0] * (float)INT32_MAX);
+    dst[0] = safeRound(data[0] * AE_MUL32);
     dst[0] = Endian_SwapLE32(dst[0]);
   }
 #endif
@@ -1035,13 +1039,13 @@ unsigned int CAEConvert::Float_S32BE(float *data, const unsigned int samples, ui
 {
   int32_t *dst = (int32_t*)dest;
   #ifdef __SSE__
-  const __m128 mul = _mm_set_ps1((float)INT32_MAX);
+  const __m128 mul = _mm_set_ps1(AE_MUL32);
   unsigned int count = samples;
 
   /* work around invalid alignment */
   while ((((uintptr_t)data & 0xF) || ((uintptr_t)dest & 0xF)) && count > 0)
   {
-    dst[0] = safeRound(data[0] * (float)INT32_MAX);
+    dst[0] = safeRound(data[0] * AE_MUL32);
     ++data;
     ++dst;
     --count;
@@ -1064,7 +1068,7 @@ unsigned int CAEConvert::Float_S32BE(float *data, const unsigned int samples, ui
     const uint32_t odd = samples - even;
     if (odd == 1)
     {
-      dst[0] = safeRound(data[0] * (float)INT32_MAX);
+      dst[0] = safeRound(data[0] * AE_MUL32);
       dst[0] = Endian_SwapBE32(dst[0]);
     }
     else
@@ -1096,7 +1100,7 @@ unsigned int CAEConvert::Float_S32BE(float *data, const unsigned int samples, ui
   /* no SIMD */
   for (uint32_t i = 0; i < samples; ++i, ++data, ++dst)
   {
-    dst[0] = safeRound(data[0] * (float)INT32_MAX);
+    dst[0] = safeRound(data[0] * AE_MUL32);
     dst[0] = Endian_SwapBE32(dst[0]);
   }
   #endif
@@ -1110,7 +1114,7 @@ unsigned int CAEConvert::Float_S32BE_Neon(float *data, const unsigned int sample
   int32_t *dst = (int32_t*)dest;
   for (float *end = data + (samples & ~0x3); data < end; data += 4, dst += 4)
   {
-    float32x4_t val = vmulq_n_f32(vld1q_f32((const float32_t *)data), INT32_MAX);
+    float32x4_t val = vmulq_n_f32(vld1q_f32((const float32_t *)data), AE_MUL32);
     int32x4_t   ret = vcvtq_s32_f32(val);
     #ifndef __BIG_ENDIAN__
     ret = vrev64q_s32(ret);
@@ -1120,7 +1124,7 @@ unsigned int CAEConvert::Float_S32BE_Neon(float *data, const unsigned int sample
 
   if (samples & 0x2)
   {
-    float32x2_t val = vmul_n_f32(vld1_f32((const float32_t *)data), INT32_MAX);
+    float32x2_t val = vmul_n_f32(vld1_f32((const float32_t *)data), AE_MUL32);
     int32x2_t   ret = vcvt_s32_f32(val);
     #ifndef __BIG_ENDIAN__
     ret = vrev64_s32(ret);
@@ -1132,7 +1136,7 @@ unsigned int CAEConvert::Float_S32BE_Neon(float *data, const unsigned int sample
 
   if (samples & 0x1)
   {
-    dst[0] = safeRound(data[0] * (float)INT32_MAX);
+    dst[0] = safeRound(data[0] * AE_MUL32);
     dst[0] = Endian_SwapBE32(dst[0]);
   }
 #endif