Permalink
Browse files

[droid] neon runtime split for AEConvert

  • Loading branch information...
1 parent 8763c16 commit 5da938082797c82c16866e50a31c0b16bf8273c9 Cory Fields committed Jul 12, 2012
Showing with 88 additions and 52 deletions.
  1. +82 −52 xbmc/cores/AudioEngine/Utils/AEConvert.cpp
  2. +6 −0 xbmc/cores/AudioEngine/Utils/AEConvert.h
@@ -41,6 +41,7 @@
#ifdef __ARM_NEON__
#include <arm_neon.h>
+#include "utils/CPUInfo.h"
#endif
/* Clamp x to the range [-1.0f, 1.0f]. The upper bound is applied first
 * (std::min against 1.0f) and the lower bound second (std::max against -1.0f);
 * the previous min/max ordering collapsed every input to -1.0f.
 * The argument is evaluated exactly once. */
#define CLAMP(x) std::max(-1.0f, std::min(1.0f, (float)(x)))
@@ -89,8 +90,15 @@ CAEConvert::AEConvertToFn CAEConvert::ToFloat(enum AEDataFormat dataFormat)
case AE_FMT_S24BE4: return &S24BE4_Float;
case AE_FMT_S24LE3: return &S24LE3_Float;
case AE_FMT_S24BE3: return &S24BE3_Float;
+#if defined(__ARM_NEON__)
+ case AE_FMT_S32LE :
+ return (g_cpuInfo.GetCPUFeatures() & CPU_FEATURE_NEON) ? &S32LE_Float_Neon : S32LE_Float;
+ case AE_FMT_S32BE :
+ return (g_cpuInfo.GetCPUFeatures() & CPU_FEATURE_NEON) ? &S32BE_Float_Neon : S32BE_Float;
+#else
case AE_FMT_S32LE : return &S32LE_Float;
case AE_FMT_S32BE : return &S32BE_Float;
+#endif
case AE_FMT_DOUBLE: return &DOUBLE_Float;
default:
return NULL;
@@ -114,8 +122,15 @@ CAEConvert::AEConvertFrFn CAEConvert::FrFloat(enum AEDataFormat dataFormat)
case AE_FMT_S16BE : return &Float_S16BE;
case AE_FMT_S24NE4: return &Float_S24NE4;
case AE_FMT_S24NE3: return &Float_S24NE3;
+#if defined(__ARM_NEON__)
+ case AE_FMT_S32LE :
+ return (g_cpuInfo.GetCPUFeatures() & CPU_FEATURE_NEON) ? &Float_S32LE_Neon : Float_S32LE;
+ case AE_FMT_S32BE :
+ return (g_cpuInfo.GetCPUFeatures() & CPU_FEATURE_NEON) ? &Float_S32BE_Neon : Float_S32BE;
+#else
case AE_FMT_S32LE : return &Float_S32LE;
case AE_FMT_S32BE : return &Float_S32BE;
+#endif
case AE_FMT_DOUBLE: return &Float_DOUBLE;
default:
return NULL;
@@ -249,7 +264,27 @@ unsigned int CAEConvert::S32LE_Float(uint8_t *data, const unsigned int samples,
static const float factor = 1.0f / (float)INT32_MAX;
int32_t *src = (int32_t*)data;
+ /* do this in groups of 4 to give the compiler a better chance of optimizing this */
+ for (float *end = dest + (samples & ~0x3); dest < end; src += 4, dest += 4)
+ {
+ dest[0] = (float)Endian_SwapLE32(src[0]) * factor;
+ dest[1] = (float)Endian_SwapLE32(src[1]) * factor;
+ dest[2] = (float)Endian_SwapLE32(src[2]) * factor;
+ dest[3] = (float)Endian_SwapLE32(src[3]) * factor;
+ }
+
+ /* process any remaining samples */
+ for (float *end = dest + (samples & 0x3); dest < end; ++src, ++dest)
+ dest[0] = (float)Endian_SwapLE32(src[0]) * factor;
+
+ return samples;
+}
+
+unsigned int CAEConvert::S32LE_Float_Neon(uint8_t *data, const unsigned int samples, float *dest)
+{
#if defined(__ARM_NEON__)
+ static const float factor = 1.0f / (float)INT32_MAX;
+ int32_t *src = (int32_t*)data;
/* groups of 4 samples */
for (float *end = dest + (samples & ~0x3); dest < end; src += 4, dest += 4)
@@ -279,33 +314,37 @@ unsigned int CAEConvert::S32LE_Float(uint8_t *data, const unsigned int samples,
if (samples & 0x1)
dest[0] = (float)src[0] * factor;
-#else /* !defined(__ARM_NEON__) */
+#endif /* defined(__ARM_NEON__) */
+ return samples;
+}
+
+unsigned int CAEConvert::S32BE_Float(uint8_t *data, const unsigned int samples, float *dest)
+{
+ static const float factor = 1.0f / (float)INT32_MAX;
+ int32_t *src = (int32_t*)data;
/* do this in groups of 4 to give the compiler a better chance of optimizing this */
for (float *end = dest + (samples & ~0x3); dest < end; src += 4, dest += 4)
{
- dest[0] = (float)Endian_SwapLE32(src[0]) * factor;
- dest[1] = (float)Endian_SwapLE32(src[1]) * factor;
- dest[2] = (float)Endian_SwapLE32(src[2]) * factor;
- dest[3] = (float)Endian_SwapLE32(src[3]) * factor;
+ dest[0] = (float)Endian_SwapBE32(src[0]) * factor;
+ dest[1] = (float)Endian_SwapBE32(src[1]) * factor;
+ dest[2] = (float)Endian_SwapBE32(src[2]) * factor;
+ dest[3] = (float)Endian_SwapBE32(src[3]) * factor;
}
/* process any remaining samples */
for (float *end = dest + (samples & 0x3); dest < end; ++src, ++dest)
- dest[0] = (float)Endian_SwapLE32(src[0]) * factor;
-
-#endif
+ dest[0] = (float)Endian_SwapBE32(src[0]) * factor;
return samples;
}
-unsigned int CAEConvert::S32BE_Float(uint8_t *data, const unsigned int samples, float *dest)
+unsigned int CAEConvert::S32BE_Float_Neon(uint8_t *data, const unsigned int samples, float *dest)
{
+#if defined(__ARM_NEON__)
static const float factor = 1.0f / (float)INT32_MAX;
int32_t *src = (int32_t*)data;
-#if defined(__ARM_NEON__)
-
/* groups of 4 samples */
for (float *end = dest + (samples & ~0x3); dest < end; src += 4, dest += 4)
{
@@ -334,23 +373,7 @@ unsigned int CAEConvert::S32BE_Float(uint8_t *data, const unsigned int samples,
if (samples & 0x1)
dest[0] = (float)src[0] * factor;
-#else /* !defined(__ARM_NEON__) */
-
- /* do this in groups of 4 to give the compiler a better chance of optimizing this */
- for (float *end = dest + (samples & ~0x3); dest < end; src += 4, dest += 4)
- {
- dest[0] = (float)Endian_SwapBE32(src[0]) * factor;
- dest[1] = (float)Endian_SwapBE32(src[1]) * factor;
- dest[2] = (float)Endian_SwapBE32(src[2]) * factor;
- dest[3] = (float)Endian_SwapBE32(src[3]) * factor;
- }
-
- /* process any remaining samples */
- for (float *end = dest + (samples & 0x3); dest < end; ++src, ++dest)
- dest[0] = (float)Endian_SwapBE32(src[0]) * factor;
-
-#endif
-
+#endif /* defined(__ARM_NEON__) */
return samples;
}
@@ -966,9 +989,23 @@ unsigned int CAEConvert::Float_S32LE(float *data, const unsigned int samples, ui
}
}
_mm_empty();
+ #else
+
+ /* no SIMD */
+ for (uint32_t i = 0; i < samples; ++i, ++data, ++dst)
+ {
+ dst[0] = safeRound(data[0] * (float)INT32_MAX);
+ dst[0] = Endian_SwapLE32(dst[0]);
+ }
+ #endif
+ return samples << 2;
+}
- #elif defined(__ARM_NEON__)
+unsigned int CAEConvert::Float_S32LE_Neon(float *data, const unsigned int samples, uint8_t *dest)
+{
+#if defined(__ARM_NEON__)
+ int32_t *dst = (int32_t*)dest;
for (float *end = data + (samples & ~0x3); data < end; data += 4, dst += 4)
{
float32x4_t val = vmulq_n_f32(vld1q_f32((const float32_t *)data), INT32_MAX);
@@ -996,17 +1033,7 @@ unsigned int CAEConvert::Float_S32LE(float *data, const unsigned int samples, ui
dst[0] = safeRound(data[0] * (float)INT32_MAX);
dst[0] = Endian_SwapLE32(dst[0]);
}
-
- #else
-
- /* no SIMD */
- for (uint32_t i = 0; i < samples; ++i, ++data, ++dst)
- {
- dst[0] = safeRound(data[0] * (float)INT32_MAX);
- dst[0] = Endian_SwapLE32(dst[0]);
- }
- #endif
-
+#endif
return samples << 2;
}
@@ -1071,9 +1098,22 @@ unsigned int CAEConvert::Float_S32BE(float *data, const unsigned int samples, ui
}
}
_mm_empty();
+ #else
+ /* no SIMD */
+ for (uint32_t i = 0; i < samples; ++i, ++data, ++dst)
+ {
+ dst[0] = safeRound(data[0] * (float)INT32_MAX);
+ dst[0] = Endian_SwapBE32(dst[0]);
+ }
+ #endif
- #elif defined(__ARM_NEON__)
+ return samples << 2;
+}
+unsigned int CAEConvert::Float_S32BE_Neon(float *data, const unsigned int samples, uint8_t *dest)
+{
+#if defined(__ARM_NEON__)
+ int32_t *dst = (int32_t*)dest;
for (float *end = data + (samples & ~0x3); data < end; data += 4, dst += 4)
{
float32x4_t val = vmulq_n_f32(vld1q_f32((const float32_t *)data), INT32_MAX);
@@ -1101,17 +1141,7 @@ unsigned int CAEConvert::Float_S32BE(float *data, const unsigned int samples, ui
dst[0] = safeRound(data[0] * (float)INT32_MAX);
dst[0] = Endian_SwapBE32(dst[0]);
}
-
- #else
-
- /* no SIMD */
- for (uint32_t i = 0; i < samples; ++i, ++data, ++dst)
- {
- dst[0] = safeRound(data[0] * (float)INT32_MAX);
- dst[0] = Endian_SwapBE32(dst[0]);
- }
- #endif
-
+#endif
return samples << 2;
}
@@ -48,6 +48,12 @@ class CAEConvert{
static unsigned int Float_S32LE (float *data, const unsigned int samples, uint8_t *dest);
static unsigned int Float_S32BE (float *data, const unsigned int samples, uint8_t *dest);
static unsigned int Float_DOUBLE(float *data, const unsigned int samples, uint8_t *dest);
+
+ static unsigned int S32LE_Float_Neon (uint8_t *data, const unsigned int samples, float *dest);
+ static unsigned int S32BE_Float_Neon (uint8_t *data, const unsigned int samples, float *dest);
+ static unsigned int Float_S32LE_Neon (float *data, const unsigned int samples, uint8_t *dest);
+ static unsigned int Float_S32BE_Neon (float *data, const unsigned int samples, uint8_t *dest);
+
public:
typedef unsigned int (*AEConvertToFn)(uint8_t *data, const unsigned int samples, float *dest);
typedef unsigned int (*AEConvertFrFn)(float *data, const unsigned int samples, uint8_t *dest);

0 comments on commit 5da9380

Please sign in to comment.