-
Notifications
You must be signed in to change notification settings - Fork 22
/
partition_neon.c
42 lines (31 loc) · 1.06 KB
/
partition_neon.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#include "config.h"
#include <misc/partition.h>
#include <stdbool.h>
#include <misc/stddef.h>
#include <misc/intrinsic.h>
// Defined before the two inclusions of partition_body.c below and undefined again at the end of this file.
// Presumably selects the NEON code path inside that shared body — confirm in partition_body.c.
// NOTE(review): identifiers containing a double underscore are reserved for the implementation in C.
#define __USE_NEON__ 1
/**
 * NEON counterpart of SSE2's _mm_movemask_epi8: gathers the most significant
 * bit of each of the 16 bytes of `input` into a 16-bit mask.
 *
 * Adapted from
 * https://stackoverflow.com/questions/11870910/sse-mm-movemask-epi8-equivalent-method-for-arm-neon
 *
 * @param input 128-bit vector; declared as eight 16-bit lanes but examined
 *              byte by byte (presumably so that 16-bit comparison results can
 *              be passed directly — confirm against the callers).
 * @return      value whose bit i (0..15) is set iff byte i of `input` has its
 *              top bit (0x80) set.
 */
int vmovemask_u8(uint16x8_t input)
{
    // Per-byte shift counts. vshlq_u8 takes *signed* counts and a negative
    // count shifts right, so byte i of each 8-byte half moves its top bit
    // (bit 7) down to bit position i.
    static const int8_t shiftCounts[16] = {
        -7, -6, -5, -4, -3, -2, -1, 0,
        -7, -6, -5, -4, -3, -2, -1, 0
    };
    const int8x16_t vshift = vld1q_s8(shiftCounts);

    // View the vector as bytes explicitly instead of relying on lax vector
    // conversions, then keep only the top bit of every byte.
    const uint8x16_t bytes = vreinterpretq_u8_u16(input);
    uint8x16_t vmask = vandq_u8(bytes, vdupq_n_u8(0x80));

    // After the shift every byte holds either 0 or a distinct single bit,
    // so the horizontal add of each half cannot overflow 8 bits.
    vmask = vshlq_u8(vmask, vshift);

    uint32_t out = vaddv_u8(vget_low_u8(vmask));
    out |= (uint32_t)vaddv_u8(vget_high_u8(vmask)) << 8;
    return (int)out;
}
// Instantiate the shared body with PARTITION_RANGE set to 1 for this function only;
// the macro is removed again right after the definition.
#define PARTITION_RANGE 1
/*
 * NEON build of the "range" partition routine.
 *
 * The entire function body is pulled in textually from partition_body.c, so the
 * behaviour is defined there, not here. The parameter names (x, cut, indices,
 * length) are presumably referenced by that body and must not be renamed —
 * confirm in partition_body.c.
 *
 * x and indices are restrict-qualified, i.e. the caller promises they do not alias.
 */
size_t misc_partitionRange_neon(const misc_xint_t* restrict x, misc_xint_t cut, size_t* restrict indices, size_t length)
{
#include "partition_body.c"
}
#undef PARTITION_RANGE
// Instantiate the shared body a second time, now with PARTITION_RANGE set to 0.
#define PARTITION_RANGE 0
/*
 * NEON build of the "indices" partition routine.
 *
 * Same signature as misc_partitionRange_neon; the body again comes textually
 * from partition_body.c, compiled with PARTITION_RANGE == 0. What differs
 * between the two variants is decided inside that file — confirm there.
 * The parameter names (x, cut, indices, length) are presumably used by the
 * included body and must stay as they are.
 */
size_t misc_partitionIndices_neon(const misc_xint_t* restrict x, misc_xint_t cut, size_t* restrict indices, size_t length)
{
# include "partition_body.c"
}
// Clean up the configuration macros so they do not leak past this point.
#undef PARTITION_RANGE
#undef __USE_NEON__