-
Notifications
You must be signed in to change notification settings - Fork 44
/
Copy pathcompv_simd_globals.cxx
executable file
·294 lines (243 loc) · 14.1 KB
/
compv_simd_globals.cxx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
/* Copyright (C) 2011-2020 Doubango Telecom <https://www.doubango.org>
* File author: Mamadou DIOP (Doubango Telecom, France).
* License: GPLv3. For commercial license please contact us.
* Source code: https://github.com/DoubangoTelecom/compv
* WebSite: http://compv.org
*/
#include "compv/base/compv_simd_globals.h"
#include "compv/base/intrin/x86/compv_intrin_avx.h"
#include "compv/base/intrin/x86/compv_intrin_sse.h"
#include "compv/base/math/compv_math.h"
// All-zeros constant: 32 bytes so the full table fills one 256-bit AVX
// register (SSE code reads only the first 16 bytes).
// FIX: the table previously contained only 31 zeros (second row was one
// element short), so a 32-byte AVX load read one byte past the end of the
// array (undefined behavior / garbage in the last lane).
//!\\ You should use 'pxor' instruction which is faster than movdqa to load zeros in xmm register
COMPV_ALIGN_DEFAULT() uint8_t k0_8u[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
// ---------------------------------------------------------------------------
// 8-bit broadcast constants. Each table is 32 bytes long: the first 16 bytes
// fill one SSE (128-bit) register and the full 32 bytes fill one AVX
// (256-bit) register.
// ---------------------------------------------------------------------------
COMPV_ALIGN_DEFAULT() uint8_t k_0_0_0_255_8u[] = { // 0x000000FF repeated per 32-bit lane. NOTE(review): presumably a byte-select (e.g. alpha) mask -- confirm at call sites
0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255,
0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255,
};
COMPV_ALIGN_DEFAULT() int8_t k1_8s[] = { // +1 in every int8 lane
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
COMPV_ALIGN_DEFAULT() int8_t k2_8s[] = { // +2 in every int8 lane
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
};
COMPV_ALIGN_DEFAULT() int8_t k3_8s[] = { // +3 in every int8 lane
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
};
COMPV_ALIGN_DEFAULT() int8_t k5_8s[] = { // +5 in every int8 lane
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
};
COMPV_ALIGN_DEFAULT() int8_t k15_8s[] = { // 0x0f in every lane (low-nibble mask)
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
};
COMPV_ALIGN_DEFAULT() int8_t k16_8s[] = { // +16 in every int8 lane
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
};
COMPV_ALIGN_DEFAULT() int8_t k85_8s[] = { // +85 in every int8 lane
85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85,
};
COMPV_ALIGN_DEFAULT() int8_t k127_8s[] = { // INT8_MAX in every lane
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
};
COMPV_ALIGN_DEFAULT() uint8_t k128_8u[] = { // 0x80 in every lane (sign-bit / bias constant for uint8<->int8 tricks)
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
};
COMPV_ALIGN_DEFAULT() uint8_t k171_8u[] = { // 171 in every lane. NOTE(review): 171 ~= (256*2)/3 -- presumably a fixed-point divide-by-3 multiplier; confirm at call sites
171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171,
171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171,
};
COMPV_ALIGN_DEFAULT() uint8_t k254_8u[] = { // 254 = FE = 11111110, not(254) = 00000001 -> useful to select first or last bit, there is no shift_epi8(7) in SSE
254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
};
COMPV_ALIGN_DEFAULT() uint8_t k255_8u[] = { // all-ones mask (0xFF in every lane)
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
// ---------------------------------------------------------------------------
// 16-bit broadcast constants. Tables of 16 values (32 bytes) fill one AVX
// register (SSE reads the first 8 values); tables of 32 values span two AVX
// registers. NOTE(review): the specific constants (13/26, 37, 51, 65, 7120,
// 8912, 4400) look like fixed-point color-conversion coefficients -- confirm
// at call sites.
// ---------------------------------------------------------------------------
COMPV_ALIGN_DEFAULT() int16_t k13_26_16s[] = { // alternating 13, 26 per int16 lane (pair pattern, e.g. for pmaddwd-style dot products)
13, 26, 13, 26, 13, 26, 13, 26,
13, 26, 13, 26, 13, 26, 13, 26,
};
COMPV_ALIGN_DEFAULT() int16_t k16_16s[] = { // +16 in every int16 lane
16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16,
};
COMPV_ALIGN_DEFAULT() int16_t k37_16s[] = { // +37 in every int16 lane
37, 37, 37, 37, 37, 37, 37, 37,
37, 37, 37, 37, 37, 37, 37, 37,
};
COMPV_ALIGN_DEFAULT() int16_t k51_16s[] = { // +51 in every int16 lane
51, 51, 51, 51, 51, 51, 51, 51,
51, 51, 51, 51, 51, 51, 51, 51,
};
COMPV_ALIGN_DEFAULT() int16_t k65_16s[] = { // +65 in every int16 lane
65, 65, 65, 65, 65, 65, 65, 65,
65, 65, 65, 65, 65, 65, 65, 65,
};
COMPV_ALIGN_DEFAULT() int16_t k127_16s[] = { // +127 in every int16 lane
127, 127, 127, 127, 127, 127, 127, 127,
127, 127, 127, 127, 127, 127, 127, 127,
};
COMPV_ALIGN_DEFAULT() int16_t k128_16s[] = { // +128 in every int16 lane
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
};
COMPV_ALIGN_DEFAULT() int16_t k255_16s[] = { // +255 (0xFF) in every int16 lane; 32 values = two AVX registers
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
COMPV_ALIGN_DEFAULT() int16_t k7120_16s[] = { // +7120 in every int16 lane; 32 values = two AVX registers
7120, 7120, 7120, 7120, 7120, 7120, 7120, 7120, 7120, 7120, 7120, 7120, 7120, 7120, 7120, 7120,
7120, 7120, 7120, 7120, 7120, 7120, 7120, 7120, 7120, 7120, 7120, 7120, 7120, 7120, 7120, 7120,
};
COMPV_ALIGN_DEFAULT() int16_t k8912_16s[] = { // +8912 in every int16 lane; 32 values = two AVX registers
8912, 8912, 8912, 8912, 8912, 8912, 8912, 8912, 8912, 8912, 8912, 8912, 8912, 8912, 8912, 8912,
8912, 8912, 8912, 8912, 8912, 8912, 8912, 8912, 8912, 8912, 8912, 8912, 8912, 8912, 8912, 8912,
};
COMPV_ALIGN_DEFAULT() int16_t k4400_16s[] = { // +4400 in every int16 lane; 32 values = two AVX registers
4400, 4400, 4400, 4400, 4400, 4400, 4400, 4400, 4400, 4400, 4400, 4400, 4400, 4400, 4400, 4400,
4400, 4400, 4400, 4400, 4400, 4400, 4400, 4400, 4400, 4400, 4400, 4400, 4400, 4400, 4400, 4400,
};
// Scalar float32 copies of the atan2 polynomial-approximation constants
// (declared as float64 `kMathTrigAtan2*` in compv_math.h), narrowed once at
// static-initialization time so the broadcast tables below can be built
// without per-element casts. File-local (static): not part of the public
// interface.
static const COMPV_NAMESPACE::compv_float32_t atan2_eps = static_cast<COMPV_NAMESPACE::compv_float32_t>(COMPV_NAMESPACE::kMathTrigAtan2Eps);
static const COMPV_NAMESPACE::compv_float32_t atan2_p1 = static_cast<COMPV_NAMESPACE::compv_float32_t>(COMPV_NAMESPACE::kMathTrigAtan2P1);
static const COMPV_NAMESPACE::compv_float32_t atan2_p3 = static_cast<COMPV_NAMESPACE::compv_float32_t>(COMPV_NAMESPACE::kMathTrigAtan2P3);
static const COMPV_NAMESPACE::compv_float32_t atan2_p5 = static_cast<COMPV_NAMESPACE::compv_float32_t>(COMPV_NAMESPACE::kMathTrigAtan2P5);
static const COMPV_NAMESPACE::compv_float32_t atan2_p7 = static_cast<COMPV_NAMESPACE::compv_float32_t>(COMPV_NAMESPACE::kMathTrigAtan2P7);
// ---------------------------------------------------------------------------
// atan2 approximation constants broadcast per float32 lane: 8 values =
// 32 bytes, so the first 4 fill an SSE register and all 8 fill an AVX
// register. Values come from the scalar atan2_* constants above.
// ---------------------------------------------------------------------------
COMPV_ALIGN_DEFAULT() COMPV_NAMESPACE::compv_float32_t kAtan2Eps_32f[] = { // epsilon guard for the atan2 division
atan2_eps, atan2_eps, atan2_eps, atan2_eps,
atan2_eps, atan2_eps, atan2_eps, atan2_eps,
};
COMPV_ALIGN_DEFAULT() COMPV_NAMESPACE::compv_float32_t kAtan2P1_32f[] = { // degree-1 polynomial coefficient
atan2_p1, atan2_p1, atan2_p1, atan2_p1,
atan2_p1, atan2_p1, atan2_p1, atan2_p1,
};
COMPV_ALIGN_DEFAULT() COMPV_NAMESPACE::compv_float32_t kAtan2P3_32f[] = { // degree-3 polynomial coefficient
atan2_p3, atan2_p3, atan2_p3, atan2_p3,
atan2_p3, atan2_p3, atan2_p3, atan2_p3,
};
COMPV_ALIGN_DEFAULT() COMPV_NAMESPACE::compv_float32_t kAtan2P5_32f[] = { // degree-5 polynomial coefficient
atan2_p5, atan2_p5, atan2_p5, atan2_p5,
atan2_p5, atan2_p5, atan2_p5, atan2_p5,
};
COMPV_ALIGN_DEFAULT() COMPV_NAMESPACE::compv_float32_t kAtan2P7_32f[] = { // degree-7 polynomial coefficient
atan2_p7, atan2_p7, atan2_p7, atan2_p7,
atan2_p7, atan2_p7, atan2_p7, atan2_p7,
};
// 43.0f broadcast per float32 lane (4 for SSE, 8 for AVX).
COMPV_ALIGN_DEFAULT() COMPV_NAMESPACE::compv_float32_t k43_32f[] = {
43.f, 43.f, 43.f, 43.f,
43.f, 43.f, 43.f, 43.f,
};
COMPV_ALIGN_DEFAULT() COMPV_ALIGN_DEFAULT() COMPV_NAMESPACE::compv_float32_t k90_32f[] = {
90.f, 90.f, 90.f, 90.f,
90.f, 90.f, 90.f, 90.f,
};
COMPV_ALIGN_DEFAULT() COMPV_ALIGN_DEFAULT() COMPV_NAMESPACE::compv_float32_t k180_32f[] = {
180.f, 180.f, 180.f, 180.f,
180.f, 180.f, 180.f, 180.f,
};
// 255.0f broadcast per float32 lane (4 for SSE, 8 for AVX).
COMPV_ALIGN_DEFAULT() COMPV_NAMESPACE::compv_float32_t k255_32f[] = {
255.f, 255.f, 255.f, 255.f,
255.f, 255.f, 255.f, 255.f,
};
COMPV_ALIGN_DEFAULT() COMPV_ALIGN_DEFAULT() COMPV_NAMESPACE::compv_float32_t k360_32f[] = {
360.f, 360.f, 360.f, 360.f,
360.f, 360.f, 360.f, 360.f,
};
// ---------------------------------------------------------------------------
// float64 broadcast constants: 2 values fill one SSE register, all 4 fill one
// AVX register (as the inline comments indicate).
// ---------------------------------------------------------------------------
COMPV_ALIGN_DEFAULT() COMPV_NAMESPACE::compv_float64_t ksqrt2_64f[] = { // sqrt(2) per float64 lane
COMPV_MATH_SQRT_2, COMPV_MATH_SQRT_2, // SSE
COMPV_MATH_SQRT_2, COMPV_MATH_SQRT_2 // AVX
};
COMPV_ALIGN_DEFAULT() COMPV_NAMESPACE::compv_float64_t km1_64f[] = { // -1.0 per float64 lane
-1., -1., // SSE
-1., -1., // AVX
};
COMPV_ALIGN_DEFAULT() COMPV_NAMESPACE::compv_float64_t km1_0_64f[] = { // alternating -1.0, 0.0 pattern
-1., 0., // SSE
-1., 0. // AVX
};
COMPV_ALIGN_DEFAULT() COMPV_NAMESPACE::compv_float64_t k1_64f[] = { // +1.0 per float64 lane
1., 1., // SSE
1., 1., // AVX
};
// ---------------------------------------------------------------------------
// Byte-shuffle control tables for _mm_shuffle_epi8 / _mm256_shuffle_epi8.
// COMPV_MM_SHUFFLE_EPI8 packs four byte indices into one int32 (see the
// macro's definition in the SSE intrin header); each table holds 4 int32 for
// the 128-bit SSE control and the same 4 repeated for the 256-bit AVX control
// (vpshufb shuffles each 128-bit half independently).
// NOTE(review): the last three tables use the suffix "_s32" while the rest of
// the file uses "_32s"; renaming would break external linkage, so it is only
// flagged here.
// ---------------------------------------------------------------------------
COMPV_ALIGN_DEFAULT() int32_t kShuffleEpi8_Popcnt_32s[] = { // To be used with _mm_shuffle_epi8 for popcnt computation
COMPV_MM_SHUFFLE_EPI8(2, 1, 1, 0), COMPV_MM_SHUFFLE_EPI8(3, 2, 2, 1), COMPV_MM_SHUFFLE_EPI8(3, 2, 2, 1), COMPV_MM_SHUFFLE_EPI8(4, 3, 3, 2), // 128bits SSE register
COMPV_MM_SHUFFLE_EPI8(2, 1, 1, 0), COMPV_MM_SHUFFLE_EPI8(3, 2, 2, 1), COMPV_MM_SHUFFLE_EPI8(3, 2, 2, 1), COMPV_MM_SHUFFLE_EPI8(4, 3, 3, 2), // 256bits AVX register
};
COMPV_ALIGN_DEFAULT() int32_t kShuffleEpi8_Deinterleave8uL2_32s[] = { // To be used with _mm_shuffle_epi8, use vld2.u8/vst2.u8 for ARM NEON
COMPV_MM_SHUFFLE_EPI8(6, 4, 2, 0), COMPV_MM_SHUFFLE_EPI8(14, 12, 10, 8), COMPV_MM_SHUFFLE_EPI8(7, 5, 3, 1), COMPV_MM_SHUFFLE_EPI8(15, 13, 11, 9), // 128bits SSE register
COMPV_MM_SHUFFLE_EPI8(6, 4, 2, 0), COMPV_MM_SHUFFLE_EPI8(14, 12, 10, 8), COMPV_MM_SHUFFLE_EPI8(7, 5, 3, 1), COMPV_MM_SHUFFLE_EPI8(15, 13, 11, 9), // 256bits AVX register
};
COMPV_ALIGN_DEFAULT() int32_t kShuffleEpi8_Deinterleave16uL2_32s[] = { // To be used with _mm_shuffle_epi8, use vld2.u16/vst2.u16 for ARM NEON
COMPV_MM_SHUFFLE_EPI8(5, 4, 1, 0), COMPV_MM_SHUFFLE_EPI8(13, 12, 9, 8), COMPV_MM_SHUFFLE_EPI8(7, 6, 3, 2), COMPV_MM_SHUFFLE_EPI8(15, 14, 11, 10), // 128bits SSE register
COMPV_MM_SHUFFLE_EPI8(5, 4, 1, 0), COMPV_MM_SHUFFLE_EPI8(13, 12, 9, 8), COMPV_MM_SHUFFLE_EPI8(7, 6, 3, 2), COMPV_MM_SHUFFLE_EPI8(15, 14, 11, 10), // 256bits AVX register
};
COMPV_ALIGN_DEFAULT() int32_t kShuffleEpi8_Deinterleave8uL3_32s[] = { // To be used with _mm_shuffle_epi8, use vld3.u8/vst3.u8 for ARM NEON
COMPV_MM_SHUFFLE_EPI8(9, 6, 3, 0), COMPV_MM_SHUFFLE_EPI8(4, 1, 15, 12), COMPV_MM_SHUFFLE_EPI8(2, 13, 10, 7), COMPV_MM_SHUFFLE_EPI8(14, 11, 8, 5), // 128bits SSE register
COMPV_MM_SHUFFLE_EPI8(9, 6, 3, 0), COMPV_MM_SHUFFLE_EPI8(4, 1, 15, 12), COMPV_MM_SHUFFLE_EPI8(2, 13, 10, 7), COMPV_MM_SHUFFLE_EPI8(14, 11, 8, 5), // 256bits AVX register
};
COMPV_ALIGN_DEFAULT() int32_t kShuffleEpi8_Interleave8uL3_Step0_s32[] = { // To be used with _mm_shuffle_epi8, use vld3.u8/vst3.u8 for ARM NEON
COMPV_MM_SHUFFLE_EPI8(2, 11, 1, 0), COMPV_MM_SHUFFLE_EPI8(5, 4, 12, 3), COMPV_MM_SHUFFLE_EPI8(14, 7, 6, 13), COMPV_MM_SHUFFLE_EPI8(10, 15, 9, 8), // 128bits SSE register
COMPV_MM_SHUFFLE_EPI8(2, 11, 1, 0), COMPV_MM_SHUFFLE_EPI8(5, 4, 12, 3), COMPV_MM_SHUFFLE_EPI8(14, 7, 6, 13), COMPV_MM_SHUFFLE_EPI8(10, 15, 9, 8), // 256bits AVX register
};
COMPV_ALIGN_DEFAULT() int32_t kShuffleEpi8_Interleave8uL3_Step1_s32[] = { // To be used with _mm_shuffle_epi8, use vld3.u8/vst3.u8 for ARM NEON
COMPV_MM_SHUFFLE_EPI8(2, 1, 11, 0), COMPV_MM_SHUFFLE_EPI8(13, 4, 3, 12), COMPV_MM_SHUFFLE_EPI8(7, 14, 6, 5), COMPV_MM_SHUFFLE_EPI8(10, 9, 15, 8), // 128bits SSE register
COMPV_MM_SHUFFLE_EPI8(2, 1, 11, 0), COMPV_MM_SHUFFLE_EPI8(13, 4, 3, 12), COMPV_MM_SHUFFLE_EPI8(7, 14, 6, 5), COMPV_MM_SHUFFLE_EPI8(10, 9, 15, 8), // 256bits AVX register
};
COMPV_ALIGN_DEFAULT() int32_t kShuffleEpi8_Interleave8uL3_Step2_s32[] = { // To be used with _mm_shuffle_epi8, use vld3.u8/vst3.u8 for ARM NEON
COMPV_MM_SHUFFLE_EPI8(11, 1, 0, 10), COMPV_MM_SHUFFLE_EPI8(4, 12, 3, 2), COMPV_MM_SHUFFLE_EPI8(7, 6, 13, 5), COMPV_MM_SHUFFLE_EPI8(15, 9, 8, 14), // 128bits SSE register
COMPV_MM_SHUFFLE_EPI8(11, 1, 0, 10), COMPV_MM_SHUFFLE_EPI8(4, 12, 3, 2), COMPV_MM_SHUFFLE_EPI8(7, 6, 13, 5), COMPV_MM_SHUFFLE_EPI8(15, 9, 8, 14), // 256bits AVX register
};
COMPV_ALIGN_DEFAULT() int32_t kShuffleEpi8_Deinterleave8uL4_32s[] = { // To be used with _mm_shuffle_epi8, use vld4.u8/vst4.u8 for ARM NEON
COMPV_MM_SHUFFLE_EPI8(14, 10, 12, 8), COMPV_MM_SHUFFLE_EPI8(15, 11, 13, 9), COMPV_MM_SHUFFLE_EPI8(6, 2, 4, 0), COMPV_MM_SHUFFLE_EPI8(7, 3, 5, 1), // 128bits SSE register
COMPV_MM_SHUFFLE_EPI8(14, 10, 12, 8), COMPV_MM_SHUFFLE_EPI8(15, 11, 13, 9), COMPV_MM_SHUFFLE_EPI8(6, 2, 4, 0), COMPV_MM_SHUFFLE_EPI8(7, 3, 5, 1), // 256bits AVX register
};
#if COMPV_ARCH_X86
// ---------------------------------------------------------------------------
// x86-only tables (AVX mask-load/store controls, cross-lane permute indices,
// and float64 sign-manipulation masks).
// Per the Intel intrinsics documentation, _mm256_maskload/_mm256_maskstore
// consider only the most-significant bit of each element of the mask, so the
// 0xF0...0 values below select exactly the elements whose MSB is set.
// ---------------------------------------------------------------------------
COMPV_ALIGN_DEFAULT() uint64_t kAVXMaskstore_0_64u[] = { // use with _mm256_maskload -- selects element #0 only
0xF000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000
};
COMPV_ALIGN_DEFAULT() uint64_t kAVXMaskstore_0_1_64u[] = { // use with _mm256_maskload -- selects elements #0 and #1
0xF000000000000000, 0xF000000000000000, 0x0000000000000000, 0x0000000000000000
};
COMPV_ALIGN_DEFAULT() uint32_t kAVXMaskstore_0_32u[] = { // use with _mm256_maskload -- selects 32-bit element #0 only
0xF0000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
};
COMPV_ALIGN_DEFAULT() uint64_t kAVXMaskzero_3_64u[] = { // mask to zero the last 64bits - use with _mm256_and
0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x0000000000000000,
};
COMPV_ALIGN_DEFAULT() uint64_t kAVXMaskzero_2_3_64u[] = { // mask to zero the last two 64bits (128) - use with _mm256_and
0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x0000000000000000, 0x0000000000000000,
};
COMPV_ALIGN_DEFAULT() uint64_t kAVXMaskzero_1_2_3_64u[] = { // mask to zero the last three 64bits (192) - use with _mm256_and
0xFFFFFFFFFFFFFFFF, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
};
// Index tables for _mm256_permutevar8x32_epi32 (COMPV_AVX_A64..H64 name the
// eight 32-bit source positions; 0xFF marks a don't-care slot).
COMPV_ALIGN_DEFAULT() int32_t kAVXPermutevar8x32_AEBFCGDH_32s[] = { // = k_0_4_1_5_2_6_3_7
COMPV_AVX_A64, COMPV_AVX_E64, COMPV_AVX_B64, COMPV_AVX_F64, COMPV_AVX_C64, COMPV_AVX_G64, COMPV_AVX_D64, COMPV_AVX_H64
};
COMPV_ALIGN_DEFAULT() int32_t kAVXPermutevar8x32_ABCDDEFG_32s[] = { // = k_0_1_2_3_3_4_5_6
COMPV_AVX_A64, COMPV_AVX_B64, COMPV_AVX_C64, COMPV_AVX_D64, COMPV_AVX_D64, COMPV_AVX_E64, COMPV_AVX_F64, COMPV_AVX_G64
};
COMPV_ALIGN_DEFAULT() int32_t kAVXPermutevar8x32_CDEFFGHX_32s[] = { // = k_2_3_4_5_5_6_7_X
COMPV_AVX_C64, COMPV_AVX_D64, COMPV_AVX_E64, COMPV_AVX_F64, COMPV_AVX_F64, COMPV_AVX_G64, COMPV_AVX_H64, 0xFF
};
COMPV_ALIGN_DEFAULT() int32_t kAVXPermutevar8x32_XXABBCDE_32s[] = { // = k_X_X_0_1_1_2_3_4
0xFF, 0xFF, COMPV_AVX_A64, COMPV_AVX_B64, COMPV_AVX_B64, COMPV_AVX_C64, COMPV_AVX_D64, COMPV_AVX_E64
};
// Each uint32 pair below forms one 64-bit lane (little-endian: low word
// first), i.e. 0x7FFFFFFFFFFFFFFF per float64 lane.
COMPV_ALIGN_DEFAULT() uint32_t kAVXFloat64MaskAbs[] = { // Mask used for operations to compute the absolute value -> and(xmm, mask)
0xffffffff, 0x7fffffff, 0xffffffff, 0x7fffffff, // SSE
0xffffffff, 0x7fffffff, 0xffffffff, 0x7fffffff // AVX
};
// 0x8000000000000000 per float64 lane (sign bit only).
COMPV_ALIGN_DEFAULT() uint32_t kAVXFloat64MaskNegate[] = { // Mask used for operations to negate values -> xor(xmm, mask)
0x00000000, 0x80000000, 0x00000000, 0x80000000, // SSE
0x00000000, 0x80000000, 0x00000000, 0x80000000 // AVX
};
#endif /* COMPV_ARCH_X86 */