compv_math_scale.cxx
/* Copyright (C) 2011-2020 Doubango Telecom <https://www.doubango.org>
* File author: Mamadou DIOP (Doubango Telecom, France).
* License: GPLv3. For commercial license please contact us.
* Source code: https://github.com/DoubangoTelecom/compv
* WebSite: http://compv.org
*/
#include "compv/base/math/compv_math_scale.h"
#include "compv/base/compv_generic_invoke.h"
#include "compv/base/parallel/compv_parallel.h"
#include "compv/base/compv_cpu.h"
#include "compv/base/math/intrin/x86/compv_math_scale_intrin_sse2.h"
#include "compv/base/math/intrin/arm/compv_math_scale_intrin_neon.h"
COMPV_NAMESPACE_BEGIN()
#if COMPV_ASM && COMPV_ARCH_X64
COMPV_EXTERNC void CompVMathScaleScale_64f64f_Asm_X64_SSE2(const compv_float64_t* ptrIn, compv_float64_t* ptrOut, const compv_uscalar_t width, const compv_uscalar_t height, const compv_uscalar_t stride, const compv_float64_t* s1);
COMPV_EXTERNC void CompVMathScaleScale_64f64f_Asm_X64_AVX(const compv_float64_t* ptrIn, compv_float64_t* ptrOut, const compv_uscalar_t width, const compv_uscalar_t height, const compv_uscalar_t stride, const compv_float64_t* s1);
COMPV_EXTERNC void CompVMathScaleScale_32f32f_Asm_X64_SSE2(COMPV_ALIGNED(SSE) const compv_float32_t* ptrIn, COMPV_ALIGNED(SSE) compv_float32_t* ptrOut, const compv_uscalar_t width, const compv_uscalar_t height, COMPV_ALIGNED(SSE) const compv_uscalar_t stride, const compv_float32_t* s1);
COMPV_EXTERNC void CompVMathScaleScale_32f32f_Asm_X64_AVX(COMPV_ALIGNED(AVX) const compv_float32_t* ptrIn, COMPV_ALIGNED(AVX) compv_float32_t* ptrOut, const compv_uscalar_t width, const compv_uscalar_t height, COMPV_ALIGNED(AVX) const compv_uscalar_t stride, const compv_float32_t* s1);
#endif /* #if COMPV_ASM && COMPV_ARCH_X64 */
#if COMPV_ASM && COMPV_ARCH_ARM32
COMPV_EXTERNC void CompVMathScaleScale_32f32f_Asm_NEON32(COMPV_ALIGNED(NEON) const compv_float32_t* ptrIn, COMPV_ALIGNED(NEON) compv_float32_t* ptrOut, const compv_uscalar_t width, const compv_uscalar_t height, COMPV_ALIGNED(NEON) const compv_uscalar_t stride, const compv_float32_t* s1);
#endif /* COMPV_ARCH_ARM32 */
#if COMPV_ASM && COMPV_ARCH_ARM64
COMPV_EXTERNC void CompVMathScaleScale_32f32f_Asm_NEON64(COMPV_ALIGNED(NEON) const compv_float32_t* ptrIn, COMPV_ALIGNED(NEON) compv_float32_t* ptrOut, const compv_uscalar_t width, const compv_uscalar_t height, COMPV_ALIGNED(NEON) const compv_uscalar_t stride, const compv_float32_t* s1);
#endif /* COMPV_ARCH_ARM64 */

template<typename T>
static void CompVMathScaleScale_C(const T* ptrIn, T* ptrOut, const compv_uscalar_t width, const compv_uscalar_t height, const compv_uscalar_t stride, const T* s1)
{
    COMPV_DEBUG_INFO_CODE_NOT_OPTIMIZED("No SIMD or GPGPU implementation could be found");
    const T& s = *s1;
    for (compv_uscalar_t j = 0; j < height; ++j) {
        for (compv_uscalar_t i = 0; i < width; ++i) {
            ptrOut[i] = ptrIn[i] * s;
        }
        ptrIn += stride;
        ptrOut += stride;
    }
}

template<typename T>
static COMPV_ERROR_CODE CompVMathScaleScale(const CompVMatPtr &in, const double& s, CompVMatPtrPtr out, const bool enforceSingleThread)
{
    const size_t rows = in->rows();
    const size_t cols = in->cols();
    const size_t stride = in->stride();
    CompVMatPtr out_ = *out;
    if (out_ != in) { // This function allows "in == out"
        COMPV_CHECK_CODE_RETURN(CompVMat::newObj(&out_, in));
    }
    const T ss = static_cast<T>(s);
    auto funcPtr = [&](const size_t ystart, const size_t yend) -> COMPV_ERROR_CODE {
        const T* ptrIn = in->ptr<const T>(ystart);
        T* ptrOut = out_->ptr<T>(ystart);
        if (std::is_same<T, compv_float64_t>::value) {
            void(*CompVMathScale_64f64f)(const compv_float64_t* ptrIn, compv_float64_t* ptrOut, const compv_uscalar_t width, const compv_uscalar_t height, const compv_uscalar_t stride, const compv_float64_t* s1)
                = nullptr;
            COMPV_CHECK_CODE_RETURN(CompVMathScale::hookScale_64f(&CompVMathScale_64f64f));
            CompVMathScale_64f64f(
                reinterpret_cast<const compv_float64_t*>(ptrIn), reinterpret_cast<compv_float64_t*>(ptrOut),
                cols, (yend - ystart), stride,
                reinterpret_cast<const compv_float64_t*>(&ss)
            );
        }
        else if (std::is_same<T, compv_float32_t>::value) {
            void(*CompVMathScale_32f32f)(const compv_float32_t* ptrIn, compv_float32_t* ptrOut, const compv_uscalar_t width, const compv_uscalar_t height, const compv_uscalar_t stride, const compv_float32_t* s1)
                = CompVMathScaleScale_C;
#if COMPV_ARCH_X86
            if (CompVCpu::isEnabled(kCpuFlagSSE2) && COMPV_IS_ALIGNED_SSE(ptrIn) && COMPV_IS_ALIGNED_SSE(ptrOut) && COMPV_IS_ALIGNED_SSE(stride * sizeof(compv_float32_t))) {
                COMPV_EXEC_IFDEF_INTRIN_X86(CompVMathScale_32f32f = CompVMathScaleScale_32f32f_Intrin_SSE2);
                COMPV_EXEC_IFDEF_ASM_X64(CompVMathScale_32f32f = CompVMathScaleScale_32f32f_Asm_X64_SSE2);
            }
            if (CompVCpu::isEnabled(kCpuFlagAVX) && COMPV_IS_ALIGNED_AVX(ptrIn) && COMPV_IS_ALIGNED_AVX(ptrOut) && COMPV_IS_ALIGNED_AVX(stride * sizeof(compv_float32_t))) {
                COMPV_EXEC_IFDEF_ASM_X64(CompVMathScale_32f32f = CompVMathScaleScale_32f32f_Asm_X64_AVX);
            }
#elif COMPV_ARCH_ARM
            if (CompVCpu::isEnabled(kCpuFlagARM_NEON) && COMPV_IS_ALIGNED_NEON(ptrIn) && COMPV_IS_ALIGNED_NEON(ptrOut) && COMPV_IS_ALIGNED_NEON(stride * sizeof(compv_float32_t))) {
                COMPV_EXEC_IFDEF_INTRIN_ARM(CompVMathScale_32f32f = CompVMathScaleScale_32f32f_Intrin_NEON);
                COMPV_EXEC_IFDEF_ASM_ARM32(CompVMathScale_32f32f = CompVMathScaleScale_32f32f_Asm_NEON32);
                COMPV_EXEC_IFDEF_ASM_ARM64(CompVMathScale_32f32f = CompVMathScaleScale_32f32f_Asm_NEON64);
            }
#endif
            CompVMathScale_32f32f(
                reinterpret_cast<const compv_float32_t*>(ptrIn), reinterpret_cast<compv_float32_t*>(ptrOut),
                cols, (yend - ystart), stride,
                reinterpret_cast<const compv_float32_t*>(&ss)
            );
        }
        else {
            CompVMathScaleScale_C(
                ptrIn, ptrOut,
                cols, (yend - ystart), stride,
                &ss
            );
        }
        return COMPV_ERROR_CODE_S_OK;
    };
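    // Multi-threading: dispatchDividingAcrossY() splits the rows into horizontal bands and
    // invokes funcPtr once per band with its [ystart, yend) range; SIZE_MAX as the
    // samples-per-thread threshold forces a single band when enforceSingleThread is true.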
    COMPV_CHECK_CODE_RETURN(CompVThreadDispatcher::dispatchDividingAcrossY(
        funcPtr,
        cols,
        rows,
        enforceSingleThread ? SIZE_MAX : (cols * 1)
    ));
    *out = out_;
    return COMPV_ERROR_CODE_S_OK;
}

// out[i] = (in[i] * s)
COMPV_ERROR_CODE CompVMathScale::scale(const CompVMatPtr &in, const double& s, CompVMatPtrPtr out, const bool enforceSingleThread COMPV_DEFAULT(false))
{
    COMPV_CHECK_EXP_RETURN(!in || !out || in->planeCount() != 1
        , COMPV_ERROR_CODE_E_INVALID_PARAMETER);
    CompVGenericFloatInvokeCodeRawType(in->subType(), CompVMathScaleScale, in, s, out, enforceSingleThread);
    return COMPV_ERROR_CODE_S_OK;
}
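
/* Usage sketch (illustrative only, not part of the original file). It assumes a single-plane
 * 32f matrix; "imageIn"/"imageOut" are hypothetical names and newObjAligned<> is the usual
 * CompVMat allocation helper:
 *
 *   CompVMatPtr imageIn, imageOut;
 *   COMPV_CHECK_CODE_RETURN(CompVMat::newObjAligned<compv_float32_t>(&imageIn, 480, 640));
 *   // ... fill imageIn ...
 *   COMPV_CHECK_CODE_RETURN(CompVMathScale::scale(imageIn, 0.5, &imageOut)); // imageOut[i] = imageIn[i] * 0.5
 *   COMPV_CHECK_CODE_RETURN(CompVMathScale::scale(imageIn, 0.5, &imageIn));  // in-place ("in == out") is supported
 */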
COMPV_ERROR_CODE CompVMathScale::hookScale_64f(
    void(**CompVMathScaleScale_64f64f)(const compv_float64_t* ptrIn, compv_float64_t* ptrOut, const compv_uscalar_t width, const compv_uscalar_t height, const compv_uscalar_t stride, const compv_float64_t* s1)
)
{
    COMPV_CHECK_EXP_RETURN(!CompVMathScaleScale_64f64f, COMPV_ERROR_CODE_E_INVALID_PARAMETER);
    *CompVMathScaleScale_64f64f = CompVMathScaleScale_C;
#if COMPV_ARCH_X86
    if (CompVCpu::isEnabled(kCpuFlagSSE2)) {
        COMPV_EXEC_IFDEF_INTRIN_X86(*CompVMathScaleScale_64f64f = CompVMathScaleScale_64f64f_Intrin_SSE2);
        COMPV_EXEC_IFDEF_ASM_X64(*CompVMathScaleScale_64f64f = CompVMathScaleScale_64f64f_Asm_X64_SSE2);
    }
    if (CompVCpu::isEnabled(kCpuFlagAVX)) {
        COMPV_EXEC_IFDEF_ASM_X64(*CompVMathScaleScale_64f64f = CompVMathScaleScale_64f64f_Asm_X64_AVX);
    }
#elif COMPV_ARCH_ARM
#endif
    return COMPV_ERROR_CODE_S_OK;
}
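
/* Hook usage sketch (illustrative only, not part of the original file). "in64f", "out64f",
 * "width", "height" and "stride" are hypothetical caller-provided 64f buffers/dimensions:
 *
 *   void (*scale64f)(const compv_float64_t*, compv_float64_t*, const compv_uscalar_t,
 *       const compv_uscalar_t, const compv_uscalar_t, const compv_float64_t*) = nullptr;
 *   COMPV_CHECK_CODE_RETURN(CompVMathScale::hookScale_64f(&scale64f));
 *   const compv_float64_t s = 2.0;
 *   scale64f(in64f, out64f, width, height, stride, &s); // out64f[i] = in64f[i] * 2.0
 */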
COMPV_NAMESPACE_END()