Skip to content

Commit

Permalink
avx implementation for (s/d)log10
Browse files Browse the repository at this point in the history
  • Loading branch information
xinhuang committed May 15, 2015
1 parent 13278d3 commit e03f02a
Show file tree
Hide file tree
Showing 20 changed files with 367 additions and 4 deletions.
3 changes: 3 additions & 0 deletions include/openvml.h
Expand Up @@ -52,6 +52,9 @@ OPENVML_EXPORT void OpenVML_FUNCNAME(vdPow)(VML_INT n, const double * a, const d
OPENVML_EXPORT void OpenVML_FUNCNAME(vsExp)(VML_INT n, const float * a, float * y);
OPENVML_EXPORT void OpenVML_FUNCNAME(vdExp)(VML_INT n, const double * a, double * y);

OPENVML_EXPORT void OpenVML_FUNCNAME(vsLog10)(VML_INT n, const float * a, float * y);
OPENVML_EXPORT void OpenVML_FUNCNAME(vdLog10)(VML_INT n, const double * a, double * y);

OPENVML_EXPORT void OpenVML_FUNCNAME(vsTanh)(VML_INT n, const float * a, float * y);
OPENVML_EXPORT void OpenVML_FUNCNAME(vdTanh)(VML_INT n, const double * a, double * y);

Expand Down
8 changes: 7 additions & 1 deletion include/openvml_kernel.h
Expand Up @@ -54,9 +54,15 @@ void OpenVML_FUNCNAME(cexp_k)(VMLLONG n, float * a, float * b, float * y, float
void OpenVML_FUNCNAME(zexp_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params);


void OpenVML_FUNCNAME(slog10_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params);
void OpenVML_FUNCNAME(dlog10_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params);
void OpenVML_FUNCNAME(clog10_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params);
void OpenVML_FUNCNAME(zlog10_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params);


void OpenVML_FUNCNAME(stanh_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params);
void OpenVML_FUNCNAME(dtanh_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params);
void OpenVML_FUNCNAME(ctanh_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params);
void OpenVML_FUNCNAME(ztanh_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params);

#endif
#endif
11 changes: 10 additions & 1 deletion include/openvml_macros.h
Expand Up @@ -53,6 +53,11 @@
#define CEXP_K OpenVML_FUNCNAME(cexp_k)
#define ZEXP_K OpenVML_FUNCNAME(zexp_k)

#define SLOG10_K OpenVML_FUNCNAME(slog10_k)
#define DLOG10_K OpenVML_FUNCNAME(dlog10_k)
#define CLOG10_K OpenVML_FUNCNAME(clog10_k)
#define ZLOG10_K OpenVML_FUNCNAME(zlog10_k)


#define STANH_K OpenVML_FUNCNAME(stanh_k)
#define DTANH_K OpenVML_FUNCNAME(dtanh_k)
Expand All @@ -66,12 +71,14 @@
#define SUB_K SSUB_K
#define POW_K SPOW_K
#define EXP_K SEXP_K
#define LOG10_K SLOG10_K
#define TANH_K STANH_K
#else
#define ADD_K DADD_K
#define SUB_K DSUB_K
#define POW_K DPOW_K
#define EXP_K DEXP_K
#define LOG10_K DLOG10_K
#define TANH_K DTANH_K
#endif
#else
Expand All @@ -80,14 +87,16 @@
#define SUB_K CSUB_K
#define POW_K CPOW_K
#define EXP_K CEXP_K
#define LOG10_K CLOG10_K
#define TANH_K CTANH_K
#else
#define ADD_K ZADD_K
#define SUB_K ZSUB_K
#define POW_K ZPOW_K
#define EXP_K ZEXP_K
#define LOG10_K ZLOG10_K
#define TANH_K ZTANH_K
#endif
#endif

#endif
#endif
3 changes: 3 additions & 0 deletions include/openvml_reference.h
Expand Up @@ -52,6 +52,9 @@ OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdPow)(VML_INT n, const double * a, con
OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vsExp)(VML_INT n, const float * a, float * y);
OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdExp)(VML_INT n, const double * a, double * y);

OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vsLog10)(VML_INT n, const float * a, float * y);
OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdLog10)(VML_INT n, const double * a, double * y);

OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vsTanh)(VML_INT n, const float * a, float * y);
OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdTanh)(VML_INT n, const double * a, double * y);

Expand Down
2 changes: 1 addition & 1 deletion interface/CMakeLists.txt
Expand Up @@ -5,7 +5,7 @@ set(OpenVML_LIBSRC_D "")
set(OpenVML_LIBSRC_C "")
set(OpenVML_LIBSRC_Z "")

set(REAL_INTERFACE_LIST add sub pow exp tanh)
set(REAL_INTERFACE_LIST add sub pow exp tanh log10)
set(COMPLEX_INTERFACE_LIST add sub)

function(cap_string var_name var_name_cap)
Expand Down
39 changes: 39 additions & 0 deletions interface/log10.c
@@ -0,0 +1,39 @@
/* * Copyright (c) 2014, 2015 Zhang Xianyi
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this
* list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <openvml.h>
#include <openvml_driver.h>
#include <openvml_kernel.h>


/*
 * Public entry point for the element-wise base-10 logarithm. The exported
 * symbol name is supplied by the CNAME macro at build time.
 *
 *   n : number of elements to process
 *   a : input vector (only read)
 *   y : output vector
 *
 * Returns without doing anything when n is not positive or when either
 * pointer is NULL.
 */
void CNAME(VML_INT n, const VML_FLOAT * a, VML_FLOAT * y) {

  /* Empty request or missing buffer: nothing to compute. */
  if (n <= 0 || a == NULL || y == NULL) {
    return;
  }

  /* EXEC_VML is defined outside this file; presumably it dispatches the
   * LOG10_K kernel over the n elements (confirm against openvml_driver.h).
   * The const qualifier on `a` is cast away to match the argument list used
   * here; the unused operand slots are passed as NULL. */
  EXEC_VML(0, LOG10_K, n, (VML_FLOAT*)a, NULL, y, NULL, NULL);

}
2 changes: 1 addition & 1 deletion kernel/CMakeLists.txt
Expand Up @@ -6,7 +6,7 @@ set(OpenVML_LIBSRC_D "")
set(OpenVML_LIBSRC_C "")
set(OpenVML_LIBSRC_Z "")

set(KERNEL_LIST add sub pow exp tanh) #s,d
set(KERNEL_LIST add sub pow exp tanh log10) #s,d
set(Z_KERNEL_LIST add sub) #c,z
######## s,d kernels

Expand Down
3 changes: 3 additions & 0 deletions kernel/aarch64/Kernel_generic.txt
Expand Up @@ -14,6 +14,9 @@ set(pow_D_KERNEL_SOURCE generic/pow_kernel.c)
set(exp_S_KERNEL_SOURCE generic/exp_kernel.c)
set(exp_D_KERNEL_SOURCE generic/exp_kernel.c)

set(log10_S_KERNEL_SOURCE generic/log10_kernel.c)
set(log10_D_KERNEL_SOURCE generic/log10_kernel.c)

set(tanh_S_KERNEL_SOURCE generic/tanh_kernel.c)
set(tanh_D_KERNEL_SOURCE generic/tanh_kernel.c)

3 changes: 3 additions & 0 deletions kernel/arm/Kernel_generic.txt
Expand Up @@ -14,6 +14,9 @@ set(pow_D_KERNEL_SOURCE generic/pow_kernel.c)
set(exp_S_KERNEL_SOURCE generic/exp_kernel.c)
set(exp_D_KERNEL_SOURCE generic/exp_kernel.c)

# log10 uses the portable C kernel for both single and double precision.
# These must be the log10_* variables: assigning to exp_* here would replace
# the exp kernel selected just above with the log10 source and leave the
# log10 kernel variables undefined.
set(log10_S_KERNEL_SOURCE generic/log10_kernel.c)
set(log10_D_KERNEL_SOURCE generic/log10_kernel.c)

set(tanh_S_KERNEL_SOURCE generic/tanh_kernel.c)
set(tanh_D_KERNEL_SOURCE generic/tanh_kernel.c)

3 changes: 3 additions & 0 deletions kernel/generic/Kernel_generic.txt
Expand Up @@ -16,6 +16,9 @@ set(pow_D_KERNEL_SOURCE ${OpenVML_ARCH}/pow_kernel.c)
set(exp_S_KERNEL_SOURCE ${OpenVML_ARCH}/exp_kernel.c)
set(exp_D_KERNEL_SOURCE ${OpenVML_ARCH}/exp_kernel.c)

set(log10_S_KERNEL_SOURCE ${OpenVML_ARCH}/log10_kernel.c)
set(log10_D_KERNEL_SOURCE ${OpenVML_ARCH}/log10_kernel.c)

set(tanh_S_KERNEL_SOURCE ${OpenVML_ARCH}/tanh_kernel.c)
set(tanh_D_KERNEL_SOURCE ${OpenVML_ARCH}/tanh_kernel.c)

40 changes: 40 additions & 0 deletions kernel/generic/log10_kernel.c
@@ -0,0 +1,40 @@
/* * Copyright (c) 2014, 2015 Zhang Xianyi
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this
* list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <math.h>
#include "openvml_kernel.h"

/* Pick the scalar libm routine that matches the build precision:
 * log10 when DOUBLE is defined, log10f otherwise. */
#ifdef DOUBLE
#define LOG10 log10
#else
#define LOG10 log10f
#endif

/*
 * Portable scalar kernel: y[i] = LOG10(a[i]) for every i in [0, n).
 * Nothing is written when n is zero or negative.
 * b, z and other_params are part of the kernel signature but are not used
 * by this kernel.
 */
void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) {
  VML_FLOAT *src = a;
  VML_FLOAT *dst = y;
  VMLLONG left;

  /* Walk both vectors front to back, one element per step. */
  for (left = n; left > 0; left--) {
    *dst = LOG10(*src);
    ++src;
    ++dst;
  }
}
2 changes: 2 additions & 0 deletions kernel/x86_64/Kernel_generic.txt
Expand Up @@ -17,3 +17,5 @@ set(exp_D_KERNEL_SOURCE generic/exp_kernel.c)
set(tanh_S_KERNEL_SOURCE generic/tanh_kernel.c)
set(tanh_D_KERNEL_SOURCE generic/tanh_kernel.c)

set(log10_S_KERNEL_SOURCE generic/log10_kernel.c)
set(log10_D_KERNEL_SOURCE generic/log10_kernel.c)
3 changes: 3 additions & 0 deletions kernel/x86_64/Kernel_haswell.txt
Expand Up @@ -15,3 +15,6 @@ set(sub_C_KERNEL_SOURCE ${OpenVML_ARCH}/ssub_kernel_avx.c)
set(sub_Z_KERNEL_SOURCE ${OpenVML_ARCH}/dsub_kernel_avx.c)

set(exp_S_KERNEL_SOURCE ${OpenVML_ARCH}/sexp_kernel_avx2.c)

set(log10_S_KERNEL_SOURCE ${OpenVML_ARCH}/slog10_kernel_avx.c)
set(log10_D_KERNEL_SOURCE ${OpenVML_ARCH}/dlog10_kernel_avx.c)
3 changes: 3 additions & 0 deletions kernel/x86_64/Kernel_sandybridge.txt
Expand Up @@ -13,3 +13,6 @@ set(sub_S_KERNEL_SOURCE ${OpenVML_ARCH}/ssub_kernel_avx.c)
set(sub_D_KERNEL_SOURCE ${OpenVML_ARCH}/dsub_kernel_avx.c)
set(sub_C_KERNEL_SOURCE ${OpenVML_ARCH}/ssub_kernel_avx.c)
set(sub_Z_KERNEL_SOURCE ${OpenVML_ARCH}/dsub_kernel_avx.c)

set(log10_S_KERNEL_SOURCE ${OpenVML_ARCH}/slog10_kernel_avx.c)
set(log10_D_KERNEL_SOURCE ${OpenVML_ARCH}/dlog10_kernel_avx.c)
78 changes: 78 additions & 0 deletions kernel/x86_64/dlog10_kernel_avx.c
@@ -0,0 +1,78 @@
/* * Copyright (c) 2014, 2015 Zhang Xianyi
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this
* list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <math.h>
#include "openvml_kernel.h"

/*
 * AVX kernel for the double-precision base-10 logarithm: y[i] = log10(a[i])
 * over COMPSIZE*n elements.
 *
 * The main loop handles 32 doubles per pass as eight unaligned 256-bit
 * vectors (all loads, then all log10 calls, then all stores). The remaining
 * 0..31 elements are processed with scalar log10().
 * b, z and other_params are part of the kernel signature but are not used.
 *
 * NOTE(review): _mm256_log10_pd looks like a vector-math-library intrinsic
 * rather than a plain AVX instruction -- confirm that every compiler this
 * kernel is built with provides it.
 */
void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) {
  const VMLLONG total = COMPSIZE*n;
  VMLLONG blocks = total >> 5;         /* number of full 32-element blocks */
  const VMLLONG tail = total & 0x1f;   /* elements left after the blocks  */
  int k;

  for (; blocks > 0; blocks--) {
    /* Load the whole 32-element block before anything is written back. */
    const __m256d in0 = _mm256_loadu_pd(a);
    const __m256d in1 = _mm256_loadu_pd(a + 4);
    const __m256d in2 = _mm256_loadu_pd(a + 8);
    const __m256d in3 = _mm256_loadu_pd(a + 12);
    const __m256d in4 = _mm256_loadu_pd(a + 16);
    const __m256d in5 = _mm256_loadu_pd(a + 20);
    const __m256d in6 = _mm256_loadu_pd(a + 24);
    const __m256d in7 = _mm256_loadu_pd(a + 28);

    /* Four doubles per call. */
    const __m256d out0 = _mm256_log10_pd(in0);
    const __m256d out1 = _mm256_log10_pd(in1);
    const __m256d out2 = _mm256_log10_pd(in2);
    const __m256d out3 = _mm256_log10_pd(in3);
    const __m256d out4 = _mm256_log10_pd(in4);
    const __m256d out5 = _mm256_log10_pd(in5);
    const __m256d out6 = _mm256_log10_pd(in6);
    const __m256d out7 = _mm256_log10_pd(in7);

    _mm256_storeu_pd(y,      out0);
    _mm256_storeu_pd(y + 4,  out1);
    _mm256_storeu_pd(y + 8,  out2);
    _mm256_storeu_pd(y + 12, out3);
    _mm256_storeu_pd(y + 16, out4);
    _mm256_storeu_pd(y + 20, out5);
    _mm256_storeu_pd(y + 24, out6);
    _mm256_storeu_pd(y + 28, out7);

    /* Advance both pointers past the block just processed. */
    a += 32;
    y += 32;
  }

  /* Scalar clean-up for the elements that do not fill a whole block. */
  for (k = 0; k < tail; k++) {
    y[k] = log10(a[k]);
  }
}
63 changes: 63 additions & 0 deletions kernel/x86_64/slog10_kernel_avx.c
@@ -0,0 +1,63 @@
/* * Copyright (c) 2014, 2015 Zhang Xianyi
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this
* list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <math.h>
#include "openvml_kernel.h"

/*
 * AVX kernel for the single-precision base-10 logarithm: y[i] = log10f(a[i])
 * over COMPSIZE*n elements.
 *
 * The main loop handles 32 floats per pass as four unaligned 256-bit vectors
 * (eight floats each). The remaining 0..31 elements are processed with
 * scalar log10f().
 * b, z and other_params are part of the kernel signature but are not used.
 *
 * NOTE(review): _mm256_log10_ps looks like a vector-math-library intrinsic
 * rather than a plain AVX instruction -- confirm that every compiler this
 * kernel is built with provides it.
 */
void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) {
  /* 32 elements per iteration: >> 5 counts full blocks, & 0x1f the rest. */
  VMLLONG loop_count=(COMPSIZE*n) >> 5;
  VMLLONG remain_count=(COMPSIZE*n) & 0x1f;

  int i=0;

  while(loop_count>0){

    /* Load the whole block before anything is written back. */
    __m256 av0=_mm256_loadu_ps(a);
    __m256 av1=_mm256_loadu_ps(a+8);
    __m256 av2=_mm256_loadu_ps(a+16);
    __m256 av3=_mm256_loadu_ps(a+24);

    /* Eight floats per call. */
    __m256 yv0=_mm256_log10_ps(av0);
    __m256 yv1=_mm256_log10_ps(av1);
    __m256 yv2=_mm256_log10_ps(av2);
    __m256 yv3=_mm256_log10_ps(av3);

    _mm256_storeu_ps(y, yv0);
    _mm256_storeu_ps(y+8, yv1);
    _mm256_storeu_ps(y+16, yv2);
    _mm256_storeu_ps(y+24, yv3);

    /* Advance both pointers past the block just processed. */
    a+=32;
    y+=32;
    loop_count--;
  }

  /* Scalar clean-up for the elements that do not fill a whole block. */
  for(i=0; i<remain_count; i++){
    y[i]=log10f(a[i]);
  }
}
1 change: 1 addition & 0 deletions reference/CMakeLists.txt
Expand Up @@ -4,6 +4,7 @@ set(OpenVML_REF_SRC
vpow.c
vexp.c
vtanh.c
vlog10.c
)

add_library(${OpenVML_LIBNAME}_ref SHARED ${OpenVML_REF_SRC})
Expand Down

0 comments on commit e03f02a

Please sign in to comment.