-
Notifications
You must be signed in to change notification settings - Fork 184
/
Copy pathstdlib_intrinsics_dot_product.fypp
77 lines (67 loc) · 2.27 KB
/
stdlib_intrinsics_dot_product.fypp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#:include "common.fypp"
#:set I_KINDS_TYPES = list(zip(INT_KINDS, INT_TYPES, INT_KINDS))
#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX))
#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX))
#:def cnjg(type,expression)
#:if 'complex' in type
conjg(${expression}$)
#:else
${expression}$
#:endif
#:enddef
submodule(stdlib_intrinsics) stdlib_intrinsics_dot_product
!!Replacement for certain Fortran intrinsic functions offering either faster and/or more accurate implementations.
!! ([Specification](../page/specs/stdlib_intrinsics.html))
use stdlib_kinds
use stdlib_constants
implicit none
integer, parameter :: ilp = int64
contains
! This implementation is based on https://github.com/jalvesz/fast_math
#:for k, t, s in I_KINDS_TYPES + R_KINDS_TYPES + C_KINDS_TYPES
pure module function stdlib_dot_product_${s}$(a,b) result(p)
integer(ilp), parameter :: chunk = 64
${t}$, intent(in) :: a(:)
${t}$, intent(in) :: b(:)
${t}$ :: p
${t}$ :: abatch(chunk)
integer(ilp) :: i, n, r
! -----------------------------
n = size(a,kind=ilp)
r = mod(n,chunk)
abatch(1:r) = a(1:r)*${cnjg(t,'b(1:r)')}$
abatch(r+1:chunk) = zero_${s}$
do i = r+1, n-r, chunk
abatch(1:chunk) = abatch(1:chunk) + a(i:i+chunk-1)*${cnjg(t,'b(i:i+chunk-1)')}$
end do
p = zero_${s}$
do i = 1, chunk/2
p = p + abatch(i)+abatch(chunk/2+i)
end do
end function
#:endfor
#:for k, t, s in R_KINDS_TYPES + C_KINDS_TYPES
pure module function stdlib_dot_product_kahan_${s}$(a,b) result(p)
integer(ilp), parameter :: chunk = 64
${t}$, intent(in) :: a(:)
${t}$, intent(in) :: b(:)
${t}$ :: p
${t}$ :: abatch(chunk)
${t}$ :: cbatch(chunk)
integer(ilp) :: i, n, r
! -----------------------------
n = size(a,kind=ilp)
r = mod(n,chunk)
abatch(1:r) = a(1:r)*${cnjg(t,'b(1:r)')}$
abatch(r+1:chunk) = zero_${s}$
cbatch = zero_${s}$
do i = r+1, n-r, chunk
call kahan_kernel( a(i:i+chunk-1)*${cnjg(t,'b(i:i+chunk-1)')}$ , abatch(1:chunk) , cbatch(1:chunk) )
end do
p = zero_${s}$
do i = 1, chunk
call kahan_kernel( abatch(i) , p , cbatch(i) )
end do
end function
#:endfor
end submodule stdlib_intrinsics_dot_product