-
Notifications
You must be signed in to change notification settings - Fork 743
/
floats_amd64.go
113 lines (99 loc) · 3.05 KB
/
floats_amd64.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
//go:build !noasm
// Copyright 2022 gorse Project Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package floats
import (
"github.com/klauspost/cpuid/v2"
"unsafe"
)
//go:generate go run ../../cmd/goat src/floats_avx.c -O3 -mavx
//go:generate go run ../../cmd/goat src/floats_avx512.c -O3 -mavx -mfma -mavx512f -mavx512dq
// impl holds the selected kernel back end. It starts as Default and may be
// upgraded once by init.
var impl = Default

// init picks the back end from the CPU features reported by cpuid: AVX512
// when both AVX512F and AVX512DQ are supported, otherwise AVX when AVX is
// supported. If neither check passes, impl keeps its initial Default value.
func init() {
	switch {
	case cpuid.CPU.Supports(cpuid.AVX512F, cpuid.AVX512DQ):
		impl = AVX512
	case cpuid.CPU.Supports(cpuid.AVX):
		impl = AVX
	}
}
// implementation identifies which set of float32 vector kernels a call is
// dispatched to.
type implementation int

// Available kernel back ends.
const (
	// Default selects the package-level Go functions.
	Default implementation = iota
	// AVX selects the _mm256_* kernels.
	AVX
	// AVX512 selects the _mm512_* kernels.
	AVX512
)

// String returns the lower-case name of the back end: "avx" for AVX,
// "avx512" for AVX512, and "default" for Default and any other value.
func (i implementation) String() string {
	if i == AVX {
		return "avx"
	}
	if i == AVX512 {
		return "avx512"
	}
	return "default"
}
// mulConstAddTo dispatches to the back end selected by i:
// _mm256_mul_const_add_to for AVX, _mm512_mul_const_add_to for AVX512, and
// the package-level mulConstAddTo for every other value.
//
// The SIMD kernels receive the addresses of the first elements of a and c,
// the address of b, and len(a) as the element count. Taking &a[0] panics on
// an empty slice, so empty input is routed to the package-level function
// instead of the SIMD kernels.
//
// NOTE(review): only len(a) is handed to the kernels, so c presumably has to
// be at least as long as a — confirm that callers validate this.
func (i implementation) mulConstAddTo(a []float32, b float32, c []float32) {
	if len(a) == 0 {
		// &a[0] below would panic with an index-out-of-range error.
		mulConstAddTo(a, b, c)
		return
	}
	switch i {
	case AVX:
		// The length travels as an integer wrapped in unsafe.Pointer, matching
		// how the kernel is called throughout this file.
		_mm256_mul_const_add_to(unsafe.Pointer(&a[0]), unsafe.Pointer(&b), unsafe.Pointer(&c[0]), unsafe.Pointer(uintptr(len(a))))
	case AVX512:
		_mm512_mul_const_add_to(unsafe.Pointer(&a[0]), unsafe.Pointer(&b), unsafe.Pointer(&c[0]), unsafe.Pointer(uintptr(len(a))))
	default:
		mulConstAddTo(a, b, c)
	}
}
// mulConstTo dispatches to the back end selected by i: _mm256_mul_const_to
// for AVX, _mm512_mul_const_to for AVX512, and the package-level mulConstTo
// for every other value.
//
// The SIMD kernels receive the addresses of the first elements of a and c,
// the address of b, and len(a) as the element count. Taking &a[0] panics on
// an empty slice, so empty input is routed to the package-level function
// instead of the SIMD kernels.
//
// NOTE(review): only len(a) is handed to the kernels, so c presumably has to
// be at least as long as a — confirm that callers validate this.
func (i implementation) mulConstTo(a []float32, b float32, c []float32) {
	if len(a) == 0 {
		// &a[0] below would panic with an index-out-of-range error.
		mulConstTo(a, b, c)
		return
	}
	switch i {
	case AVX:
		// The length travels as an integer wrapped in unsafe.Pointer, matching
		// how the kernel is called throughout this file.
		_mm256_mul_const_to(unsafe.Pointer(&a[0]), unsafe.Pointer(&b), unsafe.Pointer(&c[0]), unsafe.Pointer(uintptr(len(a))))
	case AVX512:
		_mm512_mul_const_to(unsafe.Pointer(&a[0]), unsafe.Pointer(&b), unsafe.Pointer(&c[0]), unsafe.Pointer(uintptr(len(a))))
	default:
		mulConstTo(a, b, c)
	}
}
// mulTo dispatches to the back end selected by i: _mm256_mul_to for AVX,
// _mm512_mul_to for AVX512, and the package-level mulTo for every other
// value.
//
// The SIMD kernels receive the addresses of the first elements of a, b and c,
// and len(a) as the element count. Taking &a[0] panics on an empty slice, so
// empty input is routed to the package-level function instead of the SIMD
// kernels.
//
// NOTE(review): only len(a) is handed to the kernels, so b and c presumably
// have to be at least as long as a — confirm that callers validate this.
func (i implementation) mulTo(a, b, c []float32) {
	if len(a) == 0 {
		// &a[0] below would panic with an index-out-of-range error.
		mulTo(a, b, c)
		return
	}
	switch i {
	case AVX:
		// The length travels as an integer wrapped in unsafe.Pointer, matching
		// how the kernel is called throughout this file.
		_mm256_mul_to(unsafe.Pointer(&a[0]), unsafe.Pointer(&b[0]), unsafe.Pointer(&c[0]), unsafe.Pointer(uintptr(len(a))))
	case AVX512:
		_mm512_mul_to(unsafe.Pointer(&a[0]), unsafe.Pointer(&b[0]), unsafe.Pointer(&c[0]), unsafe.Pointer(uintptr(len(a))))
	default:
		mulTo(a, b, c)
	}
}
// mulConst dispatches to the back end selected by i: _mm256_mul_const for
// AVX, _mm512_mul_const for AVX512, and the package-level mulConst for every
// other value.
//
// The SIMD kernels receive the address of the first element of a, the
// address of b, and len(a) as the element count. Taking &a[0] panics on an
// empty slice, so empty input is routed to the package-level function
// instead of the SIMD kernels.
func (i implementation) mulConst(a []float32, b float32) {
	if len(a) == 0 {
		// &a[0] below would panic with an index-out-of-range error.
		mulConst(a, b)
		return
	}
	switch i {
	case AVX:
		// The length travels as an integer wrapped in unsafe.Pointer, matching
		// how the kernel is called throughout this file.
		_mm256_mul_const(unsafe.Pointer(&a[0]), unsafe.Pointer(&b), unsafe.Pointer(uintptr(len(a))))
	case AVX512:
		_mm512_mul_const(unsafe.Pointer(&a[0]), unsafe.Pointer(&b), unsafe.Pointer(uintptr(len(a))))
	default:
		mulConst(a, b)
	}
}
// dot dispatches to the back end selected by i and returns its result:
// _mm256_dot for AVX, _mm512_dot for AVX512, and the package-level dot for
// every other value.
//
// The SIMD kernels receive the addresses of the first elements of a and b,
// len(a) as the element count, and the address of a local float32 whose
// value is returned after the call. Taking &a[0] panics on an empty slice,
// so empty input is routed to the package-level function instead of the SIMD
// kernels.
//
// NOTE(review): only len(a) is handed to the kernels, so b presumably has to
// be at least as long as a — confirm that callers validate this.
func (i implementation) dot(a, b []float32) float32 {
	if len(a) == 0 {
		// &a[0] below would panic with an index-out-of-range error.
		return dot(a, b)
	}
	switch i {
	case AVX:
		// ret is declared per case so the fallback path never touches it.
		var ret float32
		_mm256_dot(unsafe.Pointer(&a[0]), unsafe.Pointer(&b[0]), unsafe.Pointer(uintptr(len(a))), unsafe.Pointer(&ret))
		return ret
	case AVX512:
		var ret float32
		_mm512_dot(unsafe.Pointer(&a[0]), unsafe.Pointer(&b[0]), unsafe.Pointer(uintptr(len(a))), unsafe.Pointer(&ret))
		return ret
	default:
		return dot(a, b)
	}
}