-
Notifications
You must be signed in to change notification settings - Fork 0
/
solver.go
99 lines (89 loc) · 2.3 KB
/
solver.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
// Copyright © 2020 wego authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package glove
import (
"math"
"github.com/wujunfeng1/wego/pkg/corpus/dictionary"
"github.com/wujunfeng1/wego/pkg/model/modelutil/matrix"
)
type solver interface {
trainOne(l1, l2 int, param *matrix.Matrix, f, coef float64)
}
type stochastic struct {
initlr float64
}
func newStochastic(opts Options) solver {
return &stochastic{
initlr: opts.Initlr,
}
}
func (sol *stochastic) trainOne(l1, l2 int, param *matrix.Matrix, f, coef float64) {
v1, v2 := param.Slice(l1), param.Slice(l2)
dim, diff := len(v1)-1, 0.
for i := 0; i < dim; i++ {
diff += v1[i] * v2[i]
}
diff += v1[dim] + v2[dim] - f
diff *= coef * sol.initlr
for i := 0; i < dim; i++ {
t1, t2 := diff*v2[i], diff*v1[i]
v1[i] -= t1
v2[i] -= t2
}
v1[dim] -= diff
v2[dim] -= diff
}
type adaGrad struct {
initlr float64
gradsq *matrix.Matrix
}
func newAdaGrad(dic *dictionary.Dictionary, opts Options) solver {
dimAndBias := opts.Dim + 1
return &adaGrad{
initlr: opts.Initlr,
gradsq: matrix.New(
dic.Len()*2,
dimAndBias,
func(_ int, vec []float64) {
for i := 0; i < dimAndBias; i++ {
vec[i] = 1.
}
},
),
}
}
func (sol *adaGrad) trainOne(l1, l2 int, param *matrix.Matrix, f, coef float64) {
v1, v2 := param.Slice(l1), param.Slice(l2)
g1, g2 := sol.gradsq.Slice(l1), sol.gradsq.Slice(l2)
dim, diff := len(v1)-1, 0.
for i := 0; i < dim; i++ {
diff += v1[i] * v2[i]
}
diff += v1[dim] + v2[dim] - f
diff *= coef * sol.initlr
for i := 0; i < dim; i++ {
t1, t2 := diff*v2[i], diff*v1[i]
g1[i] += t1 * t1
g2[i] += t2 * t2
t1 /= math.Sqrt(g1[i])
t2 /= math.Sqrt(g2[i])
v1[i] -= t1
v2[i] -= t2
}
v1[dim] -= diff / math.Sqrt(g1[dim])
v2[dim] -= diff / math.Sqrt(g2[dim])
diff *= diff
g1[dim] += diff
g2[dim] += diff
}