Skip to content

Commit

Permalink
add ExtractWithWeight api
Browse files: browse the repository at this point in the history
  • Loading branch information
yanyiwu committed Sep 3, 2016
1 parent 73e13a3 commit 37f3eac
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 17 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@ func main() {
wordinfos = x.Tokenize(s, gojieba.DefaultMode, !use_hmm)
fmt.Println(s)
fmt.Println("Tokenize:(默认模式)", wordinfos)
ex := NewExtractor()
defer ex.Free()
keywords := ex.ExtractWithWeight(s, 5)
fmt.Println("Extract:", keywords)
}
```

Expand Down
19 changes: 16 additions & 3 deletions extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,30 @@ func (x *Extractor) Extract(s string, topk int) []string {
}

// WordWeight pairs a keyword with its weight. Values of this type are built
// by cwordweights from C.struct_CWordWeight entries and returned by
// Extractor.ExtractWithWeight.
//
// NOTE(review): this span looks like a rendered diff whose +/- markers were
// stripped — presumably the lowercase word/weight fields are the removed
// lines and the exported Word/Weight fields replace them so callers outside
// the package can read the values; confirm against extractor.go.
type WordWeight struct {
word string
weight float64
// Word is the keyword text (filled from the C entry's word field).
Word string
// Weight is the keyword's weight (filled from the C entry's weight field).
Weight float64
}

func (x *Extractor) ExtractWithWeight(s string, topk int) []WordWeight {
cstr := C.CString(s)
defer C.free(unsafe.Pointer(cstr))
//var words *C.struct_CWordWeight = C.ExtractWithWeight(x.extractor, cstr, C.int(topk))
words := C.ExtractWithWeight(x.extractor, cstr, C.int(topk))
p := unsafe.Pointer(words)
res := cwordweights((*C.struct_CWordWeight)(p))
defer C.FreeWordWeights(words)
return res
}

func cwordweights(x *C.struct_CWordWeight) []WordWeight {
var s []WordWeight
eltSize := unsafe.Sizeof(*x)
for (*x).word != nil {
ww := WordWeight{
C.GoString(((C.struct_CWordWeight)(*x)).word),
float64((*x).weight),
}
s = append(s, ww)
x = (*C.struct_CWordWeight)(unsafe.Pointer(uintptr(unsafe.Pointer(x)) + eltSize))
}
return s
}
4 changes: 4 additions & 0 deletions extractor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,13 @@ func ExampleExtract() {
words := x.Extract(s, 5)
fmt.Println(s)
fmt.Println("关键词抽取:", strings.Join(words, "/"))
word_weights := x.ExtractWithWeight(s, 5)
fmt.Println("关键词抽取:", word_weights)

// Output:
// 我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。
// 关键词抽取: CEO/升职/加薪/手扶拖拉机/巅峰
// 关键词抽取: [{CEO 11.739204307083542} {升职 10.8561552143} {加薪 10.642581114} {手扶拖拉机 10.0088573539} {巅峰 9.49395840471}]
}

func TestExtractor(t *testing.T) {
Expand All @@ -46,5 +49,6 @@ func BenchmarkExtractor(b *testing.B) {
defer b.StopTimer()
for i := 0; i < b.N; i++ {
x.Extract(s, 10)
x.ExtractWithWeight(s, 10)
}
}
14 changes: 0 additions & 14 deletions util.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,6 @@ func cstrings(x **C.char) []string {
return s
}

func cwordweights(x *C.struct_CWordWeight) []WordWeight {
var s []WordWeight
eltSize := unsafe.Sizeof(*x)
for *x != nil {
ww := WordWeight{
C.GoString(((C.struct_CWordWeight)(*x)).word),
(*x).weight,
}
s = append(s, ww)
x = (*C.struct_CWordWeight)(unsafe.Pointer(uintptr(unsafe.Pointer(x)) + eltSize))
}
return s
}

func convertWords(s string, words *C.Word) []Word {
result := make([]Word, 0)
x := words
Expand Down

0 comments on commit 37f3eac

Please sign in to comment.