From cf29530612b60050147098b8fca025de554d15ec Mon Sep 17 00:00:00 2001 From: Hengfei Yang Date: Mon, 4 Jul 2022 20:08:57 +0800 Subject: [PATCH] fix Fuzziness in Match query (#258) --- pkg/core/search_test.go | 102 ++++++++++++++++++++++++++++++++++++++ pkg/uquery/query/fuzzy.go | 48 ++++++++++++++---- pkg/uquery/query/match.go | 23 +++++---- 3 files changed, 153 insertions(+), 20 deletions(-) diff --git a/pkg/core/search_test.go b/pkg/core/search_test.go index e8b432d02..5f1d23fd1 100644 --- a/pkg/core/search_test.go +++ b/pkg/core/search_test.go @@ -174,6 +174,108 @@ func TestIndex_Search(t *testing.T) { }, }, }, + { + name: "Search Query - fuzzy fuzziness AUTO", + args: args{ + iQuery: &meta.ZincQuery{ + Query: &meta.Query{ + Fuzzy: map[string]*meta.FuzzyQuery{ + "_all": { + Value: "fransisco", // note the wrong spelling, + Fuzziness: "AUTO", + }, + }, + }, + Size: 10, + }, + }, + data: []map[string]interface{}{ + { + "name": "Prabhat Sharma", + "address": map[string]interface{}{ + "city": "San Francisco", + "state": "California", + }, + "hobby": "chess", + }, + { + "name": "Leonardo DiCaprio", + "address": map[string]interface{}{ + "city": "Los angeles", + "state": "California", + }, + "hobby": "chess", + }, + }, + }, + { + name: "Search Query - fuzzy fuzziness AUTO", + args: args{ + iQuery: &meta.ZincQuery{ + Query: &meta.Query{ + Fuzzy: map[string]*meta.FuzzyQuery{ + "_all": { + Value: "fransisco", // note the wrong spelling, + Fuzziness: "AUTO:3,6", + }, + }, + }, + Size: 10, + }, + }, + data: []map[string]interface{}{ + { + "name": "Prabhat Sharma", + "address": map[string]interface{}{ + "city": "San Francisco", + "state": "California", + }, + "hobby": "chess", + }, + { + "name": "Leonardo DiCaprio", + "address": map[string]interface{}{ + "city": "Los angeles", + "state": "California", + }, + "hobby": "chess", + }, + }, + }, + { + name: "Search Query - fuzzy fuzziness 2", + args: args{ + iQuery: &meta.ZincQuery{ + Query: &meta.Query{ + Fuzzy: 
map[string]*meta.FuzzyQuery{ + "_all": { + Value: "fransisco", // note the wrong spelling, + Fuzziness: 2, + }, + }, + }, + Size: 10, + }, + }, + data: []map[string]interface{}{ + { + "name": "Prabhat Sharma", + "address": map[string]interface{}{ + "city": "San Francisco", + "state": "California", + }, + "hobby": "chess", + }, + { + "name": "Leonardo DiCaprio", + "address": map[string]interface{}{ + "city": "Los angeles", + "state": "California", + }, + "hobby": "chess", + }, + }, + }, { name: "Search Query - querystring", args: args{ diff --git a/pkg/uquery/query/fuzzy.go b/pkg/uquery/query/fuzzy.go index ee51215f8..5e5e43d04 100644 --- a/pkg/uquery/query/fuzzy.go +++ b/pkg/uquery/query/fuzzy.go @@ -23,6 +23,7 @@ import ( "github.com/zinclabs/zinc/pkg/errors" "github.com/zinclabs/zinc/pkg/meta" + "github.com/zinclabs/zinc/pkg/zutils" ) func FuzzyQuery(query map[string]interface{}) (bluge.Query, error) { @@ -43,13 +44,13 @@ func FuzzyQuery(query map[string]interface{}) (bluge.Query, error) { k := strings.ToLower(k) switch k { case "value": - value.Value = v.(string) + value.Value, _ = zutils.ToString(v) case "fuzziness": - value.Fuzziness = v.(string) + value.Fuzziness = v case "prefix_length": - value.PrefixLength = v.(float64) + value.PrefixLength, _ = zutils.ToFloat64(v) case "boost": - value.Boost = v.(float64) + value.Boost, _ = zutils.ToFloat64(v) default: return nil, errors.New(errors.ErrorTypeParsingException, fmt.Sprintf("[fuzzy] unknown field [%s]", k)) } @@ -61,11 +62,9 @@ func FuzzyQuery(query map[string]interface{}) (bluge.Query, error) { subq := bluge.NewFuzzyQuery(value.Value).SetField(field) if value.Fuzziness != nil { - switch v := value.Fuzziness.(type) { - case string: - // TODO: support other fuzziness: AUTO - case float64: - subq.SetFuzziness(int(v)) + v := ParseFuzziness(value.Fuzziness, len(value.Value)) + if v > 0 { + subq.SetFuzziness(v) } } if value.PrefixLength > 0 { @@ -77,3 +76,34 @@ func FuzzyQuery(query map[string]interface{}) 
(bluge.Query, error) { return subq, nil } + +func ParseFuzziness(fuzziness interface{}, n int) int { + val, _ := zutils.ToString(fuzziness) + val = strings.ToUpper(val) + if !strings.HasPrefix(val, "AUTO") { + v, _ := zutils.ToInt(val) + return v + } + + n1 := 3 + n2 := 6 + if strings.Contains(val, ":") && strings.Contains(val, ",") { + val := strings.TrimPrefix(val, "AUTO:") + vals := strings.Split(val, ",") + if len(vals) == 2 { + n1, _ = zutils.ToInt(vals[0]) + n2, _ = zutils.ToInt(vals[1]) + if n1 < 2 || n1 >= n2 { + return 0 + } + } + } + + v := 0 + if n >= n2 { + v = 2 + } else if n >= n1 { + v = 1 + } + return v +} diff --git a/pkg/uquery/query/match.go b/pkg/uquery/query/match.go index 0962abd08..57c04ff21 100644 --- a/pkg/uquery/query/match.go +++ b/pkg/uquery/query/match.go @@ -25,6 +25,7 @@ import ( "github.com/zinclabs/zinc/pkg/errors" "github.com/zinclabs/zinc/pkg/meta" zincanalysis "github.com/zinclabs/zinc/pkg/uquery/analysis" + "github.com/zinclabs/zinc/pkg/zutils" ) func MatchQuery(query map[string]interface{}, mappings *meta.Mappings, analyzers map[string]*analysis.Analyzer) (bluge.Query, error) { @@ -45,17 +46,17 @@ func MatchQuery(query map[string]interface{}, mappings *meta.Mappings, analyzers k := strings.ToLower(k) switch k { case "query": - value.Query = v.(string) + value.Query, _ = zutils.ToString(v) case "analyzer": - value.Analyzer = v.(string) + value.Analyzer, _ = zutils.ToString(v) case "operator": - value.Operator = v.(string) + value.Operator, _ = zutils.ToString(v) case "fuzziness": - value.Fuzziness = v.(string) + value.Fuzziness = v case "prefix_length": - value.PrefixLength = v.(float64) + value.PrefixLength, _ = zutils.ToFloat64(v) case "boost": - value.Boost = v.(float64) + value.Boost, _ = zutils.ToFloat64(v) default: return nil, errors.New(errors.ErrorTypeParsingException, fmt.Sprintf("[match] unknown field [%s]", k)) } @@ -98,11 +99,11 @@ func MatchQuery(query map[string]interface{}, mappings *meta.Mappings, analyzers } } 
if value.Fuzziness != nil { - switch v := value.Fuzziness.(type) { - case string: - // TODO: support other fuzziness: AUTO - case float64: - subq.SetFuzziness(int(v)) + if value.Fuzziness != nil { + v := ParseFuzziness(value.Fuzziness, len(value.Query)) + if v > 0 { + subq.SetFuzziness(v) + } } } if value.PrefixLength > 0 {