Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exact words #22

Merged
merged 5 commits into from Feb 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion c-shared.c
Expand Up @@ -12,9 +12,10 @@ Suggestion* makeSuggestion(char* word, int weight, int learned_on)
return sug;
}

TransliterationResult* makeResult(varray* exact_matches, varray* dictionary_suggestions, varray* pattern_dictionary_suggestions, varray* tokenizer_suggestions, varray* greedy_tokenized)
TransliterationResult* makeResult(varray* exact_words, varray* exact_matches, varray* dictionary_suggestions, varray* pattern_dictionary_suggestions, varray* tokenizer_suggestions, varray* greedy_tokenized)
{
TransliterationResult *result = (TransliterationResult*) malloc (sizeof(TransliterationResult));
result->ExactWords = exact_words;
result->ExactMatches = exact_matches;
result->DictionarySuggestions = dictionary_suggestions;
result->PatternDictionarySuggestions = pattern_dictionary_suggestions;
Expand Down
12 changes: 9 additions & 3 deletions c-shared.go
Expand Up @@ -65,10 +65,16 @@ func makeCTransliterationResult(ctx context.Context, goResult govarnam.Translite
// They should be freed manually. GC won't pick it.
// The freeing should be done by programs using govarnam

cExactMatch := C.varray_init()
cExactWords := C.varray_init()
for _, sug := range goResult.ExactWords {
cSug := unsafe.Pointer(C.makeSuggestion(C.CString(sug.Word), C.int(sug.Weight), C.int(sug.LearnedOn)))
C.varray_push(cExactWords, cSug)
}

cExactMatches := C.varray_init()
for _, sug := range goResult.ExactMatches {
cSug := unsafe.Pointer(C.makeSuggestion(C.CString(sug.Word), C.int(sug.Weight), C.int(sug.LearnedOn)))
C.varray_push(cExactMatch, cSug)
C.varray_push(cExactMatches, cSug)
}

cDictionarySuggestions := C.varray_init()
Expand All @@ -95,7 +101,7 @@ func makeCTransliterationResult(ctx context.Context, goResult govarnam.Translite
C.varray_push(cGreedyTokenized, cSug)
}

*resultPointer = C.makeResult(cExactMatch, cDictionarySuggestions, cPatternDictionarySuggestions, cTokenizerSuggestions, cGreedyTokenized)
*resultPointer = C.makeResult(cExactWords, cExactMatches, cDictionarySuggestions, cPatternDictionarySuggestions, cTokenizerSuggestions, cGreedyTokenized)

return C.VARNAM_SUCCESS
}
Expand Down
3 changes: 2 additions & 1 deletion c-shared.h
Expand Up @@ -25,6 +25,7 @@ typedef struct Suggestion_t {
} Suggestion;

typedef struct TransliterationResult_t {
varray* ExactWords;
varray* ExactMatches;
varray* DictionarySuggestions;
varray* PatternDictionarySuggestions;
Expand All @@ -34,7 +35,7 @@ typedef struct TransliterationResult_t {

Suggestion* makeSuggestion(char* word, int weight, int learned_on);

TransliterationResult* makeResult(varray* exact_matches, varray* dictionary_suggestions, varray* pattern_dictionary_suggestions, varray* tokenizer_suggestions, varray* greedy_tokenized);
TransliterationResult* makeResult(varray* exact_words, varray* exact_matches, varray* dictionary_suggestions, varray* pattern_dictionary_suggestions, varray* tokenizer_suggestions, varray* greedy_tokenized);

void destroySuggestionsArray(varray* pointer);
void destroyTransliterationResult(TransliterationResult*);
Expand Down
3 changes: 3 additions & 0 deletions cli/main.go
Expand Up @@ -169,6 +169,9 @@ func main() {
fmt.Println("Greedy Tokenized")
printSugs(result.GreedyTokenized)

fmt.Println("Exact Words")
printSugs(result.ExactWords)

fmt.Println("Exact Matches")
printSugs(result.ExactMatches)

Expand Down
112 changes: 72 additions & 40 deletions govarnam/channel.go
Expand Up @@ -8,6 +8,7 @@ import (
)

// channelDictionaryResult is the value that channelGetFromDictionary and
// channelGetFromPatternDictionary send on their result channel.
// Both senders build it with a positional composite literal, so the field
// order here must not change.
type channelDictionaryResult struct {
// exactWords holds suggestions that match the whole input word. The pattern
// dictionary fills it when a match's length equals the input length.
exactWords []Suggestion
// exactMatches holds dictionary matches that are not already present in
// exactWords. The pattern dictionary always sends an empty slice here.
exactMatches []Suggestion
// suggestions holds the remaining suggestions, e.g. those built by
// tokenizing the rest of the word after a partial match.
suggestions []Suggestion
}
Expand Down Expand Up @@ -71,8 +72,9 @@ func (varnam *Varnam) channelTokensToGreedySuggestions(ctx context.Context, toke

func (varnam *Varnam) channelGetFromDictionary(ctx context.Context, word string, tokens *[]Token, channel chan channelDictionaryResult) {
var (
dictResults []Suggestion
exactMatches []Suggestion
exactWords []Suggestion
exactMatches []Suggestion
moreSuggestions []Suggestion
)

select {
Expand All @@ -82,60 +84,81 @@ func (varnam *Varnam) channelGetFromDictionary(ctx context.Context, word string,
default:
start := time.Now()

dictSugs := varnam.getFromDictionary(ctx, tokens)
dictResult := varnam.getFromDictionary(ctx, tokens)

if varnam.Debug {
fmt.Println("Dictionary results:", dictSugs)
fmt.Println("Dictionary results:", dictResult)
}

if len(dictSugs.sugs) > 0 {
if dictSugs.exactMatch == false {
// These will be partial words
restOfWord := word[dictSugs.longestMatchPosition+1:]
if len(dictResult.exactMatches) > 0 {
start := time.Now()

start := time.Now()
// Since partial words are in dictionary, exactMatch will be TRUE
// for pathway to a word. Hence we're calling this here
moreFromDict := varnam.getMoreFromDictionary(ctx, dictResult.exactMatches)

dictResults = varnam.tokenizeRestOfWord(ctx, restOfWord, dictSugs.sugs, varnam.DictionarySuggestionsLimit)
if varnam.Debug {
fmt.Println("More dictionary results:", moreFromDict)
}

if LOG_TIME_TAKEN {
log.Printf("%s took %v\n", "tokenizeRestOfWord", time.Since(start))
exactWords = moreFromDict.exactWords

// Difference of slices (set subtraction).
// exactMatches shouldn't have items from exactWords
hash := make(map[string]bool)
for i := range exactWords {
hash[exactWords[i].Word] = true
}
for _, sug := range dictResult.exactMatches {
if _, found := hash[sug.Word]; !found {
exactMatches = append(exactMatches, sug)
}
} else {
exactMatches = dictSugs.sugs
}

for _, sugSet := range moreFromDict.moreSuggestions {
moreSuggestions = append(moreSuggestions, sugSet...)
}

start := time.Now()
if LOG_TIME_TAKEN {
log.Printf("%s took %v\n", "getMoreFromDictionary", time.Since(start))
}
}

// Since partial words are in dictionary, exactMatch will be TRUE
// for pathway to a word. Hence we're calling this here
moreFromDict := varnam.getMoreFromDictionary(ctx, dictSugs.sugs)
if len(dictResult.partialMatches) > 0 {
// Tokenize the word after the longest match found in dictionary
restOfWord := word[dictResult.longestMatchPosition+1:]

if varnam.Debug {
fmt.Println("More dictionary results:", moreFromDict)
}
start := time.Now()

for _, sugSet := range moreFromDict {
dictResults = append(dictResults, sugSet...)
}
moreSuggestions = varnam.tokenizeRestOfWord(
ctx,
restOfWord,
dictResult.partialMatches,
varnam.DictionarySuggestionsLimit,
)

if LOG_TIME_TAKEN {
log.Printf("%s took %v\n", "getMoreFromDictionary", time.Since(start))
}
if LOG_TIME_TAKEN {
log.Printf("%s took %v\n", "tokenizeRestOfWord", time.Since(start))
}
}

if LOG_TIME_TAKEN {
log.Printf("%s took %v\n", "channelGetFromDictionary", time.Since(start))
}

channel <- channelDictionaryResult{exactMatches, dictResults}
channel <- channelDictionaryResult{
exactWords,
exactMatches,
moreSuggestions,
}
close(channel)
}
}

func (varnam *Varnam) channelGetFromPatternDictionary(ctx context.Context, word string, channel chan channelDictionaryResult) {
var (
dictResults []Suggestion
exactMatches []Suggestion
exactWords []Suggestion
moreSuggestions []Suggestion
)

select {
Expand Down Expand Up @@ -168,10 +191,10 @@ func (varnam *Varnam) channelGetFromPatternDictionary(ctx context.Context, word

partialMatches = append(partialMatches, match)
} else if match.Length == len(word) {
// Same length
exactMatches = append(exactMatches, match.Sug)
// Same length, exact word matched
exactWords = append(exactWords, match.Sug)
} else {
dictResults = append(dictResults, match.Sug)
moreSuggestions = append(moreSuggestions, match.Sug)
}
}

Expand All @@ -181,14 +204,19 @@ func (varnam *Varnam) channelGetFromPatternDictionary(ctx context.Context, word
perMatchLimit = perMatchLimit / len(partialMatches)
}

for _, match := range partialMatches {
restOfWord := word[match.Length:]
for i := range partialMatches {
restOfWord := word[partialMatches[i].Length:]

filled := varnam.tokenizeRestOfWord(ctx, restOfWord, []Suggestion{match.Sug}, perMatchLimit)
filled := varnam.tokenizeRestOfWord(
ctx,
restOfWord,
[]Suggestion{partialMatches[i].Sug},
perMatchLimit,
)

dictResults = append(dictResults, filled...)
moreSuggestions = append(moreSuggestions, filled...)

if len(dictResults) >= varnam.PatternDictionarySuggestionsLimit {
if len(moreSuggestions) >= varnam.PatternDictionarySuggestionsLimit {
break
}
}
Expand All @@ -198,12 +226,16 @@ func (varnam *Varnam) channelGetFromPatternDictionary(ctx context.Context, word
log.Printf("%s took %v\n", "channelGetFromPatternDictionary", time.Since(start))
}

channel <- channelDictionaryResult{exactMatches, dictResults}
channel <- channelDictionaryResult{
exactWords,
[]Suggestion{}, // Not applicable for patterns dictionary
moreSuggestions,
}
close(channel)
}
}

func (varnam *Varnam) channelGetMoreFromDictionary(ctx context.Context, sugs []Suggestion, channel chan [][]Suggestion) {
func (varnam *Varnam) channelGetMoreFromDictionary(ctx context.Context, sugs []Suggestion, channel chan MoreDictionaryResult) {
select {
case <-ctx.Done():
close(channel)
Expand Down