Skip to content

Commit

Permalink
emoji-search: implement algorithm to allow multiword prefix search.
Browse files Browse the repository at this point in the history
This CL implements the ranking algorithms to enable multiword prefix
search. The formula is provided in https://docs.google.com/document/d/1Ub89xsElqVyRaq8tldhligd29-iXsSMWlAdL6Q-Xpr8/edit#heading=h.c0uts5ftkk58

* The `EmojiPrefixSearch` class now supports multiword queries such as
'sm ca' and 'smili face'.
* The new algorithm is 1.5-10 times faster than Fuse.js on query time.
Please refer to this sheet for performance recordings: https://docs.google.com/spreadsheets/d/1wzPh2PvYCWqLcfKMlj40kyO_n8IjHSvdv6qHFi9h9Ac/edit?usp=sharing
Bug: b/219364826
Demo: None. Please refer to the unit tests to see the expected
output.

Change-Id: Ia61065ef91c447b3b9aeec8f6f667176f522d6ad
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3458342
Reviewed-by: David Vallet <dvallet@chromium.org>
Reviewed-by: John Palmer <jopalmer@chromium.org>
Commit-Queue: Chuong Ho <hdchuong@google.com>
Cr-Commit-Position: refs/heads/main@{#972766}
  • Loading branch information
Chuong Ho authored and Chromium LUCI CQ committed Feb 18, 2022
1 parent ecbece5 commit 1f63ff4
Show file tree
Hide file tree
Showing 4 changed files with 187 additions and 9 deletions.
111 changes: 110 additions & 1 deletion chrome/browser/resources/chromeos/emoji_picker/prefix_search.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ export class EmojiPrefixSearch {
for (let record of collection) {
const string = record.base.string;
const name = record.base.name;
const terms = name.split(' ').map(term => this.sanitize_(term));
const terms = this.tokenize_(name).map(term => this.sanitize_(term));
terms.forEach(term => {
if (!this.wordToEmojisMap_.has(term)) {
this.wordToEmojisMap_.set(term, new Set());
Expand Down Expand Up @@ -75,4 +75,113 @@ export class EmojiPrefixSearch {
this.wordToEmojisMap_.clear();
this.emojiMap_.clear();
}

/**
* Split a phrase into tokens.
* @private
* @param {string} phrase
* @returns {!Array<string>} array of non-empty tokens.
*/
tokenize_(phrase) {
return phrase.split(' ').filter(token => token.length > 0);
}

/**
* Fetch all words from the emoji data that have 'term' has prefix and attach
* matching metadata for each word.
* @param {!EmojiVariants} emoji
* @param {string} term
* @returns {!Array<{pos: Number, isMatched: Boolean, token: String, weight:
* Number}>} Array of matching metadata.
*/
getMatchedKeywords_(emoji, term) {
const PRIMARY_NAME_WEIGHT = 1;
return this.tokenize_(this.sanitize_(emoji.base.name))
.map((token, pos) => ({
pos,
isMatched: token.startsWith(term),
token,
weight: PRIMARY_NAME_WEIGHT
}))
.filter(item => item.isMatched);
}

/**
* Calculate the matching score of a term against a given emoji.
* @param {!EmojiVariants} emoji
* @param {string} term
* @throws Thrown when any matched word from emoji description is empty.
* @returns {number}
*/
scoreTermAgainstEmoji(emoji, term) {
let score = 0;
for (const item of this.getMatchedKeywords_(emoji, term)) {
if (item.token.length === 0) {
throw new Error('Token can not be empty.');
}
// Link to single-word match score formula:
// https://docs.google.com/document/d/1Ub89xsElqVyRaq8tldhligd29-iXsSMWlAdL6Q-Xpr8/edit#
score +=
(item.weight / (1 + item.pos)) * (term.length / item.token.length);
}
return score;
}

/**
* Search for all items that match with the given query
* @param {string} query multi-word query
* @returns {!Array<{item: !EmojiVariants, score: number}>} an array of
* matched items.
*/
search(query) {
const queryScores = new Map();
const sanitizedQuery = this.sanitize_(query);
this.tokenize_(sanitizedQuery).forEach((term, idx) => {
// For each token
const termScores = new Map();
const candidateEmojis = this.matchPrefixToEmojis(term);

for (const emoji of candidateEmojis) {
const emojiRecord = this.emojiMap_.get(emoji);
termScores.set(emoji, this.scoreTermAgainstEmoji(emojiRecord, term));
}

for (const emoji of termScores.keys()) {
// If it is the first term in the query phrase, we apply the
// normalization factor.
if (idx === 0) {
const emojiName = this.emojiMap_.get(emoji).base.name;
queryScores.set(emoji, sanitizedQuery.length / emojiName.length);
}
if (queryScores.has(emoji)) {
queryScores.set(
emoji, queryScores.get(emoji) * termScores.get(emoji));
}
}

// Remove any emoji at query level if it does not match at term level.
for (const emoji of queryScores.keys()) {
if (!termScores.has(emoji)) {
queryScores.delete(emoji);
}
}
});

let results =
Array.from(queryScores.keys()).map(emoji => ({
item: this.emojiMap_.get(emoji),
score: queryScores.get(emoji)
}));
return this.sort_(results);
}

/**
* Sort the array of Emoji objects by relevance score in descending order
* @param {!Array<{item: !EmojiVariants, score: number}>} results
* @returns {!Array<{item: !EmojiVariants, score: number}>} the sorted array
* of Emoji objects.
*/
sort_(results) {
return results.sort((emoji1, emoji2) => emoji2.score - emoji1.score);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
// found in the LICENSE file.

import {EmojiPrefixSearch} from 'chrome://emoji-picker/prefix_search.js';
import {assertEquals, assertFalse, assertTrue} from '../../chai_assert.js';
import {assertArrayEquals, assertEquals, assertFalse, assertTrue} from '../../chai_assert.js';
import {assertCloseTo} from './emoji_picker_test_util.js';

const mockCollection1 = [
{base: {string: '😹', name: 'cat with tears of joy'}},
Expand Down Expand Up @@ -62,4 +63,71 @@ suite('PrefixSearchUnitTest', () => {
assertTrue(actualResults3.includes('😹'));
assertFalse(actualResults3.includes('🥲'));
});

// Checks scoreTermAgainstEmoji() for the terms 'smil' and 'cat' against the
// first four records of mockCollection2.
test('Scoring single term against emoji using emoji name.', () => {
  const emojiRecord1 = mockCollection2[0];
  const emojiRecord2 = mockCollection2[1];
  const emojiRecord3 = mockCollection2[2];
  const emojiRecord4 = mockCollection2[3];

  // Non-zero scores are floating-point quotients, so they are compared with
  // a tolerance rather than with strict equality.
  assertCloseTo(
      prefixSearch.scoreTermAgainstEmoji(emojiRecord1, 'smil'), 4 / 7);
  assertCloseTo(prefixSearch.scoreTermAgainstEmoji(emojiRecord1, 'cat'), 0.5);
  assertCloseTo(
      prefixSearch.scoreTermAgainstEmoji(emojiRecord2, 'smil'), 0.2);
  assertCloseTo(prefixSearch.scoreTermAgainstEmoji(emojiRecord2, 'cat'), 1);
  assertCloseTo(
      prefixSearch.scoreTermAgainstEmoji(emojiRecord3, 'cat'), 3 / 11);
  assertCloseTo(
      prefixSearch.scoreTermAgainstEmoji(emojiRecord4, 'smil'), 4 / 7);

  // A term that matches no word scores exactly 0. assertCloseTo measures the
  // error relative to the expected value, so zero is checked with
  // assertEquals (expected value first).
  assertEquals(0, prefixSearch.scoreTermAgainstEmoji(emojiRecord3, 'smil'));
  assertEquals(0, prefixSearch.scoreTermAgainstEmoji(emojiRecord4, 'cat'));
});

// Checks that a two-term query returns only the emojis matching both terms,
// ranked by score.
test(
    'Multi-word prefix search should return the correct emojis and scores.',
    () => {
      prefixSearch.setCollection(mockCollection2);

      const actualMatches = prefixSearch.search('smil cat');

      // assertEquals takes the expected value first; keeping that order
      // everywhere makes failure messages read correctly.
      assertEquals(2, actualMatches.length);
      assertEquals('😺', actualMatches[0].item.base.string);
      assertCloseTo(actualMatches[0].score, 16 / 77);
      assertEquals('😼', actualMatches[1].item.base.string);
      assertCloseTo(actualMatches[1].score, 4 / 45);
    });

// Swapping the terms of a query must yield the same results.
test(
    'Order of prefix terms in the query should not affect search results.',
    () => {
      prefixSearch.setCollection(mockCollection2);

      const smilThenCat = prefixSearch.search('smil cat');
      const catThenSmil = prefixSearch.search('cat smil');
      assertArrayEquals(smilThenCat, catThenSmil);

      const withThenSmi = prefixSearch.search('with smi');
      const smiThenWith = prefixSearch.search('smi with');
      assertArrayEquals(withThenSmi, smiThenWith);
    });

// A term covering a larger share of the emoji name must rank first.
test(
    'Matches on longer parts of emoji name should rank higher than on ' +
        'shorter ones.',
    () => {
      prefixSearch.setCollection(mockCollection2);

      const actualMatches = prefixSearch.search('smiling');

      // assertEquals takes the expected value first; keeping that order
      // everywhere makes failure messages read correctly.
      assertEquals(2, actualMatches.length);
      assertEquals('smiling cat', actualMatches[0].item.base.name);
      assertEquals('smiling face with halo', actualMatches[1].item.base.name);
    });

// Two queries that differ only in letter case must yield the same results.
test(
    'The case of the search query should not affect the results returned.',
    () => {
      prefixSearch.setCollection(mockCollection2);

      const firstCasing = prefixSearch.search('Smi With');
      const secondCasing = prefixSearch.search('sMI WITh');
      assertArrayEquals(firstCasing, secondCasing);
    });
});
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,7 @@ import {flush} from 'chrome://resources/polymer/v3_0/polymer/polymer_bundled.min

import {assertEquals, assertFalse, assertGT, assertLT, assertTrue} from '../../chai_assert.js';

import {deepQuerySelector, dispatchMouseEvent, isGroupButtonActive, timeout, waitForCondition, waitForEvent, waitWithTimeout} from './emoji_picker_test_util.js';

/**
 * Asserts that `actual` is within 0.1% of `expected`.
 *
 * The error is measured relative to `expected`. A relative error is undefined
 * when `expected` is 0 (it would divide by zero), so in that case `actual` is
 * checked against the same tolerance as an absolute bound.
 * @param {number} actual
 * @param {number} expected
 */
function assertCloseTo(actual, expected) {
  const TOLERANCE = 0.001;
  const error =
      expected === 0 ? Math.abs(actual) : Math.abs(1 - actual / expected);
  assertTrue(
      error <= TOLERANCE, `expected ${expected} to be close to ${actual}`);
}
import {assertCloseTo, deepQuerySelector, dispatchMouseEvent, isGroupButtonActive, timeout, waitForCondition, waitForEvent, waitWithTimeout} from './emoji_picker_test_util.js';


suite('<emoji-picker>', () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@
// found in the LICENSE file.

import {assert} from 'chrome://resources/js/assert.m.js';
import {assertTrue} from '../../chai_assert.js';

/**
 * Asserts that `actual` is within 0.1% of `expected`.
 *
 * The error is measured relative to `expected`. A relative error is undefined
 * when `expected` is 0 (it would divide by zero), so in that case `actual` is
 * checked against the same tolerance as an absolute bound.
 * @param {number} actual
 * @param {number} expected
 */
export function assertCloseTo(actual, expected) {
  const TOLERANCE = 0.001;
  const error =
      expected === 0 ? Math.abs(actual) : Math.abs(1 - actual / expected);
  assertTrue(
      error <= TOLERANCE, `expected ${expected} to be close to ${actual}`);
}

/**
* Queries for an element through a path of custom elements.
Expand Down

0 comments on commit 1f63ff4

Please sign in to comment.