Skip to content

Commit

Permalink
Merge pull request #1523 from yext/feature/chinese-i18n
Browse files Browse the repository at this point in the history
chinese i18n tweaks

update edge supported locales for voice search (#1519)
rename zh-CN to zh-Hans and zh-TW to zh-Hant (#1517)
  • Loading branch information
oshi97 committed Aug 19, 2021
2 parents ac8a6b8 + c42509c commit fdfd847
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 25 deletions.
41 changes: 32 additions & 9 deletions conf/i18n/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -134,15 +134,15 @@ const LANGUAGES_TO_LOCALES = {
sv: [
'sv_SE'
],
'zh-CN': [
'zh-CN_CN',
'zh_CN_HK',
'zh_CN_SG'
'zh-Hans': [
'zh-Hans_CN',
'zh-Hans_HK',
'zh-Hans_SG'
],
'zh-TW': [
'zh-TW_HK',
'zh-TW_MO',
'zh-TW_TW'
'zh-Hant': [
'zh-Hant_HK',
'zh-Hant_MO',
'zh-Hant_TW'
]
};
exports.LANGUAGES_TO_LOCALES = LANGUAGES_TO_LOCALES;
Expand Down Expand Up @@ -186,5 +186,28 @@ exports.SPEECH_RECOGNITION_LOCALES_SUPPORTED_BY_EDGE = [
'de-at',
'de-ch',
'de-de',
'ja-jp'
'ja-jp',
'ar-ae',
'ar-bh',
'ar-dz',
'ar-iq',
'ar-kw',
'ar-ly',
'ar-om',
'ar-ps',
'ar-qa',
'ar-sa',
'nl-be',
'nl-nl',
'pt-br',
'pt-pt',
'sv-se',
'zh-hans-cn',
'zh-hans-hk',
'zh-hant-hk',
'zh-hant-tw',

// The locales below are not among the SDK's built-in locales, but are supported by Edge
'zh-cn',
'zh-tw'
];
File renamed without changes.
File renamed without changes.
39 changes: 32 additions & 7 deletions src/core/speechrecognition/locales.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { SPEECH_RECOGNITION_LOCALES_SUPPORTED_BY_EDGE } from '../constants';
import { parseLocale } from '../utils/i18nutils';

/**
* Transforms the given locale to a locale Microsoft Edge can understand.
Expand All @@ -9,15 +10,39 @@ import { SPEECH_RECOGNITION_LOCALES_SUPPORTED_BY_EDGE } from '../constants';
* @param {string} locale
* @returns {string}
*/
export function transformSpeechRecognitionLocaleForEdge (locale) {
const underscoreIndex = locale.indexOf('_');
if (underscoreIndex === -1) {
return locale;
export function transformSpeechRecognitionLocaleForEdge (rawLocale) {
const { language, modifier, region } = parseLocale(rawLocale);
if (!modifier && !region) {
return language;
}
locale = locale.replace('_', '-');
const isCompatibleWithEdge = SPEECH_RECOGNITION_LOCALES_SUPPORTED_BY_EDGE.includes(locale.toLowerCase());
const locale = formatLocaleForEdge(language, modifier, region);
const isCompatibleWithEdge = SPEECH_RECOGNITION_LOCALES_SUPPORTED_BY_EDGE.includes(locale);
if (isCompatibleWithEdge) {
return locale;
}
return locale.substring(0, underscoreIndex);
if (modifier) {
return formatLocaleForEdge(language, modifier);
}
return language;
}

/**
 * Builds the locale string used for Edge from the individual pieces of a locale code.
 * The pieces are joined with dashes (Edge does not accept underscores), and the result
 * is fully lowercased. Edge does not care about capitalization, but a lowercase value
 * can be looked up directly in the SPEECH_RECOGNITION_LOCALES_SUPPORTED_BY_EDGE array.
 *
 * @param {string} language e.g. "zh" in zh-Hans_CN
 * @param {string?} modifier e.g. "Hans" in zh-Hans_CN; skipped when falsy
 * @param {string?} region e.g. "CN" in zh-Hans_CN; skipped when falsy
 * @returns {string} the lowercased, dash-separated locale, e.g. "zh-hans-cn"
 */
function formatLocaleForEdge (language, modifier, region) {
  // Only the optional parts that are actually present contribute a "-part" suffix.
  const suffix = [modifier, region]
    .filter(Boolean)
    .map(part => '-' + part)
    .join('');
  const combined = suffix ? language + suffix : language;
  return combined.toLowerCase();
}
51 changes: 51 additions & 0 deletions src/core/utils/i18nutils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/**
 * Splits a locale code into language, modifier and region, normalizing the case of each.
 *
 * Dashes and underscores are both accepted as separators. The language is lowercased,
 * the modifier is capitalized (first letter upper, rest lower) and the region is
 * uppercased. A two-section code is read as language + region, except for "zh", where a
 * second section of "hans" or "hant" (in any case) is read as the modifier instead.
 *
 * @param {string} localeCode e.g. "en", "en_US", "zh-Hans" or "zh-Hans_CN"
 * @returns {{ language: string, modifier?: string, region?: string }}
 * @throws {Error} if the locale code has more than three sections
 */
export function parseLocale (localeCode) {
  const sections = localeCode.replace(/-/g, '_').split('_');
  const sectionCount = sections.length;
  if (sectionCount > 3) {
    throw new Error(
      `Encountered strangely formatted locale "${localeCode}", ` +
      `with ${sectionCount} sections.`);
  }

  const [rawLanguage, second, third] = sections;
  const result = { language: rawLanguage.toLowerCase() };

  switch (sectionCount) {
    case 2:
      if (result.language === 'zh') {
        // For Chinese, a lone second section may be either a script or a region.
        const lowered = second.toLowerCase();
        const key = ['hans', 'hant'].includes(lowered) ? 'modifier' : 'region';
        result[key] = lowered;
      } else {
        result.region = second;
      }
      break;
    case 3:
      result.modifier = second;
      result.region = third;
      break;
  }

  // Empty sections are left as-is; only non-empty values get case-normalized.
  if (result.modifier) {
    const { modifier } = result;
    result.modifier = modifier.charAt(0).toUpperCase() + modifier.slice(1).toLowerCase();
  }
  if (result.region) {
    result.region = result.region.toUpperCase();
  }

  return result;
}
22 changes: 13 additions & 9 deletions tests/core/speechrecognition/locales.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,31 @@ jest.mock('../../../src/core/constants', () => ({
require('../../../conf/i18n/constants').SPEECH_RECOGNITION_LOCALES_SUPPORTED_BY_EDGE
}));

it('does nothing when no underscore', () => {
it('works for plain languages', () => {
expect(transformSpeechRecognitionLocaleForEdge('en')).toEqual('en');
expect(transformSpeechRecognitionLocaleForEdge('ZH_Hans')).toEqual('zh-hans');
});

it('will recognize supported locales that have dashes', () => {
expect(transformSpeechRecognitionLocaleForEdge('en-US')).toEqual('en-US');
expect(transformSpeechRecognitionLocaleForEdge('en-GB')).toEqual('en-GB');
expect(transformSpeechRecognitionLocaleForEdge('en-US')).toEqual('en-us');
expect(transformSpeechRecognitionLocaleForEdge('en-GB')).toEqual('en-gb');
expect(transformSpeechRecognitionLocaleForEdge('zh-Hant-tw')).toEqual('zh-hant-tw');
});

it('defaults Edge incompatible locales to the language code', () => {
expect(transformSpeechRecognitionLocaleForEdge('ja_FAKE')).toEqual('ja');
expect(transformSpeechRecognitionLocaleForEdge('en_AI')).toEqual('en');
expect(transformSpeechRecognitionLocaleForEdge('zH_hAns_fake')).toEqual('zh-hans');
expect(transformSpeechRecognitionLocaleForEdge('ZH-HANS-FAKE')).toEqual('zh-hans');
});

it('replaces underscores with dashes for supported locales', () => {
expect(transformSpeechRecognitionLocaleForEdge('en_US')).toEqual('en-US');
expect(transformSpeechRecognitionLocaleForEdge('en_GB')).toEqual('en-GB');
expect(transformSpeechRecognitionLocaleForEdge('en_US')).toEqual('en-us');
expect(transformSpeechRecognitionLocaleForEdge('en_GB')).toEqual('en-gb');
});

it('is case insensitive', () => {
expect(transformSpeechRecognitionLocaleForEdge('en_us')).toEqual('en-us');
expect(transformSpeechRecognitionLocaleForEdge('EN_AI')).toEqual('EN');
expect(transformSpeechRecognitionLocaleForEdge('jA_AI')).toEqual('jA');
it('canonicalizes case', () => {
expect(transformSpeechRecognitionLocaleForEdge('en_uS')).toEqual('en-us');
expect(transformSpeechRecognitionLocaleForEdge('EN_AI')).toEqual('en');
expect(transformSpeechRecognitionLocaleForEdge('jA_AI')).toEqual('ja');
});

0 comments on commit fdfd847

Please sign in to comment.