diff --git a/demo/client.ts b/demo/client.ts index d5196d3779..baa1f11b4c 100644 --- a/demo/client.ts +++ b/demo/client.ts @@ -208,6 +208,7 @@ function initOptions(term: TerminalType): void { fontWeight: ['normal', 'bold', '100', '200', '300', '400', '500', '600', '700', '800', '900'], fontWeightBold: ['normal', 'bold', '100', '200', '300', '400', '500', '600', '700', '800', '900'], rendererType: ['dom', 'canvas'], + unicodeVersion: (term as any)._core.unicodeManager.registeredVersions.map(String), experimentalBufferLineImpl: ['JsArray', 'TypedArray'] }; const options = Object.keys((term)._core.options); diff --git a/src/Buffer.test.ts b/src/Buffer.test.ts index db8a460d09..279f0140c6 100644 --- a/src/Buffer.test.ts +++ b/src/Buffer.test.ts @@ -491,7 +491,7 @@ describe('Buffer', () => { }); it('fullwidth combining with emoji - match emoji cell', () => { - const input = 'Lots of ¥\u0301 make me 😃.'; + const input = 'Lots of ¥\u0301 make me very 😃.'; terminal.writeSync(input); const s = terminal.buffer.iterator(true).next().content; assert.equal(input, s); diff --git a/src/InputHandler.ts b/src/InputHandler.ts index 7604b01f69..4843275638 100644 --- a/src/InputHandler.ts +++ b/src/InputHandler.ts @@ -9,7 +9,6 @@ import { C0, C1 } from './common/data/EscapeSequences'; import { CHARSETS, DEFAULT_CHARSET } from './core/data/Charsets'; import { CHAR_DATA_CHAR_INDEX, CHAR_DATA_WIDTH_INDEX, CHAR_DATA_CODE_INDEX, DEFAULT_ATTR, NULL_CELL_CHAR, NULL_CELL_WIDTH, NULL_CELL_CODE } from './Buffer'; import { FLAGS } from './renderer/Types'; -import { wcwidth } from './CharWidth'; import { EscapeSequenceParser } from './EscapeSequenceParser'; import { ICharset } from './core/Types'; import { Disposable } from './common/Lifecycle'; @@ -335,6 +334,7 @@ export class InputHandler extends Disposable implements IInputHandler { const wraparoundMode: boolean = this._terminal.wraparoundMode; const insertMode: boolean = this._terminal.insertMode; const curAttr: number = 
this._terminal.curAttr; + const wcwidth = this._terminal.unicodeManager.wcwidth; let bufferRow = buffer.lines.get(buffer.y + buffer.ybase); this._terminal.updateRange(buffer.y); diff --git a/src/Linkifier.ts b/src/Linkifier.ts index 2ec3ed400d..2a8d296365 100644 --- a/src/Linkifier.ts +++ b/src/Linkifier.ts @@ -8,7 +8,6 @@ import { ILinkHoverEvent, ILinkMatcher, LinkMatcherHandler, LinkHoverEventTypes, import { MouseZone } from './ui/MouseZoneManager'; import { EventEmitter } from './common/EventEmitter'; import { CHAR_DATA_ATTR_INDEX } from './Buffer'; -import { getStringCellWidth } from './CharWidth'; /** * The Linkifier applies links to rows shortly after they have been refreshed. @@ -256,7 +255,8 @@ export class Linkifier extends EventEmitter implements ILinkifier { * @param fg The link color for hover event. */ private _addLink(x: number, y: number, uri: string, matcher: ILinkMatcher, fg: number): void { - const width = getStringCellWidth(uri); + // FIXME: to support unicode version runtime switch replace this by endIndex calculation + const width = this._terminal.unicodeManager.getStringCellWidth(uri); const x1 = x % this._terminal.cols; const y1 = y + Math.floor(x / this._terminal.cols); let x2 = (x1 + width) % this._terminal.cols; diff --git a/src/Terminal.ts b/src/Terminal.ts index 8e89c9ca7b..c883536897 100644 --- a/src/Terminal.ts +++ b/src/Terminal.ts @@ -52,6 +52,7 @@ import { DomRenderer } from './renderer/dom/DomRenderer'; import { IKeyboardEvent } from './common/Types'; import { evaluateKeyboardEvent } from './core/input/Keyboard'; import { KeyboardResultType, ICharset } from './core/Types'; +import { UnicodeVersionManager } from './UnicodeManager'; // Let it work inside Node.js for automated testing purposes. const document = (typeof window !== 'undefined') ? 
window.document : null; @@ -106,6 +107,7 @@ const DEFAULT_OPTIONS: ITerminalOptions = { theme: null, rightClickSelectsWord: Browser.isMac, rendererType: 'canvas', + unicodeVersion: '11', experimentalBufferLineImpl: 'JsArray' }; @@ -193,6 +195,7 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II private _userScrolling: boolean; private _inputHandler: InputHandler; + public unicodeManager: UnicodeVersionManager; public soundManager: SoundManager; public renderer: IRenderer; public selectionManager: SelectionManager; @@ -302,6 +305,8 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II // this._writeStopped = false; this._userScrolling = false; + this.unicodeManager = new UnicodeVersionManager(); + this.register(this.unicodeManager); this._inputHandler = new InputHandler(this); this.register(this._inputHandler); // Reuse renderer if the Terminal is being recreated via a reset call. @@ -496,6 +501,7 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II } break; case 'tabStopWidth': this.buffers.setupTabStops(); break; + case 'unicodeVersion': this.unicodeManager.activeVersion = parseFloat(value); break; case 'experimentalBufferLineImpl': this.buffers.normal.setBufferLineFactory(value); this.buffers.alt.setBufferLineFactory(value); diff --git a/src/Types.ts b/src/Types.ts index 430c6575bd..638ddfc9b1 100644 --- a/src/Types.ts +++ b/src/Types.ts @@ -86,6 +86,7 @@ export interface IInputHandlingTerminal extends IEventEmitter { handleTitle(title: string): void; index(): void; reverseIndex(): void; + unicodeManager: IUnicodeVersionManager; } export interface IViewport extends IDisposable { @@ -220,6 +221,7 @@ export interface ITerminal extends PublicTerminal, IElementAccessor, IBufferAcce viewport: IViewport; bracketedPasteMode: boolean; applicationCursor: boolean; + unicodeManager: IUnicodeVersionManager; /** * Emit the 'data' event and populate the given data. 
@@ -529,3 +531,33 @@ export interface IBufferLine {
 export interface IBufferLineConstructor {
   new(cols: number, fillCharData?: CharData, isWrapped?: boolean): IBufferLine;
 }
+
+/**
+ * Interface for unicode version implementations.
+ */
+export interface IUnicodeVersionProvider {
+  /** Version number, used as key to register and activate the implementation. */
+  version: number;
+  /** Called by the manager every time the version gets activated. */
+  init(): void;
+  /** Get the terminal cell width of a single codepoint. */
+  wcwidth(ucs: number): number;
+}
+
+/**
+ * Interface of the unicode version manager of a terminal.
+ */
+export interface IUnicodeVersionManager {
+  /** Add a callback to run when a version got registered. */
+  addRegisterListener(callback: (version: number, manager: IUnicodeVersionManager) => void): void;
+  /** Remove a callback added with `addRegisterListener`. */
+  removeRegisterListener(callback: (version: number, manager: IUnicodeVersionManager) => void): void;
+  /** Currently registered versions in ascending order. */
+  registeredVersions: number[];
+  /** Active version, setting a version that is not registered throws. */
+  activeVersion: number;
+  /** `wcwidth` implementation of the active version. */
+  wcwidth(ucs: number): number;
+  /** Get the terminal cell width for a given string. */
+  getStringCellWidth(s: string): number;
+}
diff --git a/src/UnicodeManager.test.ts b/src/UnicodeManager.test.ts
new file mode 100644
index 0000000000..f666479191
--- /dev/null
+++ b/src/UnicodeManager.test.ts
@@ -0,0 +1,124 @@
+/**
+ * Copyright (c) 2018 The xterm.js authors. All rights reserved.
+ * @license MIT
+ */
+import { assert } from 'chai';
+import { UnicodeVersionManager } from './UnicodeManager';
+import { IUnicodeVersionProvider } from './Types';
+
+const VERSION_DUMMY1: IUnicodeVersionProvider = {
+  version: 15,
+  wcwidth: (n: number) => n,
+  init: () => {}
+};
+const VERSION_DUMMY2: IUnicodeVersionProvider = {
+  version: 17,
+  wcwidth: (n: number) => n,
+  init: () => {}
+};
+
+describe('UnicodeProvider', function(): void {
+  describe('static part', function(): void {
+
+    it('provided default versions', function(): void {
+      assert.deepEqual(UnicodeVersionManager.registeredVersions, [6, 11]);
+    });
+
+    it('add version', function(): void {
+      UnicodeVersionManager.registerVersion(VERSION_DUMMY1);
+      assert.deepEqual(UnicodeVersionManager.registeredVersions, [6, 11, 15]);
+      delete UnicodeVersionManager.versions[15];
+    });
+
+    it('register callback', function(): void {
+      let gotCalled = false;
+      UnicodeVersionManager.addRegisterListener((version) => {
+        assert.equal(version, 15);
+        gotCalled = true;
+      });
+      UnicodeVersionManager.registerVersion(VERSION_DUMMY1);
+      assert.equal(gotCalled, true);
+      delete UnicodeVersionManager.versions[15];
+      UnicodeVersionManager.removeAllRegisterListener();
+    });
+
+    it('remove callback', function(): void {
+      let gotCalled = false;
+      const listener = (version: number) => {
+        assert.equal(version, 15);
+        gotCalled = true;
+      };
+      UnicodeVersionManager.addRegisterListener(listener);
+      UnicodeVersionManager.registerVersion(VERSION_DUMMY1);
+      assert.equal(gotCalled, true);
+      gotCalled = false;
+      UnicodeVersionManager.removeRegisterListener(listener);
+      UnicodeVersionManager.registerVersion(VERSION_DUMMY2);
+      assert.equal(gotCalled, false);
+      delete UnicodeVersionManager.versions[15];
+      delete UnicodeVersionManager.versions[17];
+      UnicodeVersionManager.removeAllRegisterListener();
+    });
+  });
+  describe('instance', function(): void {
+    let provider: UnicodeVersionManager;
+
+    beforeEach(function(): void {
+      provider = new UnicodeVersionManager();
+    });
+
+    it('highest version activated by default', function(): void {
+      assert.equal(provider.activeVersion, 11);
+    });
+
+    it('activate exact', function(): void {
+      assert.throws(() => provider.activeVersion = 5);
+      assert.throws(() => provider.activeVersion = 7);
+      assert.throws(() => provider.activeVersion = 10);
+      assert.throws(() => provider.activeVersion = 12);
+      assert.throws(() => provider.activeVersion = 200);
+      assert.doesNotThrow(() => provider.activeVersion = 6);
+      assert.doesNotThrow(() => provider.activeVersion = 11);
+    });
+
+    it('activate version given to constructor', function(): void {
+      assert.equal(new UnicodeVersionManager(6).activeVersion, 6);
+      assert.equal(new UnicodeVersionManager(11).activeVersion, 11);
+      assert.throws(() => new UnicodeVersionManager(1));
+    });
+
+    it('register/remove callback', function(): void {
+      let gotCalled = false;
+      const listener = (version: number, prov: UnicodeVersionManager) => {
+        assert.equal(version, 15);
+        assert.equal(prov, provider);
+        gotCalled = true;
+      };
+      provider.addRegisterListener(listener);
+      UnicodeVersionManager.registerVersion(VERSION_DUMMY1);
+      assert.equal(gotCalled, true);
+      gotCalled = false;
+      provider.removeRegisterListener(listener);
+      UnicodeVersionManager.registerVersion(VERSION_DUMMY2);
+      assert.equal(gotCalled, false);
+      delete UnicodeVersionManager.versions[15];
+      delete UnicodeVersionManager.versions[17];
+      UnicodeVersionManager.removeAllRegisterListener();
+      provider.dispose();
+    });
+
+    it('unicode test', function(): void {
+      const data = '🔷🔷🔷🔷🔷';
+      provider.activeVersion = 6;
+      assert.equal(provider.getStringCellWidth(data), 5);
+      provider.activeVersion = 11;
+      assert.equal(provider.getStringCellWidth(data), 10);
+    });
+
+    it('unpaired surrogates have no width', function(): void {
+      assert.equal(provider.getStringCellWidth('\uD83Dx'), 1);
+      assert.equal(provider.getStringCellWidth('x\uDD37'), 1);
+      assert.equal(provider.getStringCellWidth('x\uD83D'), 1);
+    });
+  });
+});
diff --git a/src/UnicodeManager.ts b/src/UnicodeManager.ts
new file mode 100644
index 0000000000..8aef8f10f1
--- /dev/null
+++ b/src/UnicodeManager.ts
@@ -0,0 +1,197 @@
+/**
+ * Copyright (c) 2018 The xterm.js authors. All rights reserved.
+ * @license MIT
+ */
+import { IUnicodeVersionProvider, IUnicodeVersionManager } from './Types';
+import { v6 } from './unicode/v6';
+import { v11 } from './unicode/v11';
+import { Disposable } from './common/Lifecycle';
+
+// [listener as passed by the caller, wrapper registered on the static listener list]
+type RegisterCallback = [(version: number, provider: UnicodeVersionManager) => void, (version: number) => void];
+
+/**
+ * Class to provide access to different unicode version implementations.
+ *
+ * The version related implementations are stored statically
+ * to avoid recreating them for every single instance.
+ *
+ * An instance of this class is meant to serve unicode specific implementations
+ * for a single terminal instance. This way multiple terminals can have
+ * different unicode settings active while still referring to the
+ * same underlying implementations.
+ */
+export class UnicodeVersionManager extends Disposable implements IUnicodeVersionManager {
+  /** Registered implementations, keyed by their version number. */
+  public static versions: {[version: number]: IUnicodeVersionProvider} = {};
+  /** Listeners to notify whenever a new version gets registered. */
+  private static _registerCallbacks: ((version: number) => void)[] = [];
+
+  /**
+   * Add a listener that gets called with the version number
+   * whenever a new version is registered.
+   */
+  public static addRegisterListener(callback: (version: number) => void): void {
+    UnicodeVersionManager._registerCallbacks.push(callback);
+  }
+
+  /**
+   * Remove a listener previously added with `addRegisterListener`.
+   * Unknown listeners are ignored.
+   */
+  public static removeRegisterListener(callback: (version: number) => void): void {
+    const pos = UnicodeVersionManager._registerCallbacks.indexOf(callback);
+    if (pos !== -1) {
+      UnicodeVersionManager._registerCallbacks.splice(pos, 1);
+    }
+  }
+
+  /**
+   * Remove all register listeners.
+   */
+  public static removeAllRegisterListener(): void {
+    UnicodeVersionManager._registerCallbacks = [];
+  }
+
+  /**
+   * Register a unicode version.
+   * Possible entry point for unicode addons.
+   * In conjunction with `addRegisterListener` it can be used
+   * to load and use versions lazily.
+   * Throws if the version is already registered.
+   */
+  public static registerVersion(impl: IUnicodeVersionProvider): void {
+    if (UnicodeVersionManager.versions[impl.version]) {
+      throw new Error(`unicode version "${impl.version}" already registered`);
+    }
+    UnicodeVersionManager.versions[impl.version] = impl;
+    UnicodeVersionManager._registerCallbacks.forEach(cb => cb(impl.version));
+  }
+
+  /**
+   * Get a list of currently registered unicode versions in ascending order.
+   */
+  public static get registeredVersions(): number[] {
+    return Object.getOwnPropertyNames(UnicodeVersionManager.versions).map(parseFloat).sort((a, b) => a - b);
+  }
+
+  private _version: number;
+  private _registerCallbacks: RegisterCallback[] = [];
+  public wcwidth: (ucs: number) => number;
+
+  /**
+   * @param version The unicode version to activate (a version number,
+   * not an index). Defaults to the highest registered version.
+   * Throws if the version is not registered.
+   */
+  constructor(version?: number) {
+    super();
+    if (version === undefined || version === null) {
+      // defaults to the highest available version
+      const versions = this.registeredVersions;
+      this.activeVersion = versions[versions.length - 1];
+    } else {
+      this.activeVersion = version;
+    }
+  }
+
+  /**
+   * Remove all listeners added through this instance from the static
+   * listener list and drop the reference to the active `wcwidth` implementation.
+   */
+  public dispose(): void {
+    this._registerCallbacks.forEach(el => UnicodeVersionManager.removeRegisterListener(el[1]));
+    // keep an empty list instead of `null` so a repeated `dispose` call does not throw
+    this._registerCallbacks = [];
+    this.wcwidth = null;
+    // also run the cleanup of the base class
+    super.dispose();
+  }
+
+  /**
+   * Callback to run when a version got registered.
+   * Gets the newly registered version and
+   * the `UnicodeVersionManager` instance as arguments.
+   */
+  public addRegisterListener(callback: (version: number, manager: IUnicodeVersionManager) => void): void {
+    // wrapper bound to this instance, stored with the callback to find it again on removal
+    const func: (version: number) => void = (version) => callback(version, this);
+    this._registerCallbacks.push([callback, func]);
+    UnicodeVersionManager.addRegisterListener(func);
+  }
+
+  /**
+   * Remove register listener.
+   * Unknown listeners are ignored.
+   */
+  public removeRegisterListener(callback: (version: number, manager: IUnicodeVersionManager) => void): void {
+    for (let i = 0; i < this._registerCallbacks.length; ++i) {
+      if (this._registerCallbacks[i][0] === callback) {
+        UnicodeVersionManager.removeRegisterListener(this._registerCallbacks[i][1]);
+        this._registerCallbacks.splice(i, 1);
+        return;
+      }
+    }
+  }
+
+  /**
+   * Get a list of currently registered unicode versions.
+   */
+  public get registeredVersions(): number[] {
+    return UnicodeVersionManager.registeredVersions;
+  }
+
+  /**
+   * Get active unicode version.
+   */
+  public get activeVersion(): number {
+    return this._version;
+  }
+
+  /**
+   * Set active unicode version.
+   * Throws if the version is not registered.
+   */
+  public set activeVersion(version: number) {
+    const impl = UnicodeVersionManager.versions[version];
+    if (!impl) {
+      throw new Error(`unicode version "${version}" not registered`);
+    }
+    // init lookup table and swap wcwidth impl
+    impl.init();
+    this.wcwidth = impl.wcwidth;
+    this._version = version;
+  }
+
+  /**
+   * Get the terminal cell width for a given string.
+   * A valid surrogate pair is measured as a single codepoint,
+   * unpaired surrogates do not contribute to the width.
+   */
+  public getStringCellWidth(s: string): number {
+    let result = 0;
+    for (let i = 0; i < s.length; ++i) {
+      let code = s.charCodeAt(i);
+      if (0xD800 <= code && code <= 0xDBFF) {
+        // `low` is NaN at the end of the string, which fails the range check as well
+        const low = s.charCodeAt(i + 1);
+        if (!(0xDC00 <= low && low <= 0xDFFF)) {
+          // unpaired high surrogate
+          continue;
+        }
+        code = ((code - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
+        // the low surrogate is consumed
+        ++i;
+      } else if (0xDC00 <= code && code <= 0xDFFF) {
+        // unpaired low surrogate
+        continue;
+      }
+      result += this.wcwidth(code);
+    }
+    return result;
+  }
+}
+
+// register statically shipped versions
+UnicodeVersionManager.registerVersion(v6);
+UnicodeVersionManager.registerVersion(v11);
diff --git a/src/unicode/v11.test.ts b/src/unicode/v11.test.ts
new file mode 100644
index 0000000000..ddb04b6909
--- /dev/null
+++ b/src/unicode/v11.test.ts
@@ -0,0 +1,566 @@
+/**
+ * Copyright (c) 2018 The xterm.js authors. All rights reserved.
+ * @license MIT
+ */
+
+import { assert } from 'chai';
+import { v11 } from './v11';
+
+// old implementation
+const wcwidthOld = (function(opts: {nul: number, control: number}): (ucs: number) => number {
+  // Generated: 2018-09-24T16:45:44.483077
+  // Source: DerivedGeneralCategory-11.0.0.txt
+  // Date: 2018-02-21, 05:34:04 GMT
+  const ZERO_WIDTH = [
+    [0x0300, 0x036f], // Combining Grave Accent ..Combining Latin Small Le
+    [0x0483, 0x0489], // Combining Cyrillic Titlo..Combining Cyrillic Milli
+    [0x0591, 0x05bd], // Hebrew Accent Etnahta ..Hebrew Point Meteg
+    [0x05bf, 0x05bf], // Hebrew Point Rafe ..Hebrew Point Rafe
+    [0x05c1, 0x05c2], // Hebrew Point Shin Dot ..Hebrew Point Sin Dot
+    [0x05c4, 0x05c5], // Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot
+    [0x05c7, 0x05c7], // Hebrew Point Qamats Qata..Hebrew Point Qamats Qata
+    [0x0610, 0x061a], // Arabic Sign Sallallahou ..Arabic Small Kasra
+    [0x064b, 0x065f], // Arabic Fathatan ..
+ [0x0670, 0x0670], // Arabic Letter Superscrip..Arabic Letter Superscrip + [0x06d6, 0x06dc], // Arabic Small High Ligatu..Arabic Small High Seen + [0x06df, 0x06e4], // Arabic Small High Rounde..Arabic Small High Madda + [0x06e7, 0x06e8], // Arabic Small High Yeh ..Arabic Small High Noon + [0x06ea, 0x06ed], // Arabic Empty Centre Low ..Arabic Small Low Meem + [0x0711, 0x0711], // Syriac Letter Superscrip..Syriac Letter Superscrip + [0x0730, 0x074a], // Syriac Pthaha Above ..Syriac Barrekh + [0x07a6, 0x07b0], // Thaana Abafili ..Thaana Sukun + [0x07eb, 0x07f3], // Nko Combining Short High..Nko Combining Double Dot + [0x07fd, 0x07fd], // (nil) .. + [0x0816, 0x0819], // Samaritan Mark In ..Samaritan Mark Dagesh + [0x081b, 0x0823], // Samaritan Mark Epentheti..Samaritan Vowel Sign A + [0x0825, 0x0827], // Samaritan Vowel Sign Sho..Samaritan Vowel Sign U + [0x0829, 0x082d], // Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa + [0x0859, 0x085b], // (nil) .. + [0x08d3, 0x08e1], // (nil) .. + [0x08e3, 0x0902], // (nil) ..Devanagari Sign Anusvara + [0x093a, 0x093a], // (nil) .. + [0x093c, 0x093c], // Devanagari Sign Nukta ..Devanagari Sign Nukta + [0x0941, 0x0948], // Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai + [0x094d, 0x094d], // Devanagari Sign Virama ..Devanagari Sign Virama + [0x0951, 0x0957], // Devanagari Stress Sign U.. + [0x0962, 0x0963], // Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo + [0x0981, 0x0981], // Bengali Sign Candrabindu..Bengali Sign Candrabindu + [0x09bc, 0x09bc], // Bengali Sign Nukta ..Bengali Sign Nukta + [0x09c1, 0x09c4], // Bengali Vowel Sign U ..Bengali Vowel Sign Vocal + [0x09cd, 0x09cd], // Bengali Sign Virama ..Bengali Sign Virama + [0x09e2, 0x09e3], // Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal + [0x09fe, 0x09fe], // (nil) .. 
+ [0x0a01, 0x0a02], // Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + [0x0a3c, 0x0a3c], // Gurmukhi Sign Nukta ..Gurmukhi Sign Nukta + [0x0a41, 0x0a42], // Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + [0x0a47, 0x0a48], // Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai + [0x0a4b, 0x0a4d], // Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama + [0x0a51, 0x0a51], // Gurmukhi Sign Udaat ..Gurmukhi Sign Udaat + [0x0a70, 0x0a71], // Gurmukhi Tippi ..Gurmukhi Addak + [0x0a75, 0x0a75], // Gurmukhi Sign Yakash ..Gurmukhi Sign Yakash + [0x0a81, 0x0a82], // Gujarati Sign Candrabind..Gujarati Sign Anusvara + [0x0abc, 0x0abc], // Gujarati Sign Nukta ..Gujarati Sign Nukta + [0x0ac1, 0x0ac5], // Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand + [0x0ac7, 0x0ac8], // Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai + [0x0acd, 0x0acd], // Gujarati Sign Virama ..Gujarati Sign Virama + [0x0ae2, 0x0ae3], // Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca + [0x0afa, 0x0aff], // (nil) .. + [0x0b01, 0x0b01], // Oriya Sign Candrabindu ..Oriya Sign Candrabindu + [0x0b3c, 0x0b3c], // Oriya Sign Nukta ..Oriya Sign Nukta + [0x0b3f, 0x0b3f], // Oriya Vowel Sign I ..Oriya Vowel Sign I + [0x0b41, 0x0b44], // Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic + [0x0b4d, 0x0b4d], // Oriya Sign Virama ..Oriya Sign Virama + [0x0b56, 0x0b56], // Oriya Ai Length Mark ..Oriya Ai Length Mark + [0x0b62, 0x0b63], // Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic + [0x0b82, 0x0b82], // Tamil Sign Anusvara ..Tamil Sign Anusvara + [0x0bc0, 0x0bc0], // Tamil Vowel Sign Ii ..Tamil Vowel Sign Ii + [0x0bcd, 0x0bcd], // Tamil Sign Virama ..Tamil Sign Virama + [0x0c00, 0x0c00], // (nil) .. + [0x0c04, 0x0c04], // (nil) .. 
+ [0x0c3e, 0x0c40], // Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + [0x0c46, 0x0c48], // Telugu Vowel Sign E ..Telugu Vowel Sign Ai + [0x0c4a, 0x0c4d], // Telugu Vowel Sign O ..Telugu Sign Virama + [0x0c55, 0x0c56], // Telugu Length Mark ..Telugu Ai Length Mark + [0x0c62, 0x0c63], // Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali + [0x0c81, 0x0c81], // (nil) .. + [0x0cbc, 0x0cbc], // Kannada Sign Nukta ..Kannada Sign Nukta + [0x0cbf, 0x0cbf], // Kannada Vowel Sign I ..Kannada Vowel Sign I + [0x0cc6, 0x0cc6], // Kannada Vowel Sign E ..Kannada Vowel Sign E + [0x0ccc, 0x0ccd], // Kannada Vowel Sign Au ..Kannada Sign Virama + [0x0ce2, 0x0ce3], // Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal + [0x0d00, 0x0d01], // (nil) .. + [0x0d3b, 0x0d3c], // (nil) .. + [0x0d41, 0x0d44], // Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc + [0x0d4d, 0x0d4d], // Malayalam Sign Virama ..Malayalam Sign Virama + [0x0d62, 0x0d63], // Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + [0x0dca, 0x0dca], // Sinhala Sign Al-lakuna ..Sinhala Sign Al-lakuna + [0x0dd2, 0x0dd4], // Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + [0x0dd6, 0x0dd6], // Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga + [0x0e31, 0x0e31], // Thai Character Mai Han-a..Thai Character Mai Han-a + [0x0e34, 0x0e3a], // Thai Character Sara I ..Thai Character Phinthu + [0x0e47, 0x0e4e], // Thai Character Maitaikhu..Thai Character Yamakkan + [0x0eb1, 0x0eb1], // Lao Vowel Sign Mai Kan ..Lao Vowel Sign Mai Kan + [0x0eb4, 0x0eb9], // Lao Vowel Sign I ..Lao Vowel Sign Uu + [0x0ebb, 0x0ebc], // Lao Vowel Sign Mai Kon ..Lao Semivowel Sign Lo + [0x0ec8, 0x0ecd], // Lao Tone Mai Ek ..Lao Niggahita + [0x0f18, 0x0f19], // Tibetan Astrological Sig..Tibetan Astrological Sig + [0x0f35, 0x0f35], // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung + [0x0f37, 0x0f37], // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung + [0x0f39, 0x0f39], // Tibetan Mark Tsa -phru ..Tibetan Mark Tsa -phru + [0x0f71, 0x0f7e], // Tibetan 
Vowel Sign Aa ..Tibetan Sign Rjes Su Nga + [0x0f80, 0x0f84], // Tibetan Vowel Sign Rever..Tibetan Mark Halanta + [0x0f86, 0x0f87], // Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags + [0x0f8d, 0x0f97], // (nil) ..Tibetan Subjoined Letter + [0x0f99, 0x0fbc], // Tibetan Subjoined Letter..Tibetan Subjoined Letter + [0x0fc6, 0x0fc6], // Tibetan Symbol Padma Gda..Tibetan Symbol Padma Gda + [0x102d, 0x1030], // Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu + [0x1032, 0x1037], // Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below + [0x1039, 0x103a], // Myanmar Sign Virama ..Myanmar Sign Asat + [0x103d, 0x103e], // Myanmar Consonant Sign M..Myanmar Consonant Sign M + [0x1058, 0x1059], // Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + [0x105e, 0x1060], // Myanmar Consonant Sign M..Myanmar Consonant Sign M + [0x1071, 0x1074], // Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah + [0x1082, 0x1082], // Myanmar Consonant Sign S..Myanmar Consonant Sign S + [0x1085, 0x1086], // Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan + [0x108d, 0x108d], // Myanmar Sign Shan Counci..Myanmar Sign Shan Counci + [0x109d, 0x109d], // Myanmar Vowel Sign Aiton..Myanmar Vowel Sign Aiton + [0x135d, 0x135f], // (nil) ..Ethiopic Combining Gemin + [0x1712, 0x1714], // Tagalog Vowel Sign I ..Tagalog Sign Virama + [0x1732, 0x1734], // Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod + [0x1752, 0x1753], // Buhid Vowel Sign I ..Buhid Vowel Sign U + [0x1772, 0x1773], // Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U + [0x17b4, 0x17b5], // Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa + [0x17b7, 0x17bd], // Khmer Vowel Sign I ..Khmer Vowel Sign Ua + [0x17c6, 0x17c6], // Khmer Sign Nikahit ..Khmer Sign Nikahit + [0x17c9, 0x17d3], // Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + [0x17dd, 0x17dd], // Khmer Sign Atthacan ..Khmer Sign Atthacan + [0x180b, 0x180d], // Mongolian Free Variation..Mongolian Free Variation + [0x1885, 0x1886], // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal + [0x18a9, 
0x18a9], // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal + [0x1920, 0x1922], // Limbu Vowel Sign A ..Limbu Vowel Sign U + [0x1927, 0x1928], // Limbu Vowel Sign E ..Limbu Vowel Sign O + [0x1932, 0x1932], // Limbu Small Letter Anusv..Limbu Small Letter Anusv + [0x1939, 0x193b], // Limbu Sign Mukphreng ..Limbu Sign Sa-i + [0x1a17, 0x1a18], // Buginese Vowel Sign I ..Buginese Vowel Sign U + [0x1a1b, 0x1a1b], // Buginese Vowel Sign Ae ..Buginese Vowel Sign Ae + [0x1a56, 0x1a56], // Tai Tham Consonant Sign ..Tai Tham Consonant Sign + [0x1a58, 0x1a5e], // Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign + [0x1a60, 0x1a60], // Tai Tham Sign Sakot ..Tai Tham Sign Sakot + [0x1a62, 0x1a62], // Tai Tham Vowel Sign Mai ..Tai Tham Vowel Sign Mai + [0x1a65, 0x1a6c], // Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B + [0x1a73, 0x1a7c], // Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + [0x1a7f, 0x1a7f], // Tai Tham Combining Crypt..Tai Tham Combining Crypt + [0x1ab0, 0x1abe], // (nil) .. + [0x1b00, 0x1b03], // Balinese Sign Ulu Ricem ..Balinese Sign Surang + [0x1b34, 0x1b34], // Balinese Sign Rerekan ..Balinese Sign Rerekan + [0x1b36, 0x1b3a], // Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R + [0x1b3c, 0x1b3c], // Balinese Vowel Sign La L..Balinese Vowel Sign La L + [0x1b42, 0x1b42], // Balinese Vowel Sign Pepe..Balinese Vowel Sign Pepe + [0x1b6b, 0x1b73], // Balinese Musical Symbol ..Balinese Musical Symbol + [0x1b80, 0x1b81], // Sundanese Sign Panyecek ..Sundanese Sign Panglayar + [0x1ba2, 0x1ba5], // Sundanese Consonant Sign..Sundanese Vowel Sign Pan + [0x1ba8, 0x1ba9], // Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan + [0x1bab, 0x1bad], // (nil) .. + [0x1be6, 0x1be6], // (nil) .. + [0x1be8, 0x1be9], // (nil) .. + [0x1bed, 0x1bed], // (nil) .. + [0x1bef, 0x1bf1], // (nil) .. 
+ [0x1c2c, 0x1c33], // Lepcha Vowel Sign E ..Lepcha Consonant Sign T + [0x1c36, 0x1c37], // Lepcha Sign Ran ..Lepcha Sign Nukta + [0x1cd0, 0x1cd2], // Vedic Tone Karshana ..Vedic Tone Prenkha + [0x1cd4, 0x1ce0], // Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash + [0x1ce2, 0x1ce8], // Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + [0x1ced, 0x1ced], // Vedic Sign Tiryak ..Vedic Sign Tiryak + [0x1cf4, 0x1cf4], // (nil) .. + [0x1cf8, 0x1cf9], // (nil) .. + [0x1dc0, 0x1df9], // Combining Dotted Grave A.. + [0x1dfb, 0x1dff], // (nil) ..Combining Right Arrowhea + [0x20d0, 0x20f0], // Combining Left Harpoon A..Combining Asterisk Above + [0x2cef, 0x2cf1], // Coptic Combining Ni Abov..Coptic Combining Spiritu + [0x2d7f, 0x2d7f], // (nil) .. + [0x2de0, 0x2dff], // Combining Cyrillic Lette..Combining Cyrillic Lette + [0x302a, 0x302d], // Ideographic Level Tone M..Ideographic Entering Ton + [0x3099, 0x309a], // Combining Katakana-hirag..Combining Katakana-hirag + [0xa66f, 0xa672], // Combining Cyrillic Vzmet..Combining Cyrillic Thous + [0xa674, 0xa67d], // (nil) ..Combining Cyrillic Payer + [0xa69e, 0xa69f], // (nil) .. + [0xa6f0, 0xa6f1], // Bamum Combining Mark Koq..Bamum Combining Mark Tuk + [0xa802, 0xa802], // Syloti Nagri Sign Dvisva..Syloti Nagri Sign Dvisva + [0xa806, 0xa806], // Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant + [0xa80b, 0xa80b], // Syloti Nagri Sign Anusva..Syloti Nagri Sign Anusva + [0xa825, 0xa826], // Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + [0xa8c4, 0xa8c5], // Saurashtra Sign Virama .. + [0xa8e0, 0xa8f1], // Combining Devanagari Dig..Combining Devanagari Sig + [0xa8ff, 0xa8ff], // (nil) .. 
+ [0xa926, 0xa92d], // Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop + [0xa947, 0xa951], // Rejang Vowel Sign I ..Rejang Consonant Sign R + [0xa980, 0xa982], // Javanese Sign Panyangga ..Javanese Sign Layar + [0xa9b3, 0xa9b3], // Javanese Sign Cecak Telu..Javanese Sign Cecak Telu + [0xa9b6, 0xa9b9], // Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku + [0xa9bc, 0xa9bc], // Javanese Vowel Sign Pepe..Javanese Vowel Sign Pepe + [0xa9e5, 0xa9e5], // (nil) .. + [0xaa29, 0xaa2e], // Cham Vowel Sign Aa ..Cham Vowel Sign Oe + [0xaa31, 0xaa32], // Cham Vowel Sign Au ..Cham Vowel Sign Ue + [0xaa35, 0xaa36], // Cham Consonant Sign La ..Cham Consonant Sign Wa + [0xaa43, 0xaa43], // Cham Consonant Sign Fina..Cham Consonant Sign Fina + [0xaa4c, 0xaa4c], // Cham Consonant Sign Fina..Cham Consonant Sign Fina + [0xaa7c, 0xaa7c], // (nil) .. + [0xaab0, 0xaab0], // Tai Viet Mai Kang ..Tai Viet Mai Kang + [0xaab2, 0xaab4], // Tai Viet Vowel I ..Tai Viet Vowel U + [0xaab7, 0xaab8], // Tai Viet Mai Khit ..Tai Viet Vowel Ia + [0xaabe, 0xaabf], // Tai Viet Vowel Am ..Tai Viet Tone Mai Ek + [0xaac1, 0xaac1], // Tai Viet Tone Mai Tho ..Tai Viet Tone Mai Tho + [0xaaec, 0xaaed], // (nil) .. + [0xaaf6, 0xaaf6], // (nil) .. + [0xabe5, 0xabe5], // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + [0xabe8, 0xabe8], // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + [0xabed, 0xabed], // Meetei Mayek Apun Iyek ..Meetei Mayek Apun Iyek + [0xfb1e, 0xfb1e], // Hebrew Point Judeo-spani..Hebrew Point Judeo-spani + [0xfe00, 0xfe0f], // Variation Selector-1 ..Variation Selector-16 + [0xfe20, 0xfe2f], // Combining Ligature Left .. + [0x101fd, 0x101fd], // Phaistos Disc Sign Combi..Phaistos Disc Sign Combi + [0x102e0, 0x102e0], // (nil) .. + [0x10376, 0x1037a], // (nil) .. 
+ [0x10a01, 0x10a03], // Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo + [0x10a05, 0x10a06], // Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O + [0x10a0c, 0x10a0f], // Kharoshthi Vowel Length ..Kharoshthi Sign Visarga + [0x10a38, 0x10a3a], // Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo + [0x10a3f, 0x10a3f], // Kharoshthi Virama ..Kharoshthi Virama + [0x10ae5, 0x10ae6], // (nil) .. + [0x10d24, 0x10d27], // (nil) .. + [0x10f46, 0x10f50], // (nil) .. + [0x11001, 0x11001], // (nil) .. + [0x11038, 0x11046], // (nil) .. + [0x1107f, 0x11081], // (nil) ..Kaithi Sign Anusvara + [0x110b3, 0x110b6], // Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai + [0x110b9, 0x110ba], // Kaithi Sign Virama ..Kaithi Sign Nukta + [0x11100, 0x11102], // (nil) .. + [0x11127, 0x1112b], // (nil) .. + [0x1112d, 0x11134], // (nil) .. + [0x11173, 0x11173], // (nil) .. + [0x11180, 0x11181], // (nil) .. + [0x111b6, 0x111be], // (nil) .. + [0x111c9, 0x111cc], // (nil) .. + [0x1122f, 0x11231], // (nil) .. + [0x11234, 0x11234], // (nil) .. + [0x11236, 0x11237], // (nil) .. + [0x1123e, 0x1123e], // (nil) .. + [0x112df, 0x112df], // (nil) .. + [0x112e3, 0x112ea], // (nil) .. + [0x11300, 0x11301], // (nil) .. + [0x1133b, 0x1133c], // (nil) .. + [0x11340, 0x11340], // (nil) .. + [0x11366, 0x1136c], // (nil) .. + [0x11370, 0x11374], // (nil) .. + [0x11438, 0x1143f], // (nil) .. + [0x11442, 0x11444], // (nil) .. + [0x11446, 0x11446], // (nil) .. + [0x1145e, 0x1145e], // (nil) .. + [0x114b3, 0x114b8], // (nil) .. + [0x114ba, 0x114ba], // (nil) .. + [0x114bf, 0x114c0], // (nil) .. + [0x114c2, 0x114c3], // (nil) .. + [0x115b2, 0x115b5], // (nil) .. + [0x115bc, 0x115bd], // (nil) .. + [0x115bf, 0x115c0], // (nil) .. + [0x115dc, 0x115dd], // (nil) .. + [0x11633, 0x1163a], // (nil) .. + [0x1163d, 0x1163d], // (nil) .. + [0x1163f, 0x11640], // (nil) .. + [0x116ab, 0x116ab], // (nil) .. + [0x116ad, 0x116ad], // (nil) .. + [0x116b0, 0x116b5], // (nil) .. + [0x116b7, 0x116b7], // (nil) .. 
+ [0x1171d, 0x1171f], // (nil) .. + [0x11722, 0x11725], // (nil) .. + [0x11727, 0x1172b], // (nil) .. + [0x1182f, 0x11837], // (nil) .. + [0x11839, 0x1183a], // (nil) .. + [0x11a01, 0x11a0a], // (nil) .. + [0x11a33, 0x11a38], // (nil) .. + [0x11a3b, 0x11a3e], // (nil) .. + [0x11a47, 0x11a47], // (nil) .. + [0x11a51, 0x11a56], // (nil) .. + [0x11a59, 0x11a5b], // (nil) .. + [0x11a8a, 0x11a96], // (nil) .. + [0x11a98, 0x11a99], // (nil) .. + [0x11c30, 0x11c36], // (nil) .. + [0x11c38, 0x11c3d], // (nil) .. + [0x11c3f, 0x11c3f], // (nil) .. + [0x11c92, 0x11ca7], // (nil) .. + [0x11caa, 0x11cb0], // (nil) .. + [0x11cb2, 0x11cb3], // (nil) .. + [0x11cb5, 0x11cb6], // (nil) .. + [0x11d31, 0x11d36], // (nil) .. + [0x11d3a, 0x11d3a], // (nil) .. + [0x11d3c, 0x11d3d], // (nil) .. + [0x11d3f, 0x11d45], // (nil) .. + [0x11d47, 0x11d47], // (nil) .. + [0x11d90, 0x11d91], // (nil) .. + [0x11d95, 0x11d95], // (nil) .. + [0x11d97, 0x11d97], // (nil) .. + [0x11ef3, 0x11ef4], // (nil) .. + [0x16af0, 0x16af4], // (nil) .. + [0x16b30, 0x16b36], // (nil) .. + [0x16f8f, 0x16f92], // (nil) .. + [0x1bc9d, 0x1bc9e], // (nil) .. + [0x1d167, 0x1d169], // Musical Symbol Combining..Musical Symbol Combining + [0x1d17b, 0x1d182], // Musical Symbol Combining..Musical Symbol Combining + [0x1d185, 0x1d18b], // Musical Symbol Combining..Musical Symbol Combining + [0x1d1aa, 0x1d1ad], // Musical Symbol Combining..Musical Symbol Combining + [0x1d242, 0x1d244], // Combining Greek Musical ..Combining Greek Musical + [0x1da00, 0x1da36], // (nil) .. + [0x1da3b, 0x1da6c], // (nil) .. + [0x1da75, 0x1da75], // (nil) .. + [0x1da84, 0x1da84], // (nil) .. + [0x1da9b, 0x1da9f], // (nil) .. + [0x1daa1, 0x1daaf], // (nil) .. + [0x1e000, 0x1e006], // (nil) .. + [0x1e008, 0x1e018], // (nil) .. + [0x1e01b, 0x1e021], // (nil) .. + [0x1e023, 0x1e024], // (nil) .. + [0x1e026, 0x1e02a], // (nil) .. + [0x1e8d0, 0x1e8d6], // (nil) .. + [0x1e944, 0x1e94a], // (nil) .. 
+ [0xe0100, 0xe01ef] // Variation Selector-17 ..Variation Selector-256 + ]; + + // Generated: 2018-09-24T16:45:44.464578 + // Source: EastAsianWidth-11.0.0.txt + // Date: 2018-05-14, 09:41:59 GMT [KW, LI] + const WIDE_EASTASIAN = [ + [0x1100, 0x115f], // Hangul Choseong Kiyeok ..Hangul Choseong Filler + [0x231a, 0x231b], // Watch ..Hourglass + [0x2329, 0x232a], // Left-pointing Angle Brac..Right-pointing Angle Bra + [0x23e9, 0x23ec], // (nil) .. + [0x23f0, 0x23f0], // (nil) .. + [0x23f3, 0x23f3], // (nil) .. + [0x25fd, 0x25fe], // White Medium Small Squar..Black Medium Small Squar + [0x2614, 0x2615], // Umbrella With Rain Drops..Hot Beverage + [0x2648, 0x2653], // Aries ..Pisces + [0x267f, 0x267f], // Wheelchair Symbol ..Wheelchair Symbol + [0x2693, 0x2693], // Anchor ..Anchor + [0x26a1, 0x26a1], // High Voltage Sign ..High Voltage Sign + [0x26aa, 0x26ab], // Medium White Circle ..Medium Black Circle + [0x26bd, 0x26be], // Soccer Ball ..Baseball + [0x26c4, 0x26c5], // Snowman Without Snow ..Sun Behind Cloud + [0x26ce, 0x26ce], // (nil) .. + [0x26d4, 0x26d4], // No Entry ..No Entry + [0x26ea, 0x26ea], // Church ..Church + [0x26f2, 0x26f3], // Fountain ..Flag In Hole + [0x26f5, 0x26f5], // Sailboat ..Sailboat + [0x26fa, 0x26fa], // Tent ..Tent + [0x26fd, 0x26fd], // Fuel Pump ..Fuel Pump + [0x2705, 0x2705], // (nil) .. + [0x270a, 0x270b], // (nil) .. + [0x2728, 0x2728], // (nil) .. + [0x274c, 0x274c], // (nil) .. + [0x274e, 0x274e], // (nil) .. + [0x2753, 0x2755], // (nil) .. + [0x2757, 0x2757], // Heavy Exclamation Mark S..Heavy Exclamation Mark S + [0x2795, 0x2797], // (nil) .. + [0x27b0, 0x27b0], // (nil) .. + [0x27bf, 0x27bf], // (nil) .. 
+ [0x2b1b, 0x2b1c], // Black Large Square ..White Large Square + [0x2b50, 0x2b50], // White Medium Star ..White Medium Star + [0x2b55, 0x2b55], // Heavy Large Circle ..Heavy Large Circle + [0x2e80, 0x2e99], // Cjk Radical Repeat ..Cjk Radical Rap + [0x2e9b, 0x2ef3], // Cjk Radical Choke ..Cjk Radical C-simplified + [0x2f00, 0x2fd5], // Kangxi Radical One ..Kangxi Radical Flute + [0x2ff0, 0x2ffb], // Ideographic Description ..Ideographic Description + [0x3000, 0x303e], // Ideographic Space ..Ideographic Variation In + [0x3041, 0x3096], // Hiragana Letter Small A ..Hiragana Letter Small Ke + [0x3099, 0x30ff], // Combining Katakana-hirag..Katakana Digraph Koto + [0x3105, 0x312f], // Bopomofo Letter B .. + [0x3131, 0x318e], // Hangul Letter Kiyeok ..Hangul Letter Araeae + [0x3190, 0x31ba], // Ideographic Annotation L.. + [0x31c0, 0x31e3], // Cjk Stroke T ..Cjk Stroke Q + [0x31f0, 0x321e], // Katakana Letter Small Ku..Parenthesized Korean Cha + [0x3220, 0x3247], // Parenthesized Ideograph ..Circled Ideograph Koto + [0x3250, 0x32fe], // Partnership Sign ..Circled Katakana Wo + [0x3300, 0x4dbf], // Square Apaato .. + [0x4e00, 0xa48c], // Cjk Unified Ideograph-4e..Yi Syllable Yyr + [0xa490, 0xa4c6], // Yi Radical Qot ..Yi Radical Ke + [0xa960, 0xa97c], // Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo + [0xac00, 0xd7a3], // Hangul Syllable Ga ..Hangul Syllable Hih + [0xf900, 0xfaff], // Cjk Compatibility Ideogr.. + [0xfe10, 0xfe19], // Presentation Form For Ve..Presentation Form For Ve + [0xfe30, 0xfe52], // Presentation Form For Ve..Small Full Stop + [0xfe54, 0xfe66], // Small Semicolon ..Small Equals Sign + [0xfe68, 0xfe6b], // Small Reverse Solidus ..Small Commercial At + [0xff01, 0xff60], // Fullwidth Exclamation Ma..Fullwidth Right White Pa + [0xffe0, 0xffe6], // Fullwidth Cent Sign ..Fullwidth Won Sign + [0x16fe0, 0x16fe1], // (nil) .. + [0x17000, 0x187f1], // (nil) .. + [0x18800, 0x18af2], // (nil) .. + [0x1b000, 0x1b11e], // (nil) .. 
+ [0x1b170, 0x1b2fb], // (nil) .. + [0x1f004, 0x1f004], // Mahjong Tile Red Dragon ..Mahjong Tile Red Dragon + [0x1f0cf, 0x1f0cf], // (nil) .. + [0x1f18e, 0x1f18e], // (nil) .. + [0x1f191, 0x1f19a], // (nil) .. + [0x1f200, 0x1f202], // Square Hiragana Hoka .. + [0x1f210, 0x1f23b], // Squared Cjk Unified Ideo.. + [0x1f240, 0x1f248], // Tortoise Shell Bracketed..Tortoise Shell Bracketed + [0x1f250, 0x1f251], // (nil) .. + [0x1f260, 0x1f265], // (nil) .. + [0x1f300, 0x1f320], // (nil) .. + [0x1f32d, 0x1f335], // (nil) .. + [0x1f337, 0x1f37c], // (nil) .. + [0x1f37e, 0x1f393], // (nil) .. + [0x1f3a0, 0x1f3ca], // (nil) .. + [0x1f3cf, 0x1f3d3], // (nil) .. + [0x1f3e0, 0x1f3f0], // (nil) .. + [0x1f3f4, 0x1f3f4], // (nil) .. + [0x1f3f8, 0x1f43e], // (nil) .. + [0x1f440, 0x1f440], // (nil) .. + [0x1f442, 0x1f4fc], // (nil) .. + [0x1f4ff, 0x1f53d], // (nil) .. + [0x1f54b, 0x1f54e], // (nil) .. + [0x1f550, 0x1f567], // (nil) .. + [0x1f57a, 0x1f57a], // (nil) .. + [0x1f595, 0x1f596], // (nil) .. + [0x1f5a4, 0x1f5a4], // (nil) .. + [0x1f5fb, 0x1f64f], // (nil) .. + [0x1f680, 0x1f6c5], // (nil) .. + [0x1f6cc, 0x1f6cc], // (nil) .. + [0x1f6d0, 0x1f6d2], // (nil) .. + [0x1f6eb, 0x1f6ec], // (nil) .. + [0x1f6f4, 0x1f6f9], // (nil) .. + [0x1f910, 0x1f93e], // (nil) .. + [0x1f940, 0x1f970], // (nil) .. + [0x1f973, 0x1f976], // (nil) .. + [0x1f97a, 0x1f97a], // (nil) .. + [0x1f97c, 0x1f9a2], // (nil) .. + [0x1f9b0, 0x1f9b9], // (nil) .. + [0x1f9c0, 0x1f9c2], // (nil) .. + [0x1f9d0, 0x1f9ff], // (nil) .. + [0x20000, 0x2fffd], // Cjk Unified Ideograph-20.. + [0x30000, 0x3fffd] // (nil) .. 
// ---- helper functions and lookup closure of the reference (old) wcwidth implementation ----

  /**
   * Binary search for `ucs` in a list of inclusive `[first, last]` ranges.
   * The list must be sorted in ascending order for the search to be correct.
   *
   * @param ucs Codepoint to look up.
   * @param data Range table to search.
   * @returns `true` if one of the ranges contains `ucs`, `false` otherwise
   *          (including for an empty table).
   */
  function bisearch(ucs: number, data: number[][]): boolean {
    let min = 0;
    let max = data.length - 1;
    // empty table, or codepoint outside the span covered by the table
    if (max < 0 || ucs < data[0][0] || ucs > data[max][1]) {
      return false;
    }
    while (max >= min) {
      const mid = (min + max) >> 1;
      if (ucs > data[mid][1]) {
        min = mid + 1;
      } else if (ucs < data[mid][0]) {
        max = mid - 1;
      } else {
        return true;
      }
    }
    return false;
  }

  /**
   * Width of a BMP codepoint, computed without the lookup table.
   *
   * @param ucs Codepoint to measure.
   * @returns `opts.nul` for 0, `opts.control` for C0/C1 control characters,
   *          0 for entries of ZERO_WIDTH, 2 for wide characters, 1 otherwise.
   */
  function wcwidthBMP(ucs: number): number {
    // test for 8-bit control characters
    if (ucs === 0) {
      return opts.nul;
    }
    if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) {
      return opts.control;
    }
    // binary search in table of non-spacing characters
    if (bisearch(ucs, ZERO_WIDTH)) {
      return 0;
    }
    // if we arrive here, ucs is not a combining or C0/C1 control character
    if (isWideBMP(ucs)) {
      return 2;
    }
    return 1;
  }

  /**
   * Hard-coded wide (2 cell) ranges of the BMP.
   *
   * @param ucs Codepoint to test.
   * @returns `true` if `ucs` falls into one of the wide ranges.
   */
  function isWideBMP(ucs: number): boolean {
    return (
      ucs >= 0x1100 && (
        ucs <= 0x115f ||                // Hangul Jamo init. consonants
        ucs === 0x2329 ||
        ucs === 0x232a ||
        (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs !== 0x303f) ||  // CJK..Yi
        (ucs >= 0xac00 && ucs <= 0xd7a3) ||    // Hangul Syllables
        (ucs >= 0xf900 && ucs <= 0xfaff) ||    // CJK Compat Ideographs
        (ucs >= 0xfe10 && ucs <= 0xfe19) ||    // Vertical forms
        (ucs >= 0xfe30 && ucs <= 0xfe6f) ||    // CJK Compat Forms
        (ucs >= 0xff00 && ucs <= 0xff60) ||    // Fullwidth Forms
        (ucs >= 0xffe0 && ucs <= 0xffe6)));
  }

  /**
   * Width of a codepoint via full table search (used for codepoints >= 65536).
   *
   * @param ucs Codepoint to measure.
   * @returns 0 if listed in ZERO_WIDTH, 2 if listed in WIDE_EASTASIAN, 1 otherwise.
   */
  function wcwidthHigh(ucs: number): 0 | 1 | 2 {
    if (bisearch(ucs, ZERO_WIDTH)) {
      return 0;
    }

    if (bisearch(ucs, WIDE_EASTASIAN)) {
      return 2;
    }

    return 1;
  }

  const control = opts.control | 0;
  // packed BMP width table, created on first use by initTable()
  let table: number[] | Uint32Array | null = null;

  /**
   * Build the packed BMP lookup table: 2 bits per codepoint, 16 codepoints
   * per 32 bit item, filled from wcwidthBMP().
   *
   * @returns The freshly built table (also stored in `table`).
   */
  function initTable(): number[] | Uint32Array {
    // lookup table for BMP
    const CODEPOINTS = 65536;  // BMP holds 65536 codepoints
    const BITWIDTH = 2;        // a codepoint can have a width of 0, 1 or 2
    const ITEMSIZE = 32;       // using uint32_t
    const CONTAINERSIZE = CODEPOINTS * BITWIDTH / ITEMSIZE;
    const CODEPOINTS_PER_ITEM = ITEMSIZE / BITWIDTH;
    table = (typeof Uint32Array === 'undefined')
      ? new Array(CONTAINERSIZE)
      : new Uint32Array(CONTAINERSIZE);
    for (let i = 0; i < CONTAINERSIZE; ++i) {
      let num = 0;
      let pos = CODEPOINTS_PER_ITEM;
      // pack from the highest codepoint of the item down to the lowest,
      // so the lowest codepoint ends up in the 2 least significant bits
      while (pos--) {
        num = (num << 2) | wcwidthBMP(CODEPOINTS_PER_ITEM * i + pos);
      }
      table[i] = num;
    }
    return table;
  }

  // get width from lookup table
  //    position in container   : num / CODEPOINTS_PER_ITEM
  //      ==> n = table[Math.floor(num / 16)]
  //      ==> n = table[num >> 4]
  //    16 codepoints per number:       FFEEDDCCBBAA99887766554433221100
  //    position in number      : (num % CODEPOINTS_PER_ITEM) * BITWIDTH
  //      ==> m = (n % 16) * 2
  //      ==> m = (num & 15) << 1
  //    right shift to position m
  //      ==> n = n >> m     e.g. m=12  000000000000FFEEDDCCBBAA99887766
  //    we are only interested in 2 LSBs, cut off higher bits
  //      ==> n = n & 3      e.g.       000000000000000000000000000000XX
  return function (num: number): number {
    num = num | 0;  // get asm.js like optimization under V8
    if (num < 32) {
      return control | 0;
    }
    if (num < 127) {
      return 1;
    }
    const t = table || initTable();
    if (num < 65536) {
      return t[num >> 4] >> ((num & 15) << 1) & 3;
    }
    // do a full search for high codepoints
    // (no per-codepoint special cases: the range tables are the single source of truth)
    return wcwidthHigh(num);
  };
})({nul: 0, control: 0});  // configurable options

describe('unicode - v11', () => {
  const versionProvider = v11;
  versionProvider.init();
  it('wcwidth should match all values from the old implementation', () => {
    // test full BMP range, old vs new implementation
    for (let i = 0; i < 65536; ++i) {
      assert.equal(versionProvider.wcwidth(i), wcwidthOld(i), `mismatch for i: ${i}`);
    }
  });
});
+ * @license MIT + */ +import { IUnicodeVersionProvider } from '../Types'; +import { fill } from '../common/TypedArrayUtils'; + +// Generated: 2018-09-24T16:45:44.483077 +// Source: DerivedGeneralCategory-11.0.0.txt +// Date: 2018-02-21, 05:34:04 GMT +const ZERO_WIDTH_BMP = [ + [0x0300, 0x036f], // Combining Grave Accent ..Combining Latin Small Le + [0x0483, 0x0489], // Combining Cyrillic Titlo..Combining Cyrillic Milli + [0x0591, 0x05bd], // Hebrew Accent Etnahta ..Hebrew Point Meteg + [0x05bf, 0x05bf], // Hebrew Point Rafe ..Hebrew Point Rafe + [0x05c1, 0x05c2], // Hebrew Point Shin Dot ..Hebrew Point Sin Dot + [0x05c4, 0x05c5], // Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot + [0x05c7, 0x05c7], // Hebrew Point Qamats Qata..Hebrew Point Qamats Qata + [0x0610, 0x061a], // Arabic Sign Sallallahou ..Arabic Small Kasra + [0x064b, 0x065f], // Arabic Fathatan .. + [0x0670, 0x0670], // Arabic Letter Superscrip..Arabic Letter Superscrip + [0x06d6, 0x06dc], // Arabic Small High Ligatu..Arabic Small High Seen + [0x06df, 0x06e4], // Arabic Small High Rounde..Arabic Small High Madda + [0x06e7, 0x06e8], // Arabic Small High Yeh ..Arabic Small High Noon + [0x06ea, 0x06ed], // Arabic Empty Centre Low ..Arabic Small Low Meem + [0x0711, 0x0711], // Syriac Letter Superscrip..Syriac Letter Superscrip + [0x0730, 0x074a], // Syriac Pthaha Above ..Syriac Barrekh + [0x07a6, 0x07b0], // Thaana Abafili ..Thaana Sukun + [0x07eb, 0x07f3], // Nko Combining Short High..Nko Combining Double Dot + [0x07fd, 0x07fd], // (nil) .. + [0x0816, 0x0819], // Samaritan Mark In ..Samaritan Mark Dagesh + [0x081b, 0x0823], // Samaritan Mark Epentheti..Samaritan Vowel Sign A + [0x0825, 0x0827], // Samaritan Vowel Sign Sho..Samaritan Vowel Sign U + [0x0829, 0x082d], // Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa + [0x0859, 0x085b], // (nil) .. + [0x08d3, 0x08e1], // (nil) .. + [0x08e3, 0x0902], // (nil) ..Devanagari Sign Anusvara + [0x093a, 0x093a], // (nil) .. 
+ [0x093c, 0x093c], // Devanagari Sign Nukta ..Devanagari Sign Nukta + [0x0941, 0x0948], // Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai + [0x094d, 0x094d], // Devanagari Sign Virama ..Devanagari Sign Virama + [0x0951, 0x0957], // Devanagari Stress Sign U.. + [0x0962, 0x0963], // Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo + [0x0981, 0x0981], // Bengali Sign Candrabindu..Bengali Sign Candrabindu + [0x09bc, 0x09bc], // Bengali Sign Nukta ..Bengali Sign Nukta + [0x09c1, 0x09c4], // Bengali Vowel Sign U ..Bengali Vowel Sign Vocal + [0x09cd, 0x09cd], // Bengali Sign Virama ..Bengali Sign Virama + [0x09e2, 0x09e3], // Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal + [0x09fe, 0x09fe], // (nil) .. + [0x0a01, 0x0a02], // Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + [0x0a3c, 0x0a3c], // Gurmukhi Sign Nukta ..Gurmukhi Sign Nukta + [0x0a41, 0x0a42], // Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + [0x0a47, 0x0a48], // Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai + [0x0a4b, 0x0a4d], // Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama + [0x0a51, 0x0a51], // Gurmukhi Sign Udaat ..Gurmukhi Sign Udaat + [0x0a70, 0x0a71], // Gurmukhi Tippi ..Gurmukhi Addak + [0x0a75, 0x0a75], // Gurmukhi Sign Yakash ..Gurmukhi Sign Yakash + [0x0a81, 0x0a82], // Gujarati Sign Candrabind..Gujarati Sign Anusvara + [0x0abc, 0x0abc], // Gujarati Sign Nukta ..Gujarati Sign Nukta + [0x0ac1, 0x0ac5], // Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand + [0x0ac7, 0x0ac8], // Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai + [0x0acd, 0x0acd], // Gujarati Sign Virama ..Gujarati Sign Virama + [0x0ae2, 0x0ae3], // Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca + [0x0afa, 0x0aff], // (nil) .. 
+ [0x0b01, 0x0b01], // Oriya Sign Candrabindu ..Oriya Sign Candrabindu + [0x0b3c, 0x0b3c], // Oriya Sign Nukta ..Oriya Sign Nukta + [0x0b3f, 0x0b3f], // Oriya Vowel Sign I ..Oriya Vowel Sign I + [0x0b41, 0x0b44], // Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic + [0x0b4d, 0x0b4d], // Oriya Sign Virama ..Oriya Sign Virama + [0x0b56, 0x0b56], // Oriya Ai Length Mark ..Oriya Ai Length Mark + [0x0b62, 0x0b63], // Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic + [0x0b82, 0x0b82], // Tamil Sign Anusvara ..Tamil Sign Anusvara + [0x0bc0, 0x0bc0], // Tamil Vowel Sign Ii ..Tamil Vowel Sign Ii + [0x0bcd, 0x0bcd], // Tamil Sign Virama ..Tamil Sign Virama + [0x0c00, 0x0c00], // (nil) .. + [0x0c04, 0x0c04], // (nil) .. + [0x0c3e, 0x0c40], // Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + [0x0c46, 0x0c48], // Telugu Vowel Sign E ..Telugu Vowel Sign Ai + [0x0c4a, 0x0c4d], // Telugu Vowel Sign O ..Telugu Sign Virama + [0x0c55, 0x0c56], // Telugu Length Mark ..Telugu Ai Length Mark + [0x0c62, 0x0c63], // Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali + [0x0c81, 0x0c81], // (nil) .. + [0x0cbc, 0x0cbc], // Kannada Sign Nukta ..Kannada Sign Nukta + [0x0cbf, 0x0cbf], // Kannada Vowel Sign I ..Kannada Vowel Sign I + [0x0cc6, 0x0cc6], // Kannada Vowel Sign E ..Kannada Vowel Sign E + [0x0ccc, 0x0ccd], // Kannada Vowel Sign Au ..Kannada Sign Virama + [0x0ce2, 0x0ce3], // Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal + [0x0d00, 0x0d01], // (nil) .. + [0x0d3b, 0x0d3c], // (nil) .. 
+ [0x0d41, 0x0d44], // Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc + [0x0d4d, 0x0d4d], // Malayalam Sign Virama ..Malayalam Sign Virama + [0x0d62, 0x0d63], // Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + [0x0dca, 0x0dca], // Sinhala Sign Al-lakuna ..Sinhala Sign Al-lakuna + [0x0dd2, 0x0dd4], // Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + [0x0dd6, 0x0dd6], // Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga + [0x0e31, 0x0e31], // Thai Character Mai Han-a..Thai Character Mai Han-a + [0x0e34, 0x0e3a], // Thai Character Sara I ..Thai Character Phinthu + [0x0e47, 0x0e4e], // Thai Character Maitaikhu..Thai Character Yamakkan + [0x0eb1, 0x0eb1], // Lao Vowel Sign Mai Kan ..Lao Vowel Sign Mai Kan + [0x0eb4, 0x0eb9], // Lao Vowel Sign I ..Lao Vowel Sign Uu + [0x0ebb, 0x0ebc], // Lao Vowel Sign Mai Kon ..Lao Semivowel Sign Lo + [0x0ec8, 0x0ecd], // Lao Tone Mai Ek ..Lao Niggahita + [0x0f18, 0x0f19], // Tibetan Astrological Sig..Tibetan Astrological Sig + [0x0f35, 0x0f35], // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung + [0x0f37, 0x0f37], // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung + [0x0f39, 0x0f39], // Tibetan Mark Tsa -phru ..Tibetan Mark Tsa -phru + [0x0f71, 0x0f7e], // Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga + [0x0f80, 0x0f84], // Tibetan Vowel Sign Rever..Tibetan Mark Halanta + [0x0f86, 0x0f87], // Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags + [0x0f8d, 0x0f97], // (nil) ..Tibetan Subjoined Letter + [0x0f99, 0x0fbc], // Tibetan Subjoined Letter..Tibetan Subjoined Letter + [0x0fc6, 0x0fc6], // Tibetan Symbol Padma Gda..Tibetan Symbol Padma Gda + [0x102d, 0x1030], // Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu + [0x1032, 0x1037], // Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below + [0x1039, 0x103a], // Myanmar Sign Virama ..Myanmar Sign Asat + [0x103d, 0x103e], // Myanmar Consonant Sign M..Myanmar Consonant Sign M + [0x1058, 0x1059], // Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + [0x105e, 0x1060], // Myanmar 
Consonant Sign M..Myanmar Consonant Sign M + [0x1071, 0x1074], // Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah + [0x1082, 0x1082], // Myanmar Consonant Sign S..Myanmar Consonant Sign S + [0x1085, 0x1086], // Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan + [0x108d, 0x108d], // Myanmar Sign Shan Counci..Myanmar Sign Shan Counci + [0x109d, 0x109d], // Myanmar Vowel Sign Aiton..Myanmar Vowel Sign Aiton + [0x135d, 0x135f], // (nil) ..Ethiopic Combining Gemin + [0x1712, 0x1714], // Tagalog Vowel Sign I ..Tagalog Sign Virama + [0x1732, 0x1734], // Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod + [0x1752, 0x1753], // Buhid Vowel Sign I ..Buhid Vowel Sign U + [0x1772, 0x1773], // Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U + [0x17b4, 0x17b5], // Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa + [0x17b7, 0x17bd], // Khmer Vowel Sign I ..Khmer Vowel Sign Ua + [0x17c6, 0x17c6], // Khmer Sign Nikahit ..Khmer Sign Nikahit + [0x17c9, 0x17d3], // Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + [0x17dd, 0x17dd], // Khmer Sign Atthacan ..Khmer Sign Atthacan + [0x180b, 0x180d], // Mongolian Free Variation..Mongolian Free Variation + [0x1885, 0x1886], // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal + [0x18a9, 0x18a9], // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal + [0x1920, 0x1922], // Limbu Vowel Sign A ..Limbu Vowel Sign U + [0x1927, 0x1928], // Limbu Vowel Sign E ..Limbu Vowel Sign O + [0x1932, 0x1932], // Limbu Small Letter Anusv..Limbu Small Letter Anusv + [0x1939, 0x193b], // Limbu Sign Mukphreng ..Limbu Sign Sa-i + [0x1a17, 0x1a18], // Buginese Vowel Sign I ..Buginese Vowel Sign U + [0x1a1b, 0x1a1b], // Buginese Vowel Sign Ae ..Buginese Vowel Sign Ae + [0x1a56, 0x1a56], // Tai Tham Consonant Sign ..Tai Tham Consonant Sign + [0x1a58, 0x1a5e], // Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign + [0x1a60, 0x1a60], // Tai Tham Sign Sakot ..Tai Tham Sign Sakot + [0x1a62, 0x1a62], // Tai Tham Vowel Sign Mai ..Tai Tham Vowel Sign Mai + [0x1a65, 0x1a6c], // 
Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B + [0x1a73, 0x1a7c], // Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + [0x1a7f, 0x1a7f], // Tai Tham Combining Crypt..Tai Tham Combining Crypt + [0x1ab0, 0x1abe], // (nil) .. + [0x1b00, 0x1b03], // Balinese Sign Ulu Ricem ..Balinese Sign Surang + [0x1b34, 0x1b34], // Balinese Sign Rerekan ..Balinese Sign Rerekan + [0x1b36, 0x1b3a], // Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R + [0x1b3c, 0x1b3c], // Balinese Vowel Sign La L..Balinese Vowel Sign La L + [0x1b42, 0x1b42], // Balinese Vowel Sign Pepe..Balinese Vowel Sign Pepe + [0x1b6b, 0x1b73], // Balinese Musical Symbol ..Balinese Musical Symbol + [0x1b80, 0x1b81], // Sundanese Sign Panyecek ..Sundanese Sign Panglayar + [0x1ba2, 0x1ba5], // Sundanese Consonant Sign..Sundanese Vowel Sign Pan + [0x1ba8, 0x1ba9], // Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan + [0x1bab, 0x1bad], // (nil) .. + [0x1be6, 0x1be6], // (nil) .. + [0x1be8, 0x1be9], // (nil) .. + [0x1bed, 0x1bed], // (nil) .. + [0x1bef, 0x1bf1], // (nil) .. + [0x1c2c, 0x1c33], // Lepcha Vowel Sign E ..Lepcha Consonant Sign T + [0x1c36, 0x1c37], // Lepcha Sign Ran ..Lepcha Sign Nukta + [0x1cd0, 0x1cd2], // Vedic Tone Karshana ..Vedic Tone Prenkha + [0x1cd4, 0x1ce0], // Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash + [0x1ce2, 0x1ce8], // Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + [0x1ced, 0x1ced], // Vedic Sign Tiryak ..Vedic Sign Tiryak + [0x1cf4, 0x1cf4], // (nil) .. + [0x1cf8, 0x1cf9], // (nil) .. + [0x1dc0, 0x1df9], // Combining Dotted Grave A.. + [0x1dfb, 0x1dff], // (nil) ..Combining Right Arrowhea + [0x20d0, 0x20f0], // Combining Left Harpoon A..Combining Asterisk Above + [0x2cef, 0x2cf1], // Coptic Combining Ni Abov..Coptic Combining Spiritu + [0x2d7f, 0x2d7f], // (nil) .. 
+ [0x2de0, 0x2dff], // Combining Cyrillic Lette..Combining Cyrillic Lette + [0x302a, 0x302d], // Ideographic Level Tone M..Ideographic Entering Ton + [0x3099, 0x309a], // Combining Katakana-hirag..Combining Katakana-hirag + [0xa66f, 0xa672], // Combining Cyrillic Vzmet..Combining Cyrillic Thous + [0xa674, 0xa67d], // (nil) ..Combining Cyrillic Payer + [0xa69e, 0xa69f], // (nil) .. + [0xa6f0, 0xa6f1], // Bamum Combining Mark Koq..Bamum Combining Mark Tuk + [0xa802, 0xa802], // Syloti Nagri Sign Dvisva..Syloti Nagri Sign Dvisva + [0xa806, 0xa806], // Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant + [0xa80b, 0xa80b], // Syloti Nagri Sign Anusva..Syloti Nagri Sign Anusva + [0xa825, 0xa826], // Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + [0xa8c4, 0xa8c5], // Saurashtra Sign Virama .. + [0xa8e0, 0xa8f1], // Combining Devanagari Dig..Combining Devanagari Sig + [0xa8ff, 0xa8ff], // (nil) .. + [0xa926, 0xa92d], // Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop + [0xa947, 0xa951], // Rejang Vowel Sign I ..Rejang Consonant Sign R + [0xa980, 0xa982], // Javanese Sign Panyangga ..Javanese Sign Layar + [0xa9b3, 0xa9b3], // Javanese Sign Cecak Telu..Javanese Sign Cecak Telu + [0xa9b6, 0xa9b9], // Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku + [0xa9bc, 0xa9bc], // Javanese Vowel Sign Pepe..Javanese Vowel Sign Pepe + [0xa9e5, 0xa9e5], // (nil) .. + [0xaa29, 0xaa2e], // Cham Vowel Sign Aa ..Cham Vowel Sign Oe + [0xaa31, 0xaa32], // Cham Vowel Sign Au ..Cham Vowel Sign Ue + [0xaa35, 0xaa36], // Cham Consonant Sign La ..Cham Consonant Sign Wa + [0xaa43, 0xaa43], // Cham Consonant Sign Fina..Cham Consonant Sign Fina + [0xaa4c, 0xaa4c], // Cham Consonant Sign Fina..Cham Consonant Sign Fina + [0xaa7c, 0xaa7c], // (nil) .. 
+ [0xaab0, 0xaab0], // Tai Viet Mai Kang ..Tai Viet Mai Kang + [0xaab2, 0xaab4], // Tai Viet Vowel I ..Tai Viet Vowel U + [0xaab7, 0xaab8], // Tai Viet Mai Khit ..Tai Viet Vowel Ia + [0xaabe, 0xaabf], // Tai Viet Vowel Am ..Tai Viet Tone Mai Ek + [0xaac1, 0xaac1], // Tai Viet Tone Mai Tho ..Tai Viet Tone Mai Tho + [0xaaec, 0xaaed], // (nil) .. + [0xaaf6, 0xaaf6], // (nil) .. + [0xabe5, 0xabe5], // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + [0xabe8, 0xabe8], // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + [0xabed, 0xabed], // Meetei Mayek Apun Iyek ..Meetei Mayek Apun Iyek + [0xfb1e, 0xfb1e], // Hebrew Point Judeo-spani..Hebrew Point Judeo-spani + [0xfe00, 0xfe0f], // Variation Selector-1 ..Variation Selector-16 + [0xfe20, 0xfe2f] // Combining Ligature Left .. +]; + +const ZERO_WIDTH_HIGH = [ + [0x101fd, 0x101fd], // Phaistos Disc Sign Combi..Phaistos Disc Sign Combi + [0x102e0, 0x102e0], // (nil) .. + [0x10376, 0x1037a], // (nil) .. + [0x10a01, 0x10a03], // Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo + [0x10a05, 0x10a06], // Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O + [0x10a0c, 0x10a0f], // Kharoshthi Vowel Length ..Kharoshthi Sign Visarga + [0x10a38, 0x10a3a], // Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo + [0x10a3f, 0x10a3f], // Kharoshthi Virama ..Kharoshthi Virama + [0x10ae5, 0x10ae6], // (nil) .. + [0x10d24, 0x10d27], // (nil) .. + [0x10f46, 0x10f50], // (nil) .. + [0x11001, 0x11001], // (nil) .. + [0x11038, 0x11046], // (nil) .. + [0x1107f, 0x11081], // (nil) ..Kaithi Sign Anusvara + [0x110b3, 0x110b6], // Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai + [0x110b9, 0x110ba], // Kaithi Sign Virama ..Kaithi Sign Nukta + [0x11100, 0x11102], // (nil) .. + [0x11127, 0x1112b], // (nil) .. + [0x1112d, 0x11134], // (nil) .. + [0x11173, 0x11173], // (nil) .. + [0x11180, 0x11181], // (nil) .. + [0x111b6, 0x111be], // (nil) .. + [0x111c9, 0x111cc], // (nil) .. + [0x1122f, 0x11231], // (nil) .. + [0x11234, 0x11234], // (nil) .. 
+ [0x11236, 0x11237], // (nil) .. + [0x1123e, 0x1123e], // (nil) .. + [0x112df, 0x112df], // (nil) .. + [0x112e3, 0x112ea], // (nil) .. + [0x11300, 0x11301], // (nil) .. + [0x1133b, 0x1133c], // (nil) .. + [0x11340, 0x11340], // (nil) .. + [0x11366, 0x1136c], // (nil) .. + [0x11370, 0x11374], // (nil) .. + [0x11438, 0x1143f], // (nil) .. + [0x11442, 0x11444], // (nil) .. + [0x11446, 0x11446], // (nil) .. + [0x1145e, 0x1145e], // (nil) .. + [0x114b3, 0x114b8], // (nil) .. + [0x114ba, 0x114ba], // (nil) .. + [0x114bf, 0x114c0], // (nil) .. + [0x114c2, 0x114c3], // (nil) .. + [0x115b2, 0x115b5], // (nil) .. + [0x115bc, 0x115bd], // (nil) .. + [0x115bf, 0x115c0], // (nil) .. + [0x115dc, 0x115dd], // (nil) .. + [0x11633, 0x1163a], // (nil) .. + [0x1163d, 0x1163d], // (nil) .. + [0x1163f, 0x11640], // (nil) .. + [0x116ab, 0x116ab], // (nil) .. + [0x116ad, 0x116ad], // (nil) .. + [0x116b0, 0x116b5], // (nil) .. + [0x116b7, 0x116b7], // (nil) .. + [0x1171d, 0x1171f], // (nil) .. + [0x11722, 0x11725], // (nil) .. + [0x11727, 0x1172b], // (nil) .. + [0x1182f, 0x11837], // (nil) .. + [0x11839, 0x1183a], // (nil) .. + [0x11a01, 0x11a0a], // (nil) .. + [0x11a33, 0x11a38], // (nil) .. + [0x11a3b, 0x11a3e], // (nil) .. + [0x11a47, 0x11a47], // (nil) .. + [0x11a51, 0x11a56], // (nil) .. + [0x11a59, 0x11a5b], // (nil) .. + [0x11a8a, 0x11a96], // (nil) .. + [0x11a98, 0x11a99], // (nil) .. + [0x11c30, 0x11c36], // (nil) .. + [0x11c38, 0x11c3d], // (nil) .. + [0x11c3f, 0x11c3f], // (nil) .. + [0x11c92, 0x11ca7], // (nil) .. + [0x11caa, 0x11cb0], // (nil) .. + [0x11cb2, 0x11cb3], // (nil) .. + [0x11cb5, 0x11cb6], // (nil) .. + [0x11d31, 0x11d36], // (nil) .. + [0x11d3a, 0x11d3a], // (nil) .. + [0x11d3c, 0x11d3d], // (nil) .. + [0x11d3f, 0x11d45], // (nil) .. + [0x11d47, 0x11d47], // (nil) .. + [0x11d90, 0x11d91], // (nil) .. + [0x11d95, 0x11d95], // (nil) .. + [0x11d97, 0x11d97], // (nil) .. + [0x11ef3, 0x11ef4], // (nil) .. + [0x16af0, 0x16af4], // (nil) .. 
+ [0x16b30, 0x16b36], // (nil) .. + [0x16f8f, 0x16f92], // (nil) .. + [0x1bc9d, 0x1bc9e], // (nil) .. + [0x1d167, 0x1d169], // Musical Symbol Combining..Musical Symbol Combining + [0x1d17b, 0x1d182], // Musical Symbol Combining..Musical Symbol Combining + [0x1d185, 0x1d18b], // Musical Symbol Combining..Musical Symbol Combining + [0x1d1aa, 0x1d1ad], // Musical Symbol Combining..Musical Symbol Combining + [0x1d242, 0x1d244], // Combining Greek Musical ..Combining Greek Musical + [0x1da00, 0x1da36], // (nil) .. + [0x1da3b, 0x1da6c], // (nil) .. + [0x1da75, 0x1da75], // (nil) .. + [0x1da84, 0x1da84], // (nil) .. + [0x1da9b, 0x1da9f], // (nil) .. + [0x1daa1, 0x1daaf], // (nil) .. + [0x1e000, 0x1e006], // (nil) .. + [0x1e008, 0x1e018], // (nil) .. + [0x1e01b, 0x1e021], // (nil) .. + [0x1e023, 0x1e024], // (nil) .. + [0x1e026, 0x1e02a], // (nil) .. + [0x1e8d0, 0x1e8d6], // (nil) .. + [0x1e944, 0x1e94a], // (nil) .. + [0xe0100, 0xe01ef] // Variation Selector-17 ..Variation Selector-256 +]; + +/* + Ideally the following values should be used to create the lookup table, + needs to be tested before we activate this. + +// Generated: 2018-09-24T16:45:44.464578 +// Source: EastAsianWidth-11.0.0.txt +// Date: 2018-05-14, 09:41:59 GMT [KW, LI] +const WIDE_EASTASIAN_BMP = [ + [0x1100, 0x115f], // Hangul Choseong Kiyeok ..Hangul Choseong Filler + [0x231a, 0x231b], // Watch ..Hourglass + [0x2329, 0x232a], // Left-pointing Angle Brac..Right-pointing Angle Bra + [0x23e9, 0x23ec], // (nil) .. + [0x23f0, 0x23f0], // (nil) .. + [0x23f3, 0x23f3], // (nil) .. 
+ [0x25fd, 0x25fe], // White Medium Small Squar..Black Medium Small Squar + [0x2614, 0x2615], // Umbrella With Rain Drops..Hot Beverage + [0x2648, 0x2653], // Aries ..Pisces + [0x267f, 0x267f], // Wheelchair Symbol ..Wheelchair Symbol + [0x2693, 0x2693], // Anchor ..Anchor + [0x26a1, 0x26a1], // High Voltage Sign ..High Voltage Sign + [0x26aa, 0x26ab], // Medium White Circle ..Medium Black Circle + [0x26bd, 0x26be], // Soccer Ball ..Baseball + [0x26c4, 0x26c5], // Snowman Without Snow ..Sun Behind Cloud + [0x26ce, 0x26ce], // (nil) .. + [0x26d4, 0x26d4], // No Entry ..No Entry + [0x26ea, 0x26ea], // Church ..Church + [0x26f2, 0x26f3], // Fountain ..Flag In Hole + [0x26f5, 0x26f5], // Sailboat ..Sailboat + [0x26fa, 0x26fa], // Tent ..Tent + [0x26fd, 0x26fd], // Fuel Pump ..Fuel Pump + [0x2705, 0x2705], // (nil) .. + [0x270a, 0x270b], // (nil) .. + [0x2728, 0x2728], // (nil) .. + [0x274c, 0x274c], // (nil) .. + [0x274e, 0x274e], // (nil) .. + [0x2753, 0x2755], // (nil) .. + [0x2757, 0x2757], // Heavy Exclamation Mark S..Heavy Exclamation Mark S + [0x2795, 0x2797], // (nil) .. + [0x27b0, 0x27b0], // (nil) .. + [0x27bf, 0x27bf], // (nil) .. + [0x2b1b, 0x2b1c], // Black Large Square ..White Large Square + [0x2b50, 0x2b50], // White Medium Star ..White Medium Star + [0x2b55, 0x2b55], // Heavy Large Circle ..Heavy Large Circle + [0x2e80, 0x2e99], // Cjk Radical Repeat ..Cjk Radical Rap + [0x2e9b, 0x2ef3], // Cjk Radical Choke ..Cjk Radical C-simplified + [0x2f00, 0x2fd5], // Kangxi Radical One ..Kangxi Radical Flute + [0x2ff0, 0x2ffb], // Ideographic Description ..Ideographic Description + [0x3000, 0x303e], // Ideographic Space ..Ideographic Variation In + [0x3041, 0x3096], // Hiragana Letter Small A ..Hiragana Letter Small Ke + [0x3099, 0x30ff], // Combining Katakana-hirag..Katakana Digraph Koto + [0x3105, 0x312f], // Bopomofo Letter B .. + [0x3131, 0x318e], // Hangul Letter Kiyeok ..Hangul Letter Araeae + [0x3190, 0x31ba], // Ideographic Annotation L.. 
+ [0x31c0, 0x31e3], // Cjk Stroke T ..Cjk Stroke Q + [0x31f0, 0x321e], // Katakana Letter Small Ku..Parenthesized Korean Cha + [0x3220, 0x3247], // Parenthesized Ideograph ..Circled Ideograph Koto + [0x3250, 0x32fe], // Partnership Sign ..Circled Katakana Wo + [0x3300, 0x4dbf], // Square Apaato .. + [0x4e00, 0xa48c], // Cjk Unified Ideograph-4e..Yi Syllable Yyr + [0xa490, 0xa4c6], // Yi Radical Qot ..Yi Radical Ke + [0xa960, 0xa97c], // Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo + [0xac00, 0xd7a3], // Hangul Syllable Ga ..Hangul Syllable Hih + [0xf900, 0xfaff], // Cjk Compatibility Ideogr.. + [0xfe10, 0xfe19], // Presentation Form For Ve..Presentation Form For Ve + [0xfe30, 0xfe52], // Presentation Form For Ve..Small Full Stop + [0xfe54, 0xfe66], // Small Semicolon ..Small Equals Sign + [0xfe68, 0xfe6b], // Small Reverse Solidus ..Small Commercial At + [0xff01, 0xff60], // Fullwidth Exclamation Ma..Fullwidth Right White Pa + [0xffe0, 0xffe6], // Fullwidth Cent Sign ..Fullwidth Won Sign +]; +*/ + +const WIDE_EASTASIAN_HIGH = [ + [0x16fe0, 0x16fe1], // (nil) .. + [0x17000, 0x187f1], // (nil) .. + [0x18800, 0x18af2], // (nil) .. + [0x1b000, 0x1b11e], // (nil) .. + [0x1b170, 0x1b2fb], // (nil) .. + [0x1f004, 0x1f004], // Mahjong Tile Red Dragon ..Mahjong Tile Red Dragon + [0x1f0cf, 0x1f0cf], // (nil) .. + [0x1f18e, 0x1f18e], // (nil) .. + [0x1f191, 0x1f19a], // (nil) .. + [0x1f200, 0x1f202], // Square Hiragana Hoka .. + [0x1f210, 0x1f23b], // Squared Cjk Unified Ideo.. + [0x1f240, 0x1f248], // Tortoise Shell Bracketed..Tortoise Shell Bracketed + [0x1f250, 0x1f251], // (nil) .. + [0x1f260, 0x1f265], // (nil) .. + [0x1f300, 0x1f320], // (nil) .. + [0x1f32d, 0x1f335], // (nil) .. + [0x1f337, 0x1f37c], // (nil) .. + [0x1f37e, 0x1f393], // (nil) .. + [0x1f3a0, 0x1f3ca], // (nil) .. + [0x1f3cf, 0x1f3d3], // (nil) .. + [0x1f3e0, 0x1f3f0], // (nil) .. + [0x1f3f4, 0x1f3f4], // (nil) .. + [0x1f3f8, 0x1f43e], // (nil) .. + [0x1f440, 0x1f440], // (nil) .. 
/**
 * Width lookup table for the BMP, one byte per codepoint.
 * Stays `null` until it is built by `v11.init()` or by the first BMP lookup.
 */
let lookupTable: Uint8Array | null = null;

/**
 * Binary search for `ucs` in a list of inclusive `[first, last]` ranges.
 * The list must be sorted in ascending order for the search to be correct.
 *
 * @param ucs Codepoint to look up.
 * @param data Range table to search.
 * @returns `true` if one of the ranges contains `ucs`, `false` otherwise
 *          (including for an empty table).
 */
function bisearch(ucs: number, data: number[][]): boolean {
  let min = 0;
  let max = data.length - 1;
  // empty table, or codepoint outside the span covered by the table
  if (max < 0 || ucs < data[0][0] || ucs > data[max][1]) {
    return false;
  }
  while (max >= min) {
    const mid = (min + max) >> 1;
    if (ucs > data[mid][1]) {
      min = mid + 1;
    } else if (ucs < data[mid][0]) {
      max = mid - 1;
    } else {
      return true;
    }
  }
  return false;
}

/**
 * Width of a codepoint outside the lookup table (>= 65536), found by
 * searching the range tables.
 *
 * @param ucs Codepoint to measure.
 * @returns 0 if listed in ZERO_WIDTH_HIGH, 2 if listed in WIDE_EASTASIAN_HIGH,
 *          1 otherwise.
 */
function wcwidthHigh(ucs: number): 0 | 1 | 2 {
  if (bisearch(ucs, ZERO_WIDTH_HIGH)) {
    return 0;
  }
  if (bisearch(ucs, WIDE_EASTASIAN_HIGH)) {
    return 2;
  }
  return 1;
}

/**
 * Build the BMP width table (65536 entries with values 0, 1 or 2).
 *
 * @returns The freshly built table.
 */
function buildLookupTable(): Uint8Array {
  const table = new Uint8Array(65536);
  fill(table, 1);
  table[0] = 0;
  // control chars
  fill(table, 0, 1, 32);
  fill(table, 0, 0x7f, 0xa0);

  // FIXME: after testing, use WIDE_EASTASIAN_BMP (commented loop below)
  //        instead of the hard-coded ranges
  // apply wide char rules first
  // wide chars
  fill(table, 2, 0x1100, 0x1160);
  table[0x2329] = 2;
  table[0x232a] = 2;
  fill(table, 2, 0x2e80, 0xa4d0);
  table[0x303f] = 1;  // wrongly set to wide by the previous fill

  fill(table, 2, 0xac00, 0xd7a4);
  fill(table, 2, 0xf900, 0xfb00);
  fill(table, 2, 0xfe10, 0xfe1a);
  fill(table, 2, 0xfe30, 0xfe70);
  fill(table, 2, 0xff00, 0xff61);
  fill(table, 2, 0xffe0, 0xffe7);

  // for (let r = 0; r < WIDE_EASTASIAN_BMP.length; ++r) {
  //   fill(table, 2, WIDE_EASTASIAN_BMP[r][0], WIDE_EASTASIAN_BMP[r][1] + 1);
  // }

  // apply combining last to ensure we overwrite
  // wrongly wide set chars:
  //    the original algo evals combining first and falls
  //    through to wide check so we simply do here the opposite
  // combining 0
  for (let r = 0; r < ZERO_WIDTH_BMP.length; ++r) {
    fill(table, 0, ZERO_WIDTH_BMP[r][0], ZERO_WIDTH_BMP[r][1] + 1);
  }

  return table;
}

/**
 * Unicode version 11 width provider.
 *
 * `wcwidth` returns 0 for codepoints below 32, 1 for 32..126, the lookup
 * table value for the rest of the BMP and the result of a range table
 * search for codepoints >= 65536.
 * `init` builds the BMP lookup table once; repeated calls are no-ops.
 */
export const v11: IUnicodeVersionProvider = {
  version: 11,
  wcwidth: (ucs: number): number => {
    if (ucs < 32) {
      return 0;
    }
    if (ucs < 127) {
      return 1;
    }
    if (ucs < 65536) {
      // build the table on first use so a lookup before init() cannot
      // dereference null
      if (!lookupTable) {
        lookupTable = buildLookupTable();
      }
      return lookupTable[ucs];
    }
    // do a full search for high codepoints
    return wcwidthHigh(ucs);
  },
  init: () => {
    // init only once
    if (lookupTable) {
      return;
    }
    lookupTable = buildLookupTable();
  }
};
/**
 * Reference oracle: the previous wcwidth implementation, kept verbatim so the
 * new v6 provider can be compared against it. It packs the BMP widths into a
 * 2-bit-per-code-point table that is built on first use.
 * `opts.nul` / `opts.control` are the widths reported for NUL and for C0/C1
 * control characters.
 */
// old implementation
const wcwidthOld = (function(opts: {nul: number, control: number}): (ucs: number) => number {
  // extracted from https://www.cl.cam.ac.uk/%7Emgk25/ucs/wcwidth.c
  // combining characters
  const COMBINING_BMP = [
    [0x0300, 0x036F], [0x0483, 0x0486], [0x0488, 0x0489],
    [0x0591, 0x05BD], [0x05BF, 0x05BF], [0x05C1, 0x05C2],
    [0x05C4, 0x05C5], [0x05C7, 0x05C7], [0x0600, 0x0603],
    [0x0610, 0x0615], [0x064B, 0x065E], [0x0670, 0x0670],
    [0x06D6, 0x06E4], [0x06E7, 0x06E8], [0x06EA, 0x06ED],
    [0x070F, 0x070F], [0x0711, 0x0711], [0x0730, 0x074A],
    [0x07A6, 0x07B0], [0x07EB, 0x07F3], [0x0901, 0x0902],
    [0x093C, 0x093C], [0x0941, 0x0948], [0x094D, 0x094D],
    [0x0951, 0x0954], [0x0962, 0x0963], [0x0981, 0x0981],
    [0x09BC, 0x09BC], [0x09C1, 0x09C4], [0x09CD, 0x09CD],
    [0x09E2, 0x09E3], [0x0A01, 0x0A02], [0x0A3C, 0x0A3C],
    [0x0A41, 0x0A42], [0x0A47, 0x0A48], [0x0A4B, 0x0A4D],
    [0x0A70, 0x0A71], [0x0A81, 0x0A82], [0x0ABC, 0x0ABC],
    [0x0AC1, 0x0AC5], [0x0AC7, 0x0AC8], [0x0ACD, 0x0ACD],
    [0x0AE2, 0x0AE3], [0x0B01, 0x0B01], [0x0B3C, 0x0B3C],
    [0x0B3F, 0x0B3F], [0x0B41, 0x0B43], [0x0B4D, 0x0B4D],
    [0x0B56, 0x0B56], [0x0B82, 0x0B82], [0x0BC0, 0x0BC0],
    [0x0BCD, 0x0BCD], [0x0C3E, 0x0C40], [0x0C46, 0x0C48],
    [0x0C4A, 0x0C4D], [0x0C55, 0x0C56], [0x0CBC, 0x0CBC],
    [0x0CBF, 0x0CBF], [0x0CC6, 0x0CC6], [0x0CCC, 0x0CCD],
    [0x0CE2, 0x0CE3], [0x0D41, 0x0D43], [0x0D4D, 0x0D4D],
    [0x0DCA, 0x0DCA], [0x0DD2, 0x0DD4], [0x0DD6, 0x0DD6],
    [0x0E31, 0x0E31], [0x0E34, 0x0E3A], [0x0E47, 0x0E4E],
    [0x0EB1, 0x0EB1], [0x0EB4, 0x0EB9], [0x0EBB, 0x0EBC],
    [0x0EC8, 0x0ECD], [0x0F18, 0x0F19], [0x0F35, 0x0F35],
    [0x0F37, 0x0F37], [0x0F39, 0x0F39], [0x0F71, 0x0F7E],
    [0x0F80, 0x0F84], [0x0F86, 0x0F87], [0x0F90, 0x0F97],
    [0x0F99, 0x0FBC], [0x0FC6, 0x0FC6], [0x102D, 0x1030],
    [0x1032, 0x1032], [0x1036, 0x1037], [0x1039, 0x1039],
    [0x1058, 0x1059], [0x1160, 0x11FF], [0x135F, 0x135F],
    [0x1712, 0x1714], [0x1732, 0x1734], [0x1752, 0x1753],
    [0x1772, 0x1773], [0x17B4, 0x17B5], [0x17B7, 0x17BD],
    [0x17C6, 0x17C6], [0x17C9, 0x17D3], [0x17DD, 0x17DD],
    [0x180B, 0x180D], [0x18A9, 0x18A9], [0x1920, 0x1922],
    [0x1927, 0x1928], [0x1932, 0x1932], [0x1939, 0x193B],
    [0x1A17, 0x1A18], [0x1B00, 0x1B03], [0x1B34, 0x1B34],
    [0x1B36, 0x1B3A], [0x1B3C, 0x1B3C], [0x1B42, 0x1B42],
    [0x1B6B, 0x1B73], [0x1DC0, 0x1DCA], [0x1DFE, 0x1DFF],
    [0x200B, 0x200F], [0x202A, 0x202E], [0x2060, 0x2063],
    [0x206A, 0x206F], [0x20D0, 0x20EF], [0x302A, 0x302F],
    [0x3099, 0x309A], [0xA806, 0xA806], [0xA80B, 0xA80B],
    [0xA825, 0xA826], [0xFB1E, 0xFB1E], [0xFE00, 0xFE0F],
    [0xFE20, 0xFE23], [0xFEFF, 0xFEFF], [0xFFF9, 0xFFFB]
  ];
  const COMBINING_HIGH = [
    [0x10A01, 0x10A03], [0x10A05, 0x10A06], [0x10A0C, 0x10A0F],
    [0x10A38, 0x10A3A], [0x10A3F, 0x10A3F], [0x1D167, 0x1D169],
    [0x1D173, 0x1D182], [0x1D185, 0x1D18B], [0x1D1AA, 0x1D1AD],
    [0x1D242, 0x1D244], [0xE0001, 0xE0001], [0xE0020, 0xE007F],
    [0xE0100, 0xE01EF]
  ];
  // binary search
  function bisearch(ucs: number, data: number[][]): boolean {
    let min = 0;
    let max = data.length - 1;
    let mid;
    if (ucs < data[0][0] || ucs > data[max][1]) {
      return false;
    }
    while (max >= min) {
      mid = (min + max) >> 1;
      if (ucs > data[mid][1]) {
        min = mid + 1;
      } else if (ucs < data[mid][0]) {
        max = mid - 1;
      } else {
        return true;
      }
    }
    return false;
  }
  function wcwidthBMP(ucs: number): number {
    // test for 8-bit control characters
    if (ucs === 0) {
      return opts.nul;
    }
    if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) {
      return opts.control;
    }
    // binary search in table of non-spacing characters
    if (bisearch(ucs, COMBINING_BMP)) {
      return 0;
    }
    // if we arrive here, ucs is not a combining or C0/C1 control character
    if (isWideBMP(ucs)) {
      return 2;
    }
    return 1;
  }
  function isWideBMP(ucs: number): boolean {
    return (
      ucs >= 0x1100 && (
      ucs <= 0x115f ||                // Hangul Jamo init. consonants
      ucs === 0x2329 ||
      ucs === 0x232a ||
      (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs !== 0x303f) ||  // CJK..Yi
      (ucs >= 0xac00 && ucs <= 0xd7a3) ||    // Hangul Syllables
      (ucs >= 0xf900 && ucs <= 0xfaff) ||    // CJK Compat Ideographs
      (ucs >= 0xfe10 && ucs <= 0xfe19) ||    // Vertical forms
      (ucs >= 0xfe30 && ucs <= 0xfe6f) ||    // CJK Compat Forms
      (ucs >= 0xff00 && ucs <= 0xff60) ||    // Fullwidth Forms
      (ucs >= 0xffe0 && ucs <= 0xffe6)));
  }
  function wcwidthHigh(ucs: number): 0 | 1 | 2 {
    if (bisearch(ucs, COMBINING_HIGH)) {
      return 0;
    }
    if ((ucs >= 0x20000 && ucs <= 0x2fffd) || (ucs >= 0x30000 && ucs <= 0x3fffd)) {
      return 2;
    }
    return 1;
  }
  const control = opts.control | 0;
  let table: number[] | Uint32Array = null;
  function initTable(): number[] | Uint32Array {
    // lookup table for BMP
    const CODEPOINTS = 65536;  // BMP holds 65536 codepoints
    const BITWIDTH = 2;        // a codepoint can have a width of 0, 1 or 2
    const ITEMSIZE = 32;       // using uint32_t
    const CONTAINERSIZE = CODEPOINTS * BITWIDTH / ITEMSIZE;
    const CODEPOINTS_PER_ITEM = ITEMSIZE / BITWIDTH;
    table = (typeof Uint32Array === 'undefined')
      ? new Array(CONTAINERSIZE)
      : new Uint32Array(CONTAINERSIZE);
    for (let i = 0; i < CONTAINERSIZE; ++i) {
      let num = 0;
      let pos = CODEPOINTS_PER_ITEM;
      while (pos--) {
        num = (num << 2) | wcwidthBMP(CODEPOINTS_PER_ITEM * i + pos);
      }
      table[i] = num;
    }
    return table;
  }
  // get width from lookup table
  //   position in container   : num / CODEPOINTS_PER_ITEM
  //     ==> n = table[Math.floor(num / 16)]
  //     ==> n = table[num >> 4]
  //   16 codepoints per number:       FFEEDDCCBBAA99887766554433221100
  //   position in number      : (num % CODEPOINTS_PER_ITEM) * BITWIDTH
  //     ==> m = (n % 16) * 2
  //     ==> m = (num & 15) << 1
  //   right shift to position m
  //     ==> n = n >> m     e.g. m=12  000000000000FFEEDDCCBBAA99887766
  //   we are only interested in 2 LSBs, cut off higher bits
  //     ==> n = n & 3      e.g.       000000000000000000000000000000XX
  return function (num: number): number {
    num = num | 0;  // get asm.js like optimization under V8
    if (num < 32) {
      return control | 0;
    }
    if (num < 127) {
      return 1;
    }
    const t = table || initTable();
    if (num < 65536) {
      return t[num >> 4] >> ((num & 15) << 1) & 3;
    }
    // do a full search for high codepoints
    return wcwidthHigh(num);
  };
})({nul: 0, control: 0});  // configurable options

describe('unicode - v6', () => {
  const versionProvider = v6;
  versionProvider.init();

  /**
   * Compares the provider against the old implementation for every code point
   * in the inclusive range [first, last]. The failure message is only built
   * when a mismatch is found, which keeps the large ranges cheap.
   */
  function assertSameWidths(first: number, last: number): void {
    for (let i = first; i <= last; ++i) {
      const actual = versionProvider.wcwidth(i);
      const expected = wcwidthOld(i);
      if (actual !== expected) {
        assert.equal(actual, expected, `mismatch for i: ${i}`);
      }
    }
  }

  it('wcwidth should match all values from the old implementation', () => {
    // test full BMP range old vs new implementation
    assertSameWidths(0, 0xffff);
  });

  it('wcwidth should match the old implementation above the BMP', () => {
    // the provider takes a separate code path (range search) for code points
    // >= 0x10000, so cover all supplementary planes as well
    assertSameWidths(0x10000, 0x10ffff);
  });
});
// extracted from https://www.cl.cam.ac.uk/%7Emgk25/ucs/wcwidth.c
/**
 * Zero-width (combining) ranges inside the BMP. Each entry is an inclusive
 * [first, last] pair; the list is sorted ascending.
 */
const COMBINING_BMP = [
  [0x0300, 0x036F], [0x0483, 0x0486], [0x0488, 0x0489],
  [0x0591, 0x05BD], [0x05BF, 0x05BF], [0x05C1, 0x05C2],
  [0x05C4, 0x05C5], [0x05C7, 0x05C7], [0x0600, 0x0603],
  [0x0610, 0x0615], [0x064B, 0x065E], [0x0670, 0x0670],
  [0x06D6, 0x06E4], [0x06E7, 0x06E8], [0x06EA, 0x06ED],
  [0x070F, 0x070F], [0x0711, 0x0711], [0x0730, 0x074A],
  [0x07A6, 0x07B0], [0x07EB, 0x07F3], [0x0901, 0x0902],
  [0x093C, 0x093C], [0x0941, 0x0948], [0x094D, 0x094D],
  [0x0951, 0x0954], [0x0962, 0x0963], [0x0981, 0x0981],
  [0x09BC, 0x09BC], [0x09C1, 0x09C4], [0x09CD, 0x09CD],
  [0x09E2, 0x09E3], [0x0A01, 0x0A02], [0x0A3C, 0x0A3C],
  [0x0A41, 0x0A42], [0x0A47, 0x0A48], [0x0A4B, 0x0A4D],
  [0x0A70, 0x0A71], [0x0A81, 0x0A82], [0x0ABC, 0x0ABC],
  [0x0AC1, 0x0AC5], [0x0AC7, 0x0AC8], [0x0ACD, 0x0ACD],
  [0x0AE2, 0x0AE3], [0x0B01, 0x0B01], [0x0B3C, 0x0B3C],
  [0x0B3F, 0x0B3F], [0x0B41, 0x0B43], [0x0B4D, 0x0B4D],
  [0x0B56, 0x0B56], [0x0B82, 0x0B82], [0x0BC0, 0x0BC0],
  [0x0BCD, 0x0BCD], [0x0C3E, 0x0C40], [0x0C46, 0x0C48],
  [0x0C4A, 0x0C4D], [0x0C55, 0x0C56], [0x0CBC, 0x0CBC],
  [0x0CBF, 0x0CBF], [0x0CC6, 0x0CC6], [0x0CCC, 0x0CCD],
  [0x0CE2, 0x0CE3], [0x0D41, 0x0D43], [0x0D4D, 0x0D4D],
  [0x0DCA, 0x0DCA], [0x0DD2, 0x0DD4], [0x0DD6, 0x0DD6],
  [0x0E31, 0x0E31], [0x0E34, 0x0E3A], [0x0E47, 0x0E4E],
  [0x0EB1, 0x0EB1], [0x0EB4, 0x0EB9], [0x0EBB, 0x0EBC],
  [0x0EC8, 0x0ECD], [0x0F18, 0x0F19], [0x0F35, 0x0F35],
  [0x0F37, 0x0F37], [0x0F39, 0x0F39], [0x0F71, 0x0F7E],
  [0x0F80, 0x0F84], [0x0F86, 0x0F87], [0x0F90, 0x0F97],
  [0x0F99, 0x0FBC], [0x0FC6, 0x0FC6], [0x102D, 0x1030],
  [0x1032, 0x1032], [0x1036, 0x1037], [0x1039, 0x1039],
  [0x1058, 0x1059], [0x1160, 0x11FF], [0x135F, 0x135F],
  [0x1712, 0x1714], [0x1732, 0x1734], [0x1752, 0x1753],
  [0x1772, 0x1773], [0x17B4, 0x17B5], [0x17B7, 0x17BD],
  [0x17C6, 0x17C6], [0x17C9, 0x17D3], [0x17DD, 0x17DD],
  [0x180B, 0x180D], [0x18A9, 0x18A9], [0x1920, 0x1922],
  [0x1927, 0x1928], [0x1932, 0x1932], [0x1939, 0x193B],
  [0x1A17, 0x1A18], [0x1B00, 0x1B03], [0x1B34, 0x1B34],
  [0x1B36, 0x1B3A], [0x1B3C, 0x1B3C], [0x1B42, 0x1B42],
  [0x1B6B, 0x1B73], [0x1DC0, 0x1DCA], [0x1DFE, 0x1DFF],
  [0x200B, 0x200F], [0x202A, 0x202E], [0x2060, 0x2063],
  [0x206A, 0x206F], [0x20D0, 0x20EF], [0x302A, 0x302F],
  [0x3099, 0x309A], [0xA806, 0xA806], [0xA80B, 0xA80B],
  [0xA825, 0xA826], [0xFB1E, 0xFB1E], [0xFE00, 0xFE0F],
  [0xFE20, 0xFE23], [0xFEFF, 0xFEFF], [0xFFF9, 0xFFFB]
];

/** Zero-width (combining) ranges above the BMP, inclusive and sorted ascending. */
const COMBINING_HIGH = [
  [0x10A01, 0x10A03], [0x10A05, 0x10A06], [0x10A0C, 0x10A0F],
  [0x10A38, 0x10A3A], [0x10A3F, 0x10A3F], [0x1D167, 0x1D169],
  [0x1D173, 0x1D182], [0x1D185, 0x1D18B], [0x1D1AA, 0x1D1AD],
  [0x1D242, 0x1D244], [0xE0001, 0xE0001], [0xE0020, 0xE007F],
  [0xE0100, 0xE01EF]
];

/**
 * Width table for the BMP, one byte per code point (index = code point).
 * Stays null until it is built by `init()` or by the first BMP lookup.
 */
let lookupTable: Uint8Array | null = null;

/**
 * Binary search over a sorted list of inclusive [first, last] ranges.
 *
 * @param ucs Code point to look up.
 * @param data Sorted, non-overlapping ranges.
 * @returns true if `ucs` lies inside one of the ranges; an empty list never matches.
 */
function bisearch(ucs: number, data: number[][]): boolean {
  let min = 0;
  let max = data.length - 1;
  // The emptiness check has to come first, data[0] is undefined for an empty list.
  if (max < 0 || ucs < data[0][0] || ucs > data[max][1]) {
    return false;
  }
  while (max >= min) {
    const mid = (min + max) >> 1;
    if (ucs > data[mid][1]) {
      min = mid + 1;
    } else if (ucs < data[mid][0]) {
      max = mid - 1;
    } else {
      return true;
    }
  }
  return false;
}

/**
 * Width of a code point above the BMP: 0 if listed in COMBINING_HIGH,
 * 2 inside 0x20000..0x2fffd or 0x30000..0x3fffd, otherwise 1.
 */
function wcwidthHigh(ucs: number): 0 | 1 | 2 {
  if (bisearch(ucs, COMBINING_HIGH)) {
    return 0;
  }
  if ((ucs >= 0x20000 && ucs <= 0x2fffd) || (ucs >= 0x30000 && ucs <= 0x3fffd)) {
    return 2;
  }
  return 1;
}

/**
 * Builds the BMP width table, stores it in `lookupTable` and returns it.
 * Rules are applied in order, so later writes win: default 1, controls 0,
 * wide ranges 2, and finally the combining ranges.
 */
function createBmpLookupTable(): Uint8Array {
  const table = new Uint8Array(65536);
  fill(table, 1);
  table[0] = 0;
  // control chars
  fill(table, 0, 1, 32);
  fill(table, 0, 0x7f, 0xa0);

  // apply wide char rules first
  // wide chars (the end index passed to fill is exclusive)
  fill(table, 2, 0x1100, 0x1160);
  table[0x2329] = 2;
  table[0x232a] = 2;
  fill(table, 2, 0x2e80, 0xa4d0);
  table[0x303f] = 1;  // wrongly in last line

  fill(table, 2, 0xac00, 0xd7a4);
  fill(table, 2, 0xf900, 0xfb00);
  fill(table, 2, 0xfe10, 0xfe1a);
  fill(table, 2, 0xfe30, 0xfe70);
  fill(table, 2, 0xff00, 0xff61);
  fill(table, 2, 0xffe0, 0xffe7);

  // apply combining last to ensure we overwrite
  // wrongly wide set chars:
  //    the original algo evals combining first and falls
  //    through to wide check so we simply do here the opposite
  // combining 0
  for (let r = 0; r < COMBINING_BMP.length; ++r) {
    // ranges are inclusive, fill's end is exclusive, hence the + 1
    fill(table, 0, COMBINING_BMP[r][0], COMBINING_BMP[r][1] + 1);
  }

  lookupTable = table;
  return table;
}

/**
 * Character width provider registered as unicode version 6.
 *
 * `wcwidth` returns the number of cells (0, 1 or 2) for a code point.
 * `init` builds the BMP lookup table eagerly; calling it is optional because
 * `wcwidth` creates the table on demand, and repeated calls are no-ops.
 */
export const v6: IUnicodeVersionProvider = {
  version: 6,
  wcwidth: (ucs: number): number => {
    if (ucs < 32) {
      return 0;
    }
    if (ucs < 127) {
      return 1;
    }
    if (ucs < 65536) {
      // build the table lazily so a lookup before init() cannot hit null
      return (lookupTable || createBmpLookupTable())[ucs];
    }
    // do a full search for high codepoints
    return wcwidthHigh(ucs);
  },
  init: () => {
    // init only once
    if (!lookupTable) {
      createBmpLookupTable();
    }
  }
};
'xterm'; import { Terminal } from '../Terminal'; +import { UnicodeVersionManager } from '../UnicodeManager'; export class TestTerminal extends Terminal { writeSync(data: string): void { @@ -156,6 +157,7 @@ export class MockTerminal implements ITerminal { } registerCharacterJoiner(handler: CharacterJoinerHandler): number { return 0; } deregisterCharacterJoiner(joinerId: number): void { } + unicodeManager: IUnicodeVersionManager = new UnicodeVersionManager(); } export class MockCharMeasure implements ICharMeasure { @@ -196,6 +198,7 @@ export class MockInputHandlingTerminal implements IInputHandlingTerminal { buffer: IBuffer = new MockBuffer(); viewport: IViewport; selectionManager: ISelectionManager; + unicodeManager: IUnicodeVersionManager; focus(): void { throw new Error('Method not implemented.'); } diff --git a/typings/xterm.d.ts b/typings/xterm.d.ts index c6b6b1e59f..52d20dd7a3 100644 --- a/typings/xterm.d.ts +++ b/typings/xterm.d.ts @@ -199,6 +199,12 @@ declare module 'xterm' { * The color theme of the terminal. */ theme?: ITheme; + + /** + * Set the unicode version. + * Defaults to the highest available version. + */ + unicodeVersion?: string | number; } /**