From 6bdd38089015c5a1e76e04059d99893bff06bbf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Sun, 20 Jan 2019 21:54:58 +0100 Subject: [PATCH 01/10] add utf8 decoder --- package.json | 2 + src/core/input/TextDecoder.test.ts | 136 +++++++++++++++- src/core/input/TextDecoder.ts | 245 ++++++++++++++++++++++++++++- 3 files changed, 375 insertions(+), 8 deletions(-) diff --git a/package.json b/package.json index 5ed3a4b800..b223c55f02 100644 --- a/package.json +++ b/package.json @@ -12,6 +12,7 @@ "@types/jsdom": "11.0.1", "@types/mocha": "^2.2.33", "@types/node": "6.0.108", + "@types/utf8": "^2.1.6", "@types/webpack": "^4.4.11", "browserify": "^13.3.0", "chai": "3.5.0", @@ -39,6 +40,7 @@ "tslint": "^5.9.1", "tslint-consistent-codestyle": "^1.13.0", "typescript": "3.1", + "utf8": "^3.0.0", "vinyl-buffer": "^1.0.0", "vinyl-source-stream": "^1.1.0", "webpack": "^4.17.1", diff --git a/src/core/input/TextDecoder.test.ts b/src/core/input/TextDecoder.test.ts index f69fbded1d..f38a556922 100644 --- a/src/core/input/TextDecoder.test.ts +++ b/src/core/input/TextDecoder.test.ts @@ -4,7 +4,8 @@ */ import { assert } from 'chai'; -import { StringToUtf32, stringFromCodePoint } from './TextDecoder'; +import { StringToUtf32, stringFromCodePoint, Utf8ToUtf32 } from './TextDecoder'; +import { encode } from 'utf8'; // convert UTF32 codepoints to string @@ -19,7 +20,30 @@ function toString(data: Uint32Array, length: number): string { return result; } -describe('StringToUtf32 Decoder', () => { +// convert "bytestring" (charCode 0-255) to bytes +function fromByteString(s: string): Uint8Array { + const result = new Uint8Array(s.length); + for (let i = 0; i < s.length; ++i) { + result[i] = s.charCodeAt(i); + } + return result; +} + + +const TEST_STRINGS = [ + 'Лорем ипсум долор сит амет, ех сеа аццусам диссентиет. Ан еос стет еирмод витуперата. Иус дицерет урбанитас ет. 
Ан при алтера долорес сплендиде, цу яуо интегре денияуе, игнота волуптариа инструцтиор цу вим.', + 'ლორემ იფსუმ დოლორ სით ამეთ, ფაცერ მუციუს ცონსეთეთურ ყუო იდ, ფერ ვივენდუმ ყუაერენდუმ ეა, ესთ ამეთ მოვეთ სუავითათე ცუ. ვითაე სენსიბუს ან ვიხ. ეხერცი დეთერრუისსეთ უთ ყუი. ვოცენთ დებითის ადიფისცი ეთ ფერ. ნეც ან ფეუგაით ფორენსიბუს ინთერესსეთ. იდ დიცო რიდენს იუს. დისსენთიეთ ცონსეყუუნთურ სედ ნე, ნოვუმ მუნერე ეუმ ათ, ნე ეუმ ნიჰილ ირაცუნდია ურბანითას.', + 'अधिकांश अमितकुमार प्रोत्साहित मुख्य जाने प्रसारन विश्लेषण विश्व दारी अनुवादक अधिकांश नवंबर विषय गटकउसि गोपनीयता विकास जनित परस्पर गटकउसि अन्तरराष्ट्रीयकरन होसके मानव पुर्णता कम्प्युटर यन्त्रालय प्रति साधन', + '覧六子当聞社計文護行情投身斗来。増落世的況上席備界先関権能万。本物挙歯乳全事携供板栃果以。頭月患端撤競見界記引去法条公泊候。決海備駆取品目芸方用朝示上用報。講申務紙約週堂出応理田流団幸稿。起保帯吉対阜庭支肯豪彰属本躍。量抑熊事府募動極都掲仮読岸。自続工就断庫指北速配鳴約事新住米信中験。婚浜袋著金市生交保他取情距。', + '八メル務問へふらく博辞説いわょ読全タヨムケ東校どっ知壁テケ禁去フミ人過を装5階がねぜ法逆はじ端40落ミ予竹マヘナセ任1悪た。省ぜりせ製暇ょへそけ風井イ劣手はぼまず郵富法く作断タオイ取座ゅょが出作ホシ月給26島ツチ皇面ユトクイ暮犯リワナヤ断連こうでつ蔭柔薄とレにの。演めけふぱ損田転10得観びトげぎ王物鉄夜がまけ理惜くち牡提づ車惑参ヘカユモ長臓超漫ぼドかわ。', + '모든 국민은 행위시의 법률에 의하여 범죄를 구성하지 아니하는 행위로 소추되지 아니하며. 전직대통령의 신분과 예우에 관하여는 법률로 정한다, 국회는 헌법 또는 법률에 특별한 규정이 없는 한 재적의원 과반수의 출석과 출석의원 과반수의 찬성으로 의결한다. 군인·군무원·경찰공무원 기타 법률이 정하는 자가 전투·훈련등 직무집행과 관련하여 받은 손해에 대하여는 법률이 정하는 보상외에 국가 또는 공공단체에 공무원의 직무상 불법행위로 인한 배상은 청구할 수 없다.', + 'كان فشكّل الشرقي مع, واحدة للمجهود تزامناً بعض بل. وتم جنوب للصين غينيا لم, ان وبدون وكسبت الأمور ذلك, أسر الخاسر الانجليزية هو. نفس لغزو مواقعها هو. الجو علاقة الصعداء انه أي, كما مع بمباركة للإتحاد الوزراء. ترتيب الأولى أن حدى, الشتوية باستحداث مدن بل, كان قد أوسع عملية. الأوضاع بالمطالبة كل قام, دون إذ شمال الربيع،. هُزم الخاصّة ٣٠ أما, مايو الصينية مع قبل.', + 'או סדר החול מיזמי קרימינולוגיה. קהילה בגרסה לויקיפדים אל היא, של צעד ציור ואלקטרוניקה. מדע מה ברית המזנון ארכיאולוגיה, אל טבלאות מבוקשים כלל. מאמרשיחהצפה העריכהגירסאות שכל אל, כתב עיצוב מושגי של. קבלו קלאסיים ב מתן. נבחרים אווירונאוטיקה אם מלא, לוח למנוע ארכיאולוגיה מה. 
ארץ לערוך בקרבת מונחונים או, עזרה רקטות לויקיפדים אחר גם.', + 'Лорем ლორემ अधिकांश 覧六子 八メル 모든 בקרבת 💮 😂 äggg 123€ 𝄞.' +]; + + +describe('StringToUtf32 decoder', () => { describe('full codepoint test', () => { it('0..65535', () => { const decoder = new StringToUtf32(); @@ -51,6 +75,15 @@ describe('StringToUtf32 Decoder', () => { } }); }); + it('test strings', () => { + const decoder = new StringToUtf32(); + const target = new Uint32Array(500); + for (let i = 0; i < TEST_STRINGS.length; ++i) { + const length = decoder.decode(TEST_STRINGS[i], target); + assert.equal(toString(target, length), TEST_STRINGS[i]); + decoder.clear(); + } + }); describe('stream handling', () => { it('surrogates mixed advance by 1', () => { const decoder = new StringToUtf32(); @@ -65,3 +98,102 @@ describe('StringToUtf32 Decoder', () => { }); }); }); + +describe('Utf8ToUtf32 decoder', () => { + describe('full codepoint test', () => { + it('0..65535 (1/2/3 byte sequences)', () => { + const decoder = new Utf8ToUtf32(); + const target = new Uint32Array(5); + for (let i = 0; i < 65536; ++i) { + // skip surrogate pairs + if (i >= 0xD800 && i <= 0xDFFF) { + continue; + } + const utf8Data = fromByteString(encode(String.fromCharCode(i))); + const length = decoder.decode(utf8Data, target); + assert.equal(length, 1); + assert.equal(toString(target, length), String.fromCharCode(i)); + decoder.clear(); + } + }); + it('65536..0x10FFFF (4 byte sequences)', function(): void { + this.timeout(20000); + const decoder = new Utf8ToUtf32(); + const target = new Uint32Array(5); + for (let i = 65536; i < 0x10FFFF; ++i) { + const utf8Data = fromByteString(encode(stringFromCodePoint(i))); + const length = decoder.decode(utf8Data, target); + assert.equal(length, 1); + assert.equal(target[0], i); + decoder.clear(); + } + }); + }); + it('test strings', () => { + const decoder = new Utf8ToUtf32(); + const target = new Uint32Array(500); + for (let i = 0; i < TEST_STRINGS.length; ++i) { + const utf8Data = 
fromByteString(encode(TEST_STRINGS[i])); + const length = decoder.decode(utf8Data, target); + assert.equal(toString(target, length), TEST_STRINGS[i]); + decoder.clear(); + } + }); + describe('stream handling', () => { + it('2 byte sequences - advance by 1', () => { + const decoder = new Utf8ToUtf32(); + const target = new Uint32Array(5); + const utf8Data = fromByteString('\xc3\x84\xc3\x96\xc3\x9c\xc3\x9f\xc3\xb6\xc3\xa4\xc3\xbc'); + let decoded = ''; + for (let i = 0; i < utf8Data.length; ++i) { + const written = decoder.decode(utf8Data.slice(i, i + 1), target); + decoded += toString(target, written); + } + assert(decoded, 'ÄÖÜßöäü'); + }); + it('2/3 byte sequences - advance by 1', () => { + const decoder = new Utf8ToUtf32(); + const target = new Uint32Array(5); + const utf8Data = fromByteString('\xc3\x84\xe2\x82\xac\xc3\x96\xe2\x82\xac\xc3\x9c\xe2\x82\xac\xc3\x9f\xe2\x82\xac\xc3\xb6\xe2\x82\xac\xc3\xa4\xe2\x82\xac\xc3\xbc'); + let decoded = ''; + for (let i = 0; i < utf8Data.length; ++i) { + const written = decoder.decode(utf8Data.slice(i, i + 1), target); + decoded += toString(target, written); + } + assert(decoded, 'Āր܀߀ö€ä€ü'); + }); + it('2/3/4 byte sequences - advance by 1', () => { + const decoder = new Utf8ToUtf32(); + const target = new Uint32Array(5); + const utf8Data = fromByteString('\xc3\x84\xe2\x82\xac\xf0\x9d\x84\x9e\xc3\x96\xf0\x9d\x84\x9e\xe2\x82\xac\xc3\x9c\xf0\x9d\x84\x9e\xe2\x82\xac'); + let decoded = ''; + for (let i = 0; i < utf8Data.length; ++i) { + const written = decoder.decode(utf8Data.slice(i, i + 1), target); + decoded += toString(target, written); + } + assert(decoded, 'Ä€𝄞Ö𝄞€Ü𝄞€'); + }); + it('2/3/4 byte sequences - advance by 2', () => { + const decoder = new Utf8ToUtf32(); + const target = new Uint32Array(5); + const utf8Data = fromByteString('\xc3\x84\xe2\x82\xac\xf0\x9d\x84\x9e\xc3\x96\xf0\x9d\x84\x9e\xe2\x82\xac\xc3\x9c\xf0\x9d\x84\x9e\xe2\x82\xac'); + let decoded = ''; + for (let i = 0; i < utf8Data.length; i += 2) { + const 
written = decoder.decode(utf8Data.slice(i, i + 2), target); + decoded += toString(target, written); + } + assert(decoded, 'Ä€𝄞Ö𝄞€Ü𝄞€'); + }); + it('2/3/4 byte sequences - advance by 3', () => { + const decoder = new Utf8ToUtf32(); + const target = new Uint32Array(5); + const utf8Data = fromByteString('\xc3\x84\xe2\x82\xac\xf0\x9d\x84\x9e\xc3\x96\xf0\x9d\x84\x9e\xe2\x82\xac\xc3\x9c\xf0\x9d\x84\x9e\xe2\x82\xac'); + let decoded = ''; + for (let i = 0; i < utf8Data.length; i += 3) { + const written = decoder.decode(utf8Data.slice(i, i + 3), target); + decoded += toString(target, written); + } + assert(decoded, 'Ä€𝄞Ö𝄞€Ü𝄞€'); + }); + }); +}); diff --git a/src/core/input/TextDecoder.ts b/src/core/input/TextDecoder.ts index 04407a09fe..9080c09dcf 100644 --- a/src/core/input/TextDecoder.ts +++ b/src/core/input/TextDecoder.ts @@ -3,6 +3,19 @@ * @license MIT */ + +/** + * Polyfill - Convert UTF32 codepoint into JS string. + */ +export function stringFromCodePoint(codePoint: number): string { + if (codePoint > 0xFFFF) { + codePoint -= 0x10000; + return String.fromCharCode((codePoint >> 10) + 0xD800) + String.fromCharCode((codePoint % 0x400) + 0xDC00); + } + return String.fromCharCode(codePoint); +} + + /** * StringToUtf32 - decodes UTF16 sequences into UTF32 codepoints. * To keep the decoder in line with JS strings it handles single surrogates as UCS2. @@ -73,12 +86,232 @@ export class StringToUtf32 { } /** - * Polyfill - Convert UTF32 codepoint into JS string. + * Utf8Decoder - decodes UTF8 byte sequences into UTF32 codepoints. */ -export function stringFromCodePoint(codePoint: number): string { - if (codePoint > 0xFFFF) { - codePoint -= 0x10000; - return String.fromCharCode((codePoint >> 10) + 0xD800) + String.fromCharCode((codePoint % 0x400) + 0xDC00); +export class Utf8ToUtf32 { + public interim: Uint8Array = new Uint8Array(3); + + /** + * Clears interim bytes and resets decoder to clean state. 
+ */ + public clear(): void { + this.interim.fill(0); + } + + /** + * Decodes UTF8 byte sequences in `input` to UTF32 codepoints in `target`. + * The method assumes stream input and will store partly transmitted bytes + * and decode them with the next data chunk. + * Note: The method does no bound checks for target, therefore make sure + * the provided data chunk does not exceed the size of `target`. + * Returns the number of written codepoints in `target`. + */ + decode(input: Uint8Array, target: Uint32Array): number { + const length = input.length; + + if (!length) { + return 0; + } + + let size = 0; + let byte1; + let byte2; + let byte3; + let byte4; + let codepoint = 0; + let startPos = 0; + + // handle leftover bytes + if (this.interim[0]) { + let discardInterim = false; + let cp = this.interim[0]; + cp &= ((((cp & 0xE0) === 0xC0)) ? 0x1F : (((cp & 0xF0) === 0xE0)) ? 0x0F : 0x07); + let pos = 0; + let tmp; + while ((tmp = this.interim[++pos] & 0x3F) && pos < 4) { + cp <<= 6; + cp |= tmp; + } + // missing bytes - read ahead from input + const type = (((this.interim[0] & 0xE0) === 0xC0)) ? 2 : (((this.interim[0] & 0xF0) === 0xE0)) ? 
3 : 4; + const missing = type - pos; + while (startPos < missing) { + if (startPos >= length) { + return 0; + } + tmp = input[startPos++]; + if ((tmp & 0xC0) !== 0x80) { + // wrong continuation, discard interim bytes completely + startPos--; + discardInterim = true; + break; + } else { + // need to save so we can continue short inputs in next call + this.interim[pos++] = tmp; + cp <<= 6; + cp |= tmp & 0x3F; + } + } + if (!discardInterim) { + // final test is type dependent + if (type === 2) { + if (cp < 0x80) { + // wrong starter byte + startPos--; + } else { + target[size++] = cp; + } + } else if (type === 3) { + if (cp < 0x0800 || (cp >= 0xD800 && cp <= 0xDFFF)) { + // illegal codepoint + } else { + target[size++] = cp; + } + } else { + if (codepoint < 0x010000 || codepoint > 0x10FFFF) { + // illegal codepoint + } else { + target[size++] = cp; + } + } + } + this.interim.fill(0); + } + + // loop through input + const fourStop = length - 4; + let i = startPos; + while (i < length) { + + /** + * ASCII shortcut with loop unrolled to 4 consecutive ASCII chars. + * This is a compromise between speed gain for ASCII + * and penalty for non ASCII: + * For best ASCII performance the char should be stored directly into target, + * but even a single attempt to write to target and compare afterwards + * penalizes non ASCII really bad (-50%), thus we load the char into byteX first, + * which reduces ASCII performance by ~15%. + * This trial for ASCII reduces non ASCII performance by ~10% which seems acceptable + * compared to the gains. + * Note that this optimization only takes place for 4 consecutive ASCII chars, + * for any shorter it bails out. Worst case - all 4 bytes being read but + * thrown away due to the last being a non ASCII char (-10% performance). 
+ */ + while (i < fourStop + && !((byte1 = input[i]) & 0x80) + && !((byte2 = input[i + 1]) & 0x80) + && !((byte3 = input[i + 2]) & 0x80) + && !((byte4 = input[i + 3]) & 0x80)) + { + target[size++] = byte1; + target[size++] = byte2; + target[size++] = byte3; + target[size++] = byte4; + i += 4; + } + + // reread byte1 + byte1 = input[i++]; + + // 1 byte + if (byte1 < 0x80) { + target[size++] = byte1; + + // 2 bytes + } else if ((byte1 & 0xE0) === 0xC0) { + if (i >= length) { + this.interim[0] = byte1; + return size; + } + byte2 = input[i++]; + if ((byte2 & 0xC0) !== 0x80) { + // wrong continuation + i--; + continue; + } + codepoint = (byte1 & 0x1F) << 6 | (byte2 & 0x3F); + if (codepoint < 0x80) { + // wrong starter byte + i--; + continue; + } + target[size++] = codepoint; + + // 3 bytes + } else if ((byte1 & 0xF0) === 0xE0) { + if (i >= length) { + this.interim[0] = byte1; + return size; + } + byte2 = input[i++]; + if ((byte2 & 0xC0) !== 0x80) { + // wrong continuation + i--; + continue; + } + if (i >= length) { + this.interim[0] = byte1; + this.interim[1] = byte2; + return size; + } + byte3 = input[i++]; + if ((byte3 & 0xC0) !== 0x80) { + // wrong continuation + i--; + continue; + } + codepoint = (byte1 & 0x0F) << 12 | (byte2 & 0x3F) << 6 | (byte3 & 0x3F); + if (codepoint < 0x0800 || (codepoint >= 0xD800 && codepoint <= 0xDFFF)) { + // illegal codepoint, no i-- here + continue; + } + target[size++] = codepoint; + + // 4 bytes + } else if ((byte1 & 0xF8) === 0xF0) { + if (i >= length) { + this.interim[0] = byte1; + return size; + } + byte2 = input[i++]; + if ((byte2 & 0xC0) !== 0x80) { + // wrong continuation + i--; + continue; + } + if (i >= length) { + this.interim[0] = byte1; + this.interim[1] = byte2; + return size; + } + byte3 = input[i++]; + if ((byte3 & 0xC0) !== 0x80) { + // wrong continuation + i--; + continue; + } + if (i >= length) { + this.interim[0] = byte1; + this.interim[1] = byte2; + this.interim[2] = byte3; + return size; + } + byte4 = input[i++]; + 
if ((byte4 & 0xC0) !== 0x80) { + // wrong continuation + i--; + continue; + } + codepoint = (byte1 & 0x07) << 18 | (byte2 & 0x3F) << 12 | (byte3 & 0x3F) << 6 | (byte4 & 0x3F); + if (codepoint < 0x010000 || codepoint > 0x10FFFF) { + // illegal codepoint, no i-- here + continue; + } + target[size++] = codepoint; + } else { + // illegal byte, just skip + } + } + return size; } - return String.fromCharCode(codePoint); } From d7ea0edfcf27fdd3e37963b041fc647d035b0b8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Sun, 20 Jan 2019 22:19:50 +0100 Subject: [PATCH 02/10] add utf8 input to terminal --- src/InputHandler.ts | 33 ++++++++++++++++++++++++++++++--- src/Terminal.ts | 17 +++++++++++++++++ src/Types.ts | 1 + src/public/Terminal.ts | 3 +++ src/ui/TestUtils.test.ts | 3 +++ typings/xterm.d.ts | 6 ++++++ 6 files changed, 60 insertions(+), 3 deletions(-) diff --git a/src/InputHandler.ts b/src/InputHandler.ts index c41b4165c9..7049102788 100644 --- a/src/InputHandler.ts +++ b/src/InputHandler.ts @@ -15,7 +15,7 @@ import { ICharset } from './core/Types'; import { IDisposable } from 'xterm'; import { Disposable } from './common/Lifecycle'; import { concat, utf32ToString } from './common/TypedArrayUtils'; -import { StringToUtf32, stringFromCodePoint } from './core/input/TextDecoder'; +import { StringToUtf32, stringFromCodePoint, Utf8ToUtf32 } from './core/input/TextDecoder'; import { CellData } from './BufferLine'; /** @@ -104,8 +104,9 @@ class DECRQSS implements IDcsHandler { * each function's header comment. 
*/ export class InputHandler extends Disposable implements IInputHandler { - private _parseBuffer: Uint32Array = new Uint32Array(4096); - private _stringDecoder: StringToUtf32 = new StringToUtf32(); + private _parseBuffer = new Uint32Array(4096); + private _stringDecoder = new StringToUtf32(); + private _utf8Decoder = new Utf8ToUtf32(); private _cell: CellData = new CellData(); constructor( @@ -311,6 +312,32 @@ export class InputHandler extends Disposable implements IInputHandler { } } + public parseUtf8(data: Uint8Array): void { + // Ensure the terminal is not disposed + if (!this._terminal) { + return; + } + + let buffer = this._terminal.buffer; + const cursorStartX = buffer.x; + const cursorStartY = buffer.y; + + // TODO: Consolidate debug/logging #1560 + if ((this._terminal).debug) { + this._terminal.log('data: ' + data); + } + + if (this._parseBuffer.length < data.length) { + this._parseBuffer = new Uint32Array(data.length); + } + this._parser.parse(this._parseBuffer, this._utf8Decoder.decode(data, this._parseBuffer)); + + buffer = this._terminal.buffer; + if (buffer.x !== cursorStartX || buffer.y !== cursorStartY) { + this._terminal.emit('cursormove'); + } + } + public print(data: Uint32Array, start: number, end: number): void { let code: number; let chWidth: number; diff --git a/src/Terminal.ts b/src/Terminal.ts index 33d8e60f28..e92ef08bfd 100644 --- a/src/Terminal.ts +++ b/src/Terminal.ts @@ -1301,6 +1301,23 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II } } + /** + * Writes utf8 data to the terminal. + * TODO: This currently does no flow control. + */ + public writeUtf8(data: Uint8Array): void { + if (this._isDisposed) { + return; + } + this._refreshStart = this.buffer.y; + this._refreshEnd = this.buffer.y; + + this._inputHandler.parseUtf8(data); + + this.updateRange(this.buffer.y); + this.refresh(this._refreshStart, this._refreshEnd); + } + /** * Writes text to the terminal. 
* @param data The text to write to the terminal. diff --git a/src/Types.ts b/src/Types.ts index d176799f4a..186e21492f 100644 --- a/src/Types.ts +++ b/src/Types.ts @@ -111,6 +111,7 @@ export interface ICompositionHelper { */ export interface IInputHandler { parse(data: string): void; + parseUtf8(data: Uint8Array): void; print(data: Uint32Array, start: number, end: number): void; /** C0 BEL */ bell(): void; diff --git a/src/public/Terminal.ts b/src/public/Terminal.ts index 87fcfaef99..7e219e0870 100644 --- a/src/public/Terminal.ts +++ b/src/public/Terminal.ts @@ -122,6 +122,9 @@ export class Terminal implements ITerminalApi { public write(data: string): void { this._core.write(data); } + public writeUtf8(data: Uint8Array): void { + this._core.writeUtf8(data); + } public getOption(key: 'bellSound' | 'bellStyle' | 'cursorStyle' | 'fontFamily' | 'fontWeight' | 'fontWeightBold' | 'rendererType' | 'termName'): string; public getOption(key: 'allowTransparency' | 'cancelEvents' | 'convertEol' | 'cursorBlink' | 'debug' | 'disableStdin' | 'enableBold' | 'macOptionIsMeta' | 'rightClickSelectsWord' | 'popOnBell' | 'screenKeys' | 'useFlowControl' | 'visualBell'): boolean; public getOption(key: 'colors'): string[]; diff --git a/src/ui/TestUtils.test.ts b/src/ui/TestUtils.test.ts index 9d525fbf92..13abee7658 100644 --- a/src/ui/TestUtils.test.ts +++ b/src/ui/TestUtils.test.ts @@ -99,6 +99,9 @@ export class MockTerminal implements ITerminal { write(data: string): void { throw new Error('Method not implemented.'); } + writeUtf8(data: Uint8Array): void { + throw new Error('Method not implemented.'); + } bracketedPasteMode: boolean; mouseHelper: IMouseHelper; renderer: IRenderer; diff --git a/typings/xterm.d.ts b/typings/xterm.d.ts index 11fab9097e..ffaba90eef 100644 --- a/typings/xterm.d.ts +++ b/typings/xterm.d.ts @@ -636,6 +636,12 @@ declare module 'xterm' { */ write(data: string): void; + /** + * Writes UTF8 data to the terminal. + * @param data The data to write to the terminal. 
+ */ + writeUtf8(data: Uint8Array): void; + /** * Retrieves an option's value from the terminal. * @param key The option key. From e6e5ecc0f2c4742e781b0f6c7b95caaaae6a024f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Sun, 20 Jan 2019 23:06:51 +0100 Subject: [PATCH 03/10] change demo to utf8 input --- demo/client.ts | 1 + demo/server.js | 13 +++++++------ src/addons/attach/attach.ts | 5 +++++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/demo/client.ts b/demo/client.ts index a3a912f630..a38b8a5c6b 100644 --- a/demo/client.ts +++ b/demo/client.ts @@ -134,6 +134,7 @@ function createTerminal(): void { pid = processId; socketURL += processId; socket = new WebSocket(socketURL); + socket.binaryType = 'arraybuffer'; socket.onopen = runRealTerminal; socket.onclose = runFakeTerminal; socket.onerror = runFakeTerminal; diff --git a/demo/server.js b/demo/server.js index 5ff9ca61a7..86bc906988 100644 --- a/demo/server.js +++ b/demo/server.js @@ -32,7 +32,8 @@ function startServer() { cols: cols || 80, rows: rows || 24, cwd: process.env.PWD, - env: process.env + env: process.env, + encoding: null }); console.log('Created terminal with PID: ' + term.pid); @@ -62,20 +63,20 @@ function startServer() { ws.send(logs[term.pid]); function buffer(socket, timeout) { - let s = ''; + let buffer = []; let sender = null; return (data) => { - s += data; + buffer.push(data); if (!sender) { sender = setTimeout(() => { - socket.send(s); - s = ''; + socket.send(Buffer.concat(buffer)); + buffer = []; sender = null; }, timeout); } }; } - const send = buffer(ws, 5); + const send = buffer(ws, 5); term.on('data', function(data) { try { diff --git a/src/addons/attach/attach.ts b/src/addons/attach/attach.ts index f121e2e2d4..7333f92bf3 100644 --- a/src/addons/attach/attach.ts +++ b/src/addons/attach/attach.ts @@ -42,6 +42,11 @@ export function attach(term: Terminal, socket: WebSocket, bidirectional: boolean addonTerminal.__getMessage = function(ev: 
MessageEvent): void { let str: string; + if (ev.data instanceof ArrayBuffer) { + addonTerminal.writeUtf8(new Uint8Array(ev.data)); + return; + } + if (typeof ev.data === 'object') { if (!myTextDecoder) { myTextDecoder = new TextDecoder(); From 7a567d27177615f7abfad5ae47f6a88f1c1c9e1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Sat, 11 May 2019 23:10:18 +0200 Subject: [PATCH 04/10] apply time-based limit --- src/Terminal.ts | 81 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 7 deletions(-) diff --git a/src/Terminal.ts b/src/Terminal.ts index 2cb1dcf08d..391cd991f3 100644 --- a/src/Terminal.ts +++ b/src/Terminal.ts @@ -183,6 +183,7 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II // user input states public writeBuffer: string[]; + public writeBufferUtf8: Uint8Array[]; private _writeInProgress: boolean; /** @@ -340,6 +341,7 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II // user input states this.writeBuffer = []; + this.writeBufferUtf8 = []; this._writeInProgress = false; this._xoffSentToCatchUp = false; @@ -1366,20 +1368,85 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II } /** - * Writes utf8 data to the terminal. - * TODO: This currently does no flow control. + * Writes raw utf8 bytes to the terminal. + * @param data The text to write to the terminal. */ public writeUtf8(data: Uint8Array): void { + // Ensure the terminal isn't disposed if (this._isDisposed) { return; } - this._refreshStart = this.buffer.y; - this._refreshEnd = this.buffer.y; - this._inputHandler.parseUtf8(data); + // Ignore falsy data values (including the empty string) + if (!data) { + return; + } + + this.writeBufferUtf8.push(data); + + // Send XOFF to pause the pty process if the write buffer becomes too large so + // xterm.js can catch up before more data is sent. 
This is necessary in order + // to keep signals such as ^C responsive. + if (this.options.useFlowControl && !this._xoffSentToCatchUp && this.writeBufferUtf8.length >= WRITE_BUFFER_PAUSE_THRESHOLD) { + // XOFF - stop pty pipe + // XON will be triggered by emulator before processing data chunk + this.handler(C0.DC3); + this._xoffSentToCatchUp = true; + } + + if (!this._writeInProgress && this.writeBufferUtf8.length > 0) { + // Kick off a write which will write all data in sequence recursively + this._writeInProgress = true; + // Kick off an async innerWrite so more writes can come in while processing data + setTimeout(() => { + this._innerWriteUtf8(); + }); + } + } + + protected _innerWriteUtf8(bufferOffset: number = 0): void { + // Ensure the terminal isn't disposed + if (this._isDisposed) { + this.writeBufferUtf8 = []; + } + + const startTime = Date.now(); + while (this.writeBufferUtf8.length > bufferOffset) { + const data = this.writeBufferUtf8[bufferOffset]; + bufferOffset++; + + // If XOFF was sent in order to catch up with the pty process, resume it if + // we reached the end of the writeBuffer to allow more data to come in. + if (this._xoffSentToCatchUp && this.writeBufferUtf8.length === bufferOffset) { + this.handler(C0.DC1); + this._xoffSentToCatchUp = false; + } - this.updateRange(this.buffer.y); - this.refresh(this._refreshStart, this._refreshEnd); + this._refreshStart = this.buffer.y; + this._refreshEnd = this.buffer.y; + + // HACK: Set the parser state based on its state at the time of return. + // This works around the bug #662 which saw the parser state reset in the + // middle of parsing escape sequence in two chunks. For some reason the + // state of the parser resets to 0 after exiting parser.parse. This change + // just sets the state back based on the correct return statement. 
+ + this._inputHandler.parseUtf8(data); + + this.updateRange(this.buffer.y); + this.refresh(this._refreshStart, this._refreshEnd); + + if (Date.now() - startTime >= WRITE_TIMEOUT_MS) { + break; + } + } + if (this.writeBufferUtf8.length > bufferOffset) { + // Allow renderer to catch up before processing the next batch + setTimeout(() => this._innerWriteUtf8(bufferOffset), 0); + } else { + this._writeInProgress = false; + this.writeBufferUtf8 = []; + } } /** From ce079f399c89ba059797b582a8045686a0a0573f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Sun, 12 May 2019 00:07:07 +0200 Subject: [PATCH 05/10] fix docstring --- src/Terminal.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Terminal.ts b/src/Terminal.ts index 391cd991f3..383b73f8ff 100644 --- a/src/Terminal.ts +++ b/src/Terminal.ts @@ -1369,7 +1369,7 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II /** * Writes raw utf8 bytes to the terminal. - * @param data The text to write to the terminal. + * @param data Uint8Array with UTF8 bytes to write to the terminal. 
*/ public writeUtf8(data: Uint8Array): void { // Ensure the terminal isn't disposed From 2340fcd071975ced2ed7451b89773ff483a07b44 Mon Sep 17 00:00:00 2001 From: Daniel Imms Date: Sat, 11 May 2019 21:21:36 -0700 Subject: [PATCH 06/10] Fix compile and update yarn.lock with utf8 --- src/Types.ts | 1 + yarn.lock | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/src/Types.ts b/src/Types.ts index b6b8ac8642..84927a658e 100644 --- a/src/Types.ts +++ b/src/Types.ts @@ -263,6 +263,7 @@ export interface IPublicTerminal extends IDisposable, IEventEmitter { scrollToLine(line: number): void; clear(): void; write(data: string): void; + writeUtf8(data: Uint8Array): void; getOption(key: string): any; setOption(key: string, value: any): void; refresh(start: number, end: number): void; diff --git a/yarn.lock b/yarn.lock index e14ad3a1a2..87ca4a1826 100644 --- a/yarn.lock +++ b/yarn.lock @@ -93,6 +93,11 @@ dependencies: source-map "^0.6.1" +"@types/utf8@^2.1.6": + version "2.1.6" + resolved "https://registry.yarnpkg.com/@types/utf8/-/utf8-2.1.6.tgz#430cabb71a42d0a3613cce5621324fe4f5a25753" + integrity sha512-pRs2gYF5yoKYrgSaira0DJqVg2tFuF+Qjp838xS7K+mJyY2jJzjsrl6y17GbIa4uMRogMbxs+ghNCvKg6XyNrA== + "@types/webpack@^4.4.11": version "4.4.11" resolved "https://registry.yarnpkg.com/@types/webpack/-/webpack-4.4.11.tgz#0ca832870d55c4e92498c01d22d00d02b0f62ae9" @@ -6823,6 +6828,11 @@ user-home@^1.1.1: resolved "https://registry.yarnpkg.com/user-home/-/user-home-1.1.1.tgz#2b5be23a32b63a7c9deb8d0f28d485724a3df190" integrity sha1-K1viOjK2Onyd640PKNSFcko98ZA= +utf8@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/utf8/-/utf8-3.0.0.tgz#f052eed1364d696e769ef058b183df88c87f69d1" + integrity sha512-E8VjFIQ/TyQgp+TZfS6l8yp/xWppSAHzidGiRrqe4bK4XP9pTRyKFgGJpO3SN7zdX4DeomTrwaseCHovfpFcqQ== + util-deprecate@~1.0.1: version "1.0.2" resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf" From 
00423407c189a8e7e9e6e2a4736d6b579442ddff Mon Sep 17 00:00:00 2001 From: Daniel Imms Date: Sat, 11 May 2019 22:08:25 -0700 Subject: [PATCH 07/10] Clean up --- src/core/input/TextDecoder.test.ts | 3 --- src/core/input/TextDecoder.ts | 4 ---- typings/xterm.d.ts | 14 +++++++------- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/src/core/input/TextDecoder.test.ts b/src/core/input/TextDecoder.test.ts index 68eb1d9b6d..dc358bc529 100644 --- a/src/core/input/TextDecoder.test.ts +++ b/src/core/input/TextDecoder.test.ts @@ -28,7 +28,6 @@ function fromByteString(s: string): Uint8Array { return result; } - const TEST_STRINGS = [ 'Лорем ипсум долор сит амет, ех сеа аццусам диссентиет. Ан еос стет еирмод витуперата. Иус дицерет урбанитас ет. Ан при алтера долорес сплендиде, цу яуо интегре денияуе, игнота волуптариа инструцтиор цу вим.', 'ლორემ იფსუმ დოლორ სით ამეთ, ფაცერ მუციუს ცონსეთეთურ ყუო იდ, ფერ ვივენდუმ ყუაერენდუმ ეა, ესთ ამეთ მოვეთ სუავითათე ცუ. ვითაე სენსიბუს ან ვიხ. ეხერცი დეთერრუისსეთ უთ ყუი. ვოცენთ დებითის ადიფისცი ეთ ფერ. ნეც ან ფეუგაით ფორენსიბუს ინთერესსეთ. იდ დიცო რიდენს იუს. დისსენთიეთ ცონსეყუუნთურ სედ ნე, ნოვუმ მუნერე ეუმ ათ, ნე ეუმ ნიჰილ ირაცუნდია ურბანითას.', @@ -41,7 +40,6 @@ const TEST_STRINGS = [ 'Лорем ლორემ अधिकांश 覧六子 八メル 모든 בקרבת 💮 😂 äggg 123€ 𝄞.' ]; - describe('text encodings', () => { it('stringFromCodePoint/utf32ToString', () => { const s = 'abcdefg'; @@ -110,7 +108,6 @@ describe('text encodings', () => { assert(decoded, 'Ä€𝄞Ö𝄞€Ü𝄞€'); }); }); - }); describe('Utf8ToUtf32 decoder', () => { diff --git a/src/core/input/TextDecoder.ts b/src/core/input/TextDecoder.ts index b9ac19ce01..750298229b 100644 --- a/src/core/input/TextDecoder.ts +++ b/src/core/input/TextDecoder.ts @@ -3,7 +3,6 @@ * @license MIT */ - /** * Polyfill - Convert UTF32 codepoint into JS string. 
* Note: The built-in String.fromCodePoint happens to be much slower @@ -19,7 +18,6 @@ export function stringFromCodePoint(codePoint: number): string { return String.fromCharCode(codePoint); } - /** * Convert UTF32 char codes into JS string. * Basically the same as `stringFromCodePoint` but for multiple codepoints @@ -44,7 +42,6 @@ export function utf32ToString(data: Uint32Array, start: number = 0, end: number return result; } - /** * StringToUtf32 - decodes UTF16 sequences into UTF32 codepoints. * To keep the decoder in line with JS strings it handles single surrogates as UCS2. @@ -211,7 +208,6 @@ export class Utf8ToUtf32 { const fourStop = length - 4; let i = startPos; while (i < length) { - /** * ASCII shortcut with loop unrolled to 4 consecutive ASCII chars. * This is a compromise between speed gain for ASCII diff --git a/typings/xterm.d.ts b/typings/xterm.d.ts index 2edad79a1b..b3bb582e9d 100644 --- a/typings/xterm.d.ts +++ b/typings/xterm.d.ts @@ -553,12 +553,6 @@ declare module 'xterm' { */ resize(columns: number, rows: number): void; - /** - * Writes text to the terminal, followed by a break line character (\n). - * @param data The text to write to the terminal. - */ - writeln(data: string): void; - /** * Opens the terminal within an element. * @param parent The element to create the terminal within. This element @@ -746,7 +740,13 @@ declare module 'xterm' { write(data: string): void; /** - * Writes UTF8 data to the terminal. + * Writes text to the terminal, followed by a break line character (\n). + * @param data The text to write to the terminal. + */ + writeln(data: string): void; + + /** + * Writes UTF-8 encoded text to the terminal. + * @param data The data to write to the terminal. 
*/ writeUtf8(data: Uint8Array): void; From e5dfc5603040feb469d8ca801551b37b01b1cf7a Mon Sep 17 00:00:00 2001 From: Daniel Imms Date: Sat, 11 May 2019 22:24:33 -0700 Subject: [PATCH 08/10] Add api test for writeUtf8 --- src/public/Terminal.api.ts | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/public/Terminal.api.ts b/src/public/Terminal.api.ts index 031e03d54a..57645cf1c2 100644 --- a/src/public/Terminal.api.ts +++ b/src/public/Terminal.api.ts @@ -62,6 +62,20 @@ describe('API Integration Tests', () => { assert.equal(await page.evaluate(`window.term.buffer.getLine(1).translateToString(true)`), 'bar'); }); + it.only('writeUtf8', async function(): Promise { + this.timeout(10000); + await openTerminal(); + await page.evaluate(` + // foo + window.term.writeUtf8(new Uint8Array([102, 111, 111])); + // bar + window.term.writeUtf8(new Uint8Array([98, 97, 114])); + // 文 + window.term.writeUtf8(new Uint8Array([230, 150, 135])); + `); + assert.equal(await page.evaluate(`window.term.buffer.getLine(0).translateToString(true)`), 'foobar文'); + }); + it('clear', async function(): Promise { this.timeout(10000); await openTerminal({ rows: 5 }); From c13d6e5eeb6287f78962dce20a24dbb966eaec02 Mon Sep 17 00:00:00 2001 From: Daniel Imms Date: Sat, 11 May 2019 22:29:01 -0700 Subject: [PATCH 09/10] Make write and writeln consistent with writeUtf8 test --- src/public/Terminal.api.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/public/Terminal.api.ts b/src/public/Terminal.api.ts index 57645cf1c2..5d1245324b 100644 --- a/src/public/Terminal.api.ts +++ b/src/public/Terminal.api.ts @@ -47,8 +47,9 @@ describe('API Integration Tests', () => { await page.evaluate(` window.term.write('foo'); window.term.write('bar'); + window.term.write('文'); `); - assert.equal(await page.evaluate(`window.term.buffer.getLine(0).translateToString(true)`), 'foobar'); + assert.equal(await page.evaluate(`window.term.buffer.getLine(0).translateToString(true)`), 
'foobar文'); }); it('writeln', async function(): Promise { @@ -57,12 +58,14 @@ describe('API Integration Tests', () => { await page.evaluate(` window.term.writeln('foo'); window.term.writeln('bar'); + window.term.writeln('文'); `); assert.equal(await page.evaluate(`window.term.buffer.getLine(0).translateToString(true)`), 'foo'); assert.equal(await page.evaluate(`window.term.buffer.getLine(1).translateToString(true)`), 'bar'); + assert.equal(await page.evaluate(`window.term.buffer.getLine(2).translateToString(true)`), '文'); }); - it.only('writeUtf8', async function(): Promise { + it('writeUtf8', async function(): Promise { this.timeout(10000); await openTerminal(); await page.evaluate(` From 523d562f73d84639e07d04665006f18498755f07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Sun, 12 May 2019 18:49:23 +0200 Subject: [PATCH 10/10] fix missing types --- src/Terminal.ts | 2 +- src/core/input/TextDecoder.ts | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Terminal.ts b/src/Terminal.ts index 383b73f8ff..74688cf57d 100644 --- a/src/Terminal.ts +++ b/src/Terminal.ts @@ -1377,7 +1377,7 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II return; } - // Ignore falsy data values (including the empty string) + // Ignore falsy data values if (!data) { return; } diff --git a/src/core/input/TextDecoder.ts b/src/core/input/TextDecoder.ts index 750298229b..7e141e02ed 100644 --- a/src/core/input/TextDecoder.ts +++ b/src/core/input/TextDecoder.ts @@ -28,7 +28,7 @@ export function utf32ToString(data: Uint32Array, start: number = 0, end: number for (let i = start; i < end; ++i) { let codepoint = data[i]; if (codepoint > 0xFFFF) { - // JS string are encoded as UTF16, thus a non BMP codepoint gets converted into a surrogate pair + // JS strings are encoded as UTF16, thus a non BMP codepoint gets converted into a surrogate pair // conversion rules: // - subtract 0x10000 from code point, leaving a 20 
bit number // - add high 10 bits to 0xD800 --> first surrogate @@ -140,10 +140,10 @@ export class Utf8ToUtf32 { } let size = 0; - let byte1; - let byte2; - let byte3; - let byte4; + let byte1: number; + let byte2: number; + let byte3: number; + let byte4: number; let codepoint = 0; let startPos = 0; @@ -153,7 +153,7 @@ export class Utf8ToUtf32 { let cp = this.interim[0]; cp &= ((((cp & 0xE0) === 0xC0)) ? 0x1F : (((cp & 0xF0) === 0xE0)) ? 0x0F : 0x07); let pos = 0; - let tmp; + let tmp: number; while ((tmp = this.interim[++pos] & 0x3F) && pos < 4) { cp <<= 6; cp |= tmp;