From 65534e7b997d0f52bba1a08a81aba06a1ee4a0ff Mon Sep 17 00:00:00 2001 From: Victor Felder Date: Fri, 1 Mar 2019 16:46:28 +0100 Subject: [PATCH] remark-grid-tables: fix for ambiguous width with east asian --- .../__tests__/__snapshots__/index.js.snap | 4 +- .../__tests__/grid-tables.double.md | 10 +++ .../remark-grid-tables/__tests__/index.js | 15 ++++ packages/remark-grid-tables/dist/index.js | 88 ++++++++++++++++--- packages/remark-grid-tables/src/index.js | 11 ++- 5 files changed, 114 insertions(+), 14 deletions(-) diff --git a/packages/remark-grid-tables/__tests__/__snapshots__/index.js.snap b/packages/remark-grid-tables/__tests__/__snapshots__/index.js.snap index 5b5f2ed32..010098ac2 100644 --- a/packages/remark-grid-tables/__tests__/__snapshots__/index.js.snap +++ b/packages/remark-grid-tables/__tests__/__snapshots__/index.js.snap @@ -96,7 +96,9 @@ exports[`grid-table double 1`] = `

Bug #107

例1

例2

例3

例4

例5

例6

例7

Emoji

-

🐶

🍣

👏 🌵

🦄

👨‍👨‍👧‍👦

" +

🐶

🍣

👏 🌵

🦄

👨‍👨‍👧‍👦

+

Emoji and Ambiguous Width

+

🐶

é

👏 🌵

è

👨‍👨‍👧‍👦

" `; exports[`regression: grid table in fenced code block 1`] = ` diff --git a/packages/remark-grid-tables/__tests__/grid-tables.double.md b/packages/remark-grid-tables/__tests__/grid-tables.double.md index d37f6cfb8..e37df95b3 100644 --- a/packages/remark-grid-tables/__tests__/grid-tables.double.md +++ b/packages/remark-grid-tables/__tests__/grid-tables.double.md @@ -212,3 +212,13 @@ Bug #107 | +----+----+ | | 🦄 | 👨‍👨‍👧‍👦 | +---+----+----+ + +## Emoji and Ambiguous Width + ++---+----+----+ +| ⚠ | 🐶 | é | ++===+====+====+ +| ✌ | 👏 🌵 | +| +----+----+ +| | è | 👨‍👨‍👧‍👦 | ++---+----+----+ diff --git a/packages/remark-grid-tables/__tests__/index.js b/packages/remark-grid-tables/__tests__/index.js index 97d5a6ae2..815ce036c 100644 --- a/packages/remark-grid-tables/__tests__/index.js +++ b/packages/remark-grid-tables/__tests__/index.js @@ -140,6 +140,21 @@ test('regression: should not crash when followed by "sth"', () => { expect(contents).toBe(base) }) +test('regression: handles east asian ambiguous width', () => { + const {contents: base} = render(dedent` + +---+ + | ï | + +---+ + `) + + const {contents} = render(dedent` + +---+ + | é | + +---+ + `) + + expect(contents).toBe(base.replace('ï', 'é')) +}) test('stringify', () => { const fileExample = file(join(__dirname, 'grid-tables.md')) diff --git a/packages/remark-grid-tables/dist/index.js b/packages/remark-grid-tables/dist/index.js index 063f16ef6..f4879ce3b 100644 --- a/packages/remark-grid-tables/dist/index.js +++ b/packages/remark-grid-tables/dist/index.js @@ -10,6 +10,8 @@ function _classCallCheck(instance, Constructor) { if (!(instance instanceof Cons var trimEnd = require('lodash.trimend'); var visit = require('unist-util-visit'); +var isFullwidth = require('@nxmix/is-full-width').default; +var splitter = new (require('grapheme-splitter'))(); var mainLineRegex = new RegExp(/((\+)|(\|)).+((\|)|(\+))/); var totalMainLineRegex = new RegExp(/^((\+)|(\|)).+((\|)|(\+))$/); @@ -79,8 +81,9 @@ var TablePart = function () { var cell = this.lastRow()._cells[c]; // Only cells with rowspan equals can be merged - // Test if the char before the cell is a separation character - if (cell._rowspan === newCells[newCells.length - 1]._rowspan && !mergeChars.includes(line[cell._startPosition - 1])) { + // Test if the char does not compose a character + // or the char before the cell is a separation character + if (cell._rowspan === newCells[newCells.length - 1]._rowspan && (!isCodePointPosition(line, cell._startPosition - 1) || !mergeChars.includes(substringLine(line, cell._startPosition - 1)))) { newCells[newCells.length - 1].mergeWith(cell); } else { newCells.push(cell); @@ -95,9 +98,9 @@ var TablePart = function () { var remainingCells = []; for (var c = 0; c < this.lastRow()._cells.length; c++) { var cell = this.lastRow()._cells[c]; - var partLine = line.substring(cell._startPosition - 1, cell._endPosition + 1); + var partLine = substringLine(line, cell._startPosition - 1, cell._endPosition + 1); if (!isSeparationLine(partLine)) { - cell._lines.push(line.substring(cell._startPosition, cell._endPosition)); + cell._lines.push(substringLine(line, cell._startPosition, cell._endPosition)); cell._rowspan += 1; remainingCells.push(cell); } @@ -164,7 +167,7 @@ var TableRow = function () { value: function updateContent(line) { for (var c = 0; c < this._cells.length; c++) { var cell = this._cells[c]; - cell._lines.push(line.substring(cell._startPosition, cell._endPosition)); + cell._lines.push(substringLine(line, cell._startPosition, cell._endPosition)); } } }]); @@ -225,13 +228,16 @@ function isPartLine(line) { return partLineRegex.exec(line); } -function findAll(content, characters) { +function findAll(str, characters) { + var current = 0; var pos = []; + var content = splitter.splitGraphemes(str); for (var i = 0; i < content.length; i++) { var char = content[i]; if (characters.includes(char)) { - pos.push(i); + pos.push(current); } + current += computeLineLength(char); } return pos; } @@ -269,6 +275,66 @@ function computeColumnStartingPositions(lines) { return mergeColumnsStartingPositions(linesInfo); } +function isCodePointPosition(line, pos) { + var content = splitter.splitGraphemes(line); + var offset = 0; + + for (var i = 0; i < content.length; i++) { + // The pos points character position + if (pos === offset) { + return true; + } + // The pos points non-character position + if (pos < offset) { + return false; + } + offset += computeLineLength(content[i]); + } + + // Reaching end means character position + return true; +} + +function substringLine(line, start, end) { + end = end || start + 1; + + var content = splitter.splitGraphemes(line); + var offset = 0; + var str = ''; + + for (var i = 0; i < content.length; i++) { + if (offset >= start) { + str += content[i]; + } + + offset += computeLineLength(content[i]); + + if (offset >= end) { + break; + } + } + + return str; +} + +function isNormalWidth(unicode) { + return unicode <= 0xff && unicode !== 0x00d7 || unicode >= 0xff61 && unicode <= 0xffdf; +} + +function computeLineLength(line) { + var length = 0; + + splitter.splitGraphemes(line).forEach(function (char) { + length += 1; + var codepoint = char.codePointAt(); + if (!isNormalWidth(codepoint)) { + length += isFullwidth(codepoint); + } + }); + + return length; +} + function extractTable(value, eat, tokenizer) { // Extract lines before the grid table var markdownLines = value.split('\n'); @@ -278,7 +344,7 @@ function extractTable(value, eat, tokenizer) { for (; i < markdownLines.length; i++) { var line = markdownLines[i]; if (isSeparationLine(line)) break; - if (line.length === 0) break; + if (computeLineLength(line) === 0) break; before.push(line); } @@ -288,14 +354,14 @@ function extractTable(value, eat, tokenizer) { // Extract table if (!possibleGridTable[i + 1]) return [null, null, null, null]; - var lineLength = possibleGridTable[i + 1].length; + var lineLength = computeLineLength(possibleGridTable[i + 1]); var gridTable = []; var hasHeader = false; for (; i < possibleGridTable.length; i++) { var _line = possibleGridTable[i]; var isMainLine = totalMainLineRegex.exec(_line); // line is in table - if (isMainLine && _line.length === lineLength) { + if (isMainLine && computeLineLength(_line) === lineLength) { var _isHeaderLine = headerLineRegex.exec(_line); if (_isHeaderLine && !hasHeader) hasHeader = true; // A table can't have 2 headers @@ -324,7 +390,7 @@ function extractTable(value, eat, tokenizer) { var after = []; for (; i < possibleGridTable.length; i++) { var _line2 = possibleGridTable[i]; - if (_line2.length === 0) break; + if (computeLineLength(_line2) === 0) break; after.push(markdownLines[i]); } diff --git a/packages/remark-grid-tables/src/index.js b/packages/remark-grid-tables/src/index.js index 1cf806d6f..36a6e7121 100644 --- a/packages/remark-grid-tables/src/index.js +++ b/packages/remark-grid-tables/src/index.js @@ -274,12 +274,19 @@ function substringLine (line, start, end) { return str } +function isNormalWidth (unicode) { + return (unicode <= 0xff && unicode !== 0x00d7) || (unicode >= 0xff61 && unicode <= 0xffdf) +} + function computeLineLength (line) { let length = 0 - splitter.splitGraphemes(line).forEach(str => { + splitter.splitGraphemes(line).forEach(char => { length += 1 - length += isFullwidth(str.codePointAt()) + const codepoint = char.codePointAt() + if (!isNormalWidth(codepoint)) { + length += isFullwidth(codepoint) + } }) return length