Skip to content

Commit

Permalink
Fix CR+LF being seen as a break between paragraphs
Browse files Browse the repository at this point in the history
  • Loading branch information
wooorm committed Nov 10, 2022
1 parent 72d9f0b commit ebf6961
Show file tree
Hide file tree
Showing 5 changed files with 240 additions and 9 deletions.
1 change: 0 additions & 1 deletion lib/expressions.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions lib/plugin/break-implicit-sentences.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'

// Two or more new line characters.
import {newLineMulti} from '../expressions.js'

// Break a sentence if a white space with more than one new-line is found.
export const breakImplicitSentences = modifyChildren(function (
child,
Expand All @@ -22,7 +19,10 @@ export const breakImplicitSentences = modifyChildren(function (
while (++position < children.length - 1) {
const node = children[position]

if (node.type !== 'WhiteSpaceNode' || !newLineMulti.test(toString(node))) {
if (
node.type !== 'WhiteSpaceNode' ||
toString(node).split(/\r\n|\r|\n/).length < 3
) {
continue
}

Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
},
"scripts": {
"prepack": "npm run generate && npm run format",
"fixture": "node script/generate-fixture.js",
"generate": "node script/build-expressions.js",
"format": "remark . -qfo && prettier . -w --loglevel warn && xo --fix",
"test-api": "node --conditions development test/index.js",
Expand Down
4 changes: 0 additions & 4 deletions script/build-expressions.js
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,6 @@ const reAffixSymbol = new RegExp(
// Match one or more new line characters.
const reNewLine = /^[ \t]*((\r?\n|\r)[\t ]*)+$/

// Match two or more new line characters.
const reNewLineMulti = /^[ \t]*((\r?\n|\r)[\t ]*){2,}$/

// Match sentence-ending markers.
const reTerminalMarker = new RegExp('^((?:' + terminalMarker + ')+)$')

Expand Down Expand Up @@ -149,7 +146,6 @@ fs.writeFileSync(
'// This module is generated by `script/build-expressions.js`.',
'export const affixSymbol = ' + reAffixSymbol,
'export const newLine = ' + reNewLine,
'export const newLineMulti = ' + reNewLineMulti,
'export const terminalMarker = ' + reTerminalMarker,
'export const wordSymbolInner = ' + reWordSymbolInner,
'export const numerical = ' + reNumerical,
Expand Down
235 changes: 235 additions & 0 deletions test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -857,6 +857,241 @@ test('Ellipsis at sentence-end', async function (t) {
)
})

test('Line endings', function () {
  // Build a `WordNode` wrapping a single `TextNode`.
  const word = (value) => ({
    type: 'WordNode',
    children: [{type: 'TextNode', value}]
  })

  // Build a `WhiteSpaceNode` with the given raw value.
  const whiteSpace = (value) => ({type: 'WhiteSpaceNode', value})

  // Build a `ParagraphNode` holding one `SentenceNode` with the given children.
  const paragraph = (...children) => ({
    type: 'ParagraphNode',
    children: [{type: 'SentenceNode', children}]
  })

  // Expected tree when the separator stays inside a single sentence.
  const asWhiteSpace = (separator) => ({
    type: 'RootNode',
    children: [paragraph(word('alpha'), whiteSpace(separator), word('bravo'))]
  })

  // Expected tree when the separator splits the input into two paragraphs.
  const asParagraphBreak = (separator) => ({
    type: 'RootNode',
    children: [
      paragraph(word('alpha')),
      whiteSpace(separator),
      paragraph(word('bravo'))
    ]
  })

  // Each fixture: the separator placed between `alpha` and `bravo`, the
  // builder for the expected tree, and the assertion message.
  const fixtures = [
    ['\r', asWhiteSpace, 'should support a CR line ending as whitespace'],
    ['\n', asWhiteSpace, 'should support an LF line ending as whitespace'],
    ['\r\n', asWhiteSpace, 'should support a CR+LF line ending as whitespace'],
    [
      ' \r\n\t',
      asWhiteSpace,
      'should support a padded CR+LF line ending as whitespace'
    ],
    [
      '\r \t\n',
      asParagraphBreak,
      'should support CR, whitespace, and then an LF, as a break between paragraphs'
    ],
    [
      ' \r \t\r',
      asParagraphBreak,
      'should support two CRs with whitespace as a break between paragraphs'
    ],
    [
      '\r\r',
      asParagraphBreak,
      'should support two CRs as a break between paragraphs'
    ]
  ]

  // Run the fixtures in order; the first failing assertion aborts the test.
  for (const [separator, buildExpected, message] of fixtures) {
    assert.deepEqual(
      loose(removePosition(latin.parse(`alpha${separator}bravo`), true)),
      buildExpected(separator),
      message
    )
  }
})

test('Initial trailing white-space', async function (t) {
await t.test(
'should move trailing white-space up to the highest possible level',
Expand Down

0 comments on commit ebf6961

Please sign in to comment.