Skip to content

Commit

Permalink
Minimize whitespace dirty diffs in existing headings, tables, lists
Browse files Browse the repository at this point in the history
* For existing and edited headings, tables, lists, this patch examines
  the original source for the node and where there is leading/trailing
  whitespace, adds back at most 1 whitespace char.

* So, this doesn't preserve all whitespace or avoid all normalization.
  * this doesn't look for comments or other rendering transparent nodes.
  * this doesn't try to preserve multiple whitespace characters
  * this doesn't try to preserve trailing whitepace for a <td>/<th> that
    is the last element in a wikitext line.

* The blacklist changes are all improvements.

Bug: T195486
Change-Id: Ib571806a765e7306e1dbbf26c293998030aff0c1
  • Loading branch information
subbuss committed May 25, 2018
1 parent a2133c9 commit bf3a2fd
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 66 deletions.
85 changes: 59 additions & 26 deletions lib/html2wt/DOMHandlers.js
Expand Up @@ -60,6 +60,52 @@ function isRecognizedSpanWrapper(type) {
}) !== undefined;
}

function getLeadingSpace(state, node, newEltDefault, fc) {
let space = '';
if (DU.isNewElt(node)) {
if (!fc) {
fc = node.firstChild;
}
if (fc && (!DU.isText(fc) || !fc.nodeValue.match(/^\s/))) {
space = newEltDefault;
}
} else if (state.selserMode) {
const dsr = DU.getDataParsoid(node).dsr;
if (Util.isValidDSR(dsr, true)) {
const offset = dsr[0] + dsr[2];
space = offset < (dsr[1] - dsr[3]) ? state.getOrigSrc(offset, offset + 1) : '';
if (!/[ \t]/.test(space)) {
space = '';
}
}
}
return space;
}

function getTrailingSpace(state, node, newEltDefault) {
let space = '';
if (DU.isNewElt(node)) {
const lc = node.lastChild;
if (lc && (!DU.isText(lc) || !lc.nodeValue.match(/\s$/))) {
space = newEltDefault;
}
} else if (state.selserMode) {
const dsr = DU.getDataParsoid(node).dsr;
if (Util.isValidDSR(dsr, true)) {
const offset = dsr[1] - dsr[3] - 1;
// The > instead of >= is to deal with an edge case
// = = where that single space is captured by the
// getLeadingSpace case above
space = offset > (dsr[0] + dsr[2]) ? state.getOrigSrc(offset, offset + 1) : '';
if (!/[ \t]/.test(space)) {
space = '';
}
}
}

return space;
}

function buildHeadingHandler(headingWT) {
return {
forceSOL: true,
Expand All @@ -69,14 +115,10 @@ function buildHeadingHandler(headingWT) {
// Skip fallback ID if present.
fc = fc.nextSibling;
}

// For new elements, for prettier wikitext serialization,
// emit a space after the last '=' char.
var space = '';
if (DU.isNewElt(node)) {
if (fc && (!DU.isText(fc) || !fc.nodeValue.match(/^\s/))) {
space = ' ';
}
}
let space = getLeadingSpace(state, node, ' ', fc);
state.emitChunk(headingWT + space, node);
state.singleLineContext.enforce();

Expand All @@ -89,13 +131,7 @@ function buildHeadingHandler(headingWT) {

// For new elements, for prettier wikitext serialization,
// emit a space before the first '=' char.
space = '';
if (DU.isNewElt(node)) {
var lc = node.lastChild;
if (lc && (!DU.isText(lc) || !lc.nodeValue.match(/\s$/))) {
space = ' ';
}
}
space = getTrailingSpace(state, node, ' ');
state.emitChunk(space + headingWT, node);
state.singleLineContext.pop();
}),
Expand Down Expand Up @@ -138,13 +174,7 @@ function getListBullets(state, node) {

// For new elements, for prettier wikitext serialization,
// emit a space after the last bullet (if required)
var space = '';
if (DU.isNewElt(node)) {
var fc = node.firstChild;
if (fc && (!DU.isText(fc) || !fc.nodeValue.match(/^\s/))) {
space = ' ';
}
}
var space = getLeadingSpace(state, node, ' ');

var dp, nodeName, parentName;
var res = '';
Expand Down Expand Up @@ -794,12 +824,13 @@ var tagHandlers = JSUtils.mapObject({
.replace(/{{!}}{{!}}/, '{{!}}');
}

const thTag = yield serializeTableTag(startTagSrc, attrSepSrc, state, node, wrapperUnmodified);
const inWideTH = /!!|\|\||^{{!}}{{!}}/.test(thTag);
const trailingSpace = inWideTH ? getTrailingSpace(state, DU.previousNonDeletedSibling(node), '') : '';
const leadingSpace = getLeadingSpace(state, node, '');
// If the HTML for the first th is not enclosed in a tr-tag,
// we start a new line. If not, tr will have taken care of it.
WTSUtils.emitStartTag(
yield serializeTableTag(
startTagSrc, attrSepSrc, state, node, wrapperUnmodified
),
WTSUtils.emitStartTag(trailingSpace + thTag + leadingSpace,
node,
state
);
Expand Down Expand Up @@ -862,8 +893,10 @@ var tagHandlers = JSUtils.mapObject({
startTagSrc, attrSepSrc,
state, node, wrapperUnmodified
);
var inWideTD = (tdTag.length > 1);
WTSUtils.emitStartTag(tdTag, node, state);
var inWideTD = /\|\||^{{!}}{{!}}/.test(tdTag);
const trailingSpace = inWideTD ? getTrailingSpace(state, DU.previousNonDeletedSibling(node), '') : '';
const leadingSpace = getLeadingSpace(state, node, '');
WTSUtils.emitStartTag(trailingSpace + tdTag + leadingSpace, node, state);
var tdHandler = state.serializer.wteHandlers.tdHandler
.bind(state.serializer.wteHandlers, node, inWideTD);

Expand Down
7 changes: 4 additions & 3 deletions lib/utils/Util.js
Expand Up @@ -1350,10 +1350,11 @@ var Util = {
(cp >= 0x10000 && cp <= 0x10ffff);
},

isValidDSR: function(dsr) {
isValidDSR: function(dsr, all) {
const isValidOffset = n => typeof (n) === 'number' && n >= 0;
return dsr &&
typeof (dsr[0]) === 'number' && dsr[0] >= 0 &&
typeof (dsr[1]) === 'number' && dsr[1] >= 0;
isValidOffset(dsr[0]) && isValidOffset(dsr[1]) &&
(!all || (isValidOffset(dsr[2]) && isValidOffset(dsr[3])));
},

/**
Expand Down
14 changes: 8 additions & 6 deletions tests/citeParserTests-blacklist.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit bf3a2fd

Please sign in to comment.