Skip to content

Commit

Permalink
html2wt: Strip mw:FallbackId spans before DOM diffs
Browse files Browse the repository at this point in the history
* This ensures that if an editing client strips them and sends us
  that modified DOM, we don't register a spurious diff on these
  headings.

* The blacklist changes are legitimate and are primarily because
  of dom-diff generating a slightly different dom-diff.

Bug: T195414
Change-Id: I49a656c43e32887d876e3c787cc74443f62a43f8
  • Loading branch information
subbuss committed May 25, 2018
1 parent 673b1db commit a2133c9
Show file tree
Hide file tree
Showing 10 changed files with 29 additions and 18 deletions.
3 changes: 3 additions & 0 deletions bin/domdiff.test.js
Expand Up @@ -58,6 +58,9 @@ Promise.async(function *() {
var oldDOM = DU.parseHTML(oldhtml).body;
var newDOM = DU.parseHTML(newhtml).body;

DU.stripSectionTagsAndFallbackIds(oldDOM);
DU.stripSectionTagsAndFallbackIds(newDOM);

var dummyEnv = {
conf: { parsoid: { debug: Util.booleanOption(argv.debug) }, wiki: {} },
page: { id: null },
Expand Down
3 changes: 1 addition & 2 deletions bin/parserTests.js
Expand Up @@ -249,8 +249,7 @@ ParserTests.prototype.applyChanges = function(item, body, changelist) {
var newNode;

// Don't separate legacy IDs from their H? node.
if (n.nodeName === 'SPAN' &&
n.getAttribute('typeof') === 'mw:FallbackId') {
if (DU.isFallbackIdSpan(n)) {
n = n.nextSibling || n.parentNode;
}

Expand Down
4 changes: 2 additions & 2 deletions bin/roundtrip-test.js
Expand Up @@ -456,8 +456,8 @@ var checkIfSignificant = function(offsets, data) {
stripElementIds(newBody.ownerDocument.body);

// Strip section tags from the DOMs
DU.stripSectionTags(oldBody.ownerDocument.body);
DU.stripSectionTags(newBody.ownerDocument.body);
DU.stripSectionTagsAndFallbackIds(oldBody.ownerDocument.body);
DU.stripSectionTagsAndFallbackIds(newBody.ownerDocument.body);

var i, offset;
var results = [];
Expand Down
4 changes: 2 additions & 2 deletions lib/html2wt/DOMHandlers.js
Expand Up @@ -64,8 +64,8 @@ function buildHeadingHandler(headingWT) {
return {
forceSOL: true,
handle: Promise.async(function *(node, state, wrapperUnmodified) {
var fc = node.firstChild;
if (fc && DU.hasTypeOf(fc, 'mw:FallbackId')) {
let fc = node.firstChild;
if (fc && DU.isFallbackIdSpan(fc)) {
// Skip fallback ID if present.
fc = fc.nextSibling;
}
Expand Down
6 changes: 3 additions & 3 deletions lib/html2wt/SelectiveSerializer.js
Expand Up @@ -73,11 +73,11 @@ SelectiveSerializer.prototype.serializeDOM = Promise.async(function *(body) {
domDiffStart = Date.now();
}

// Strip <section> tags, if present.
// Strip <section> and mw:FallbackId <span> tags, if present.
// This ensures that we can accept HTML from CX / VE
// and other clients that might have stripped them.
DU.stripSectionTags(body);
DU.stripSectionTags(this.env.page.dom);
DU.stripSectionTagsAndFallbackIds(body);
DU.stripSectionTagsAndFallbackIds(this.env.page.dom);

diff = (new DOMDiff(this.env)).diff(this.env.page.dom, body);

Expand Down
2 changes: 1 addition & 1 deletion lib/html2wt/WikitextSerializer.js
Expand Up @@ -1416,7 +1416,7 @@ WS.prototype.serializeDOM = Promise.async(function *(body, selserMode) {
if (!selserMode) {
// Strip <section> tags
// Selser mode will have done that already before running dom-diff
DU.stripSectionTags(body);
DU.stripSectionTagsAndFallbackIds(body);
}

this.logType = selserMode ? "trace/selser" : "trace/wts";
Expand Down
15 changes: 12 additions & 3 deletions lib/utils/DOMUtils.js
Expand Up @@ -994,6 +994,10 @@ DU = DOMUtils = {
}
},

isFallbackIdSpan: function(node) {
return node.nodeName === 'SPAN' && node.getAttribute('typeof') === 'mw:FallbackId';
},

/**
* These are primarily 'metadata'-like nodes that don't show up in output rendering.
* - In Parsoid output, they are represented by link/meta tags.
Expand All @@ -1011,7 +1015,7 @@ DU = DOMUtils = {
DU.isSolTransparentLink(node) ||
// Catch-all for everything else.
(node.nodeName === 'META' && DU.getDataParsoid(node).stx !== 'html') ||
(node.nodeName === 'SPAN' && node.getAttribute('typeof') === 'mw:FallbackId');
DU.isFallbackIdSpan(node);
},

/**
Expand Down Expand Up @@ -2663,19 +2667,24 @@ DOMUtils.isParsoidSectionTag = function(node) {
node.getAttribute('data-mw-section-id') !== null;
};

DOMUtils.stripSectionTags = function(node) {
DOMUtils.stripSectionTagsAndFallbackIds = function(node) {
var n = node.firstChild;
while (n) {
var next = n.nextSibling;
if (DU.isElt(n)) {
// Recurse into subtree before stripping this
DU.stripSectionTags(n);
DU.stripSectionTagsAndFallbackIds(n);

// Strip <section> tags
if (DU.isParsoidSectionTag(n)) {
DU.migrateChildren(n, n.parentNode, n);
DU.deleteNode(n);
}

// Strip <span typeof='mw:FallbackId' ...></span>
if (DU.isFallbackIdSpan(n)) {
DU.deleteNode(n);
}
}
n = next;
}
Expand Down
2 changes: 1 addition & 1 deletion lib/wt2html/DOMPostProcessor.js
Expand Up @@ -223,7 +223,7 @@ function DOMPostProcessor(env, options) {
// Only update headings and legacy links (first children of heading)
if (
/^H\d$/.test(node.nodeName) ||
node.getAttribute('typeof') === 'mw:FallbackId'
DU.isFallbackIdSpan(node)
) {
var suffix = 2;
while (this.seenIds.has(key + '_' + suffix)) {
Expand Down
3 changes: 1 addition & 2 deletions tests/TestUtils.js
Expand Up @@ -222,8 +222,7 @@ TestUtils.unwrapSpansAndNormalizeIEW = function(body, stripSpanTypeof, parsoidOn
// Skip over the empty mw:FallbackId <span> and strip leading WS
// on the other side of it.
if (/^H[1-6]$/.test(node.nodeName) &&
child && child.nodeName === 'SPAN' &&
child.getAttribute('typeof') === 'mw:FallbackId') {
child && DU.isFallbackIdSpan(child)) {
child = child.nextSibling;
}
for (; child; child = next) {
Expand Down
5 changes: 3 additions & 2 deletions tests/parserTests-blacklist.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit a2133c9

Please sign in to comment.