-
Notifications
You must be signed in to change notification settings - Fork 53
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Benchmark read view content stripping
* Use round-trip testing infrastructure * Add an extra configurable step to benchmark stripped attributes * Adapt wrapper to conditionally run this benchmark * Add example config for the benchmark * Remove unused HTML diffing codebase Bug: T272331 Change-Id: I7b6ac435e72c1257b8637015e6fea45e53076ce5
- Loading branch information
Showing
6 changed files
with
88 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
const zlib = require("zlib"); | ||
const XMLSerializer = require("../lib/wt2html/XMLSerializer.js"); | ||
const { DOMTraverser } = require("../lib/utils/DOMTraverser.js"); | ||
const { DOMUtils } = require("../lib/utils/DOMUtils.js"); | ||
const { fetchHTML } = require("./diff.html.js"); | ||
|
||
/**
 * Remove attributes from every element visited under `root`, as selected by
 * `rules`. The tree is modified in place.
 *
 * @param {Node} root Node handed to the traverser; its attributes (and those
 *   of the nodes the traverser visits) are stripped in place.
 * @param {?Object} rules Map of attribute name to matcher. A matcher is either
 *   falsy / lacking a `regex` key (always strip the attribute) or an object
 *   whose `regex` string is compiled with `new RegExp` and tested against the
 *   attribute value (strip only on a match). A null/undefined map strips
 *   nothing.
 * @return {Node} The same `root` that was passed in.
 * @throws {SyntaxError} If a rule's `regex` is not a valid pattern.
 */
function stripReadView(root, rules) {
  // Compile every rule once up front instead of once per visited node.
  const matchers = Object.entries(rules || {}).map(([attribute, rule]) => ({
    attribute,
    regex: rule && rule.regex ? new RegExp(rule.regex) : null,
  }));

  const traverser = new DOMTraverser();

  traverser.addHandler(null, (node) => {
    if (DOMUtils.isElt(node)) {
      matchers.forEach(({ attribute, regex }) => {
        // Test presence with hasAttribute rather than the truthiness of the
        // value, so attributes with an empty value ("") are stripped as well.
        if (!node.hasAttribute(attribute)) {
          return;
        }
        if (regex === null || regex.test(node.getAttribute(attribute))) {
          node.removeAttribute(attribute);
        }
      });
    }

    // NOTE(review): presumably tells DOMTraverser to keep traversing --
    // confirm against DOMTraverser's handler contract.
    return true;
  });

  traverser.traverse(root);
  return root;
}
|
||
/**
 * Compare the deflate-compressed size of a document body before and after
 * attribute stripping.
 *
 * @param {string} res HTML text; parsed with DOMUtils.parseHTML.
 * @param {?Object} rules Attribute-stripping rules, forwarded unchanged to
 *   stripReadView.
 * @return {{originalSize: number, strippedSize: number}} Byte lengths of the
 *   deflated serialization of the body before and after stripping.
 */
function diffSize(res, rules) {
  // Serialize a node and return the byte length of its deflated HTML.
  const deflatedSize = (node) =>
    zlib.deflateSync(XMLSerializer.serialize(node).html).byteLength;

  const body = DOMUtils.parseHTML(res).body;

  // The original size has to be taken first: the very same body node is
  // handed to stripReadView next, and is serialized again afterwards.
  const originalSize = deflatedSize(body);
  const strippedSize = deflatedSize(stripReadView(body, rules));

  return { originalSize, strippedSize };
}
|
||
/**
 * Fetch a page's HTML and report its compressed size before and after
 * attribute stripping.
 *
 * The first four arguments are forwarded unchanged to fetchHTML; see that
 * helper for their exact meaning.
 *
 * @param {*} endpoint Forwarded to fetchHTML.
 * @param {*} proxy Forwarded to fetchHTML.
 * @param {*} domain Forwarded to fetchHTML.
 * @param {*} title Forwarded to fetchHTML.
 * @param {?Object} rules Attribute-stripping rules passed on to diffSize.
 * @return {Promise} Whatever fetchHTML's `.then` chain yields: the result of
 *   diffSize for the fetched HTML.
 */
function benchmarkReadView(endpoint, proxy, domain, title, rules) {
  // Keep the `.then` chain (rather than async/await) so the caller receives
  // the same promise flavour that fetchHTML produces.
  return fetchHTML(endpoint, proxy, domain, title).then((html) =>
    diffSize(html, rules)
  );
}
|
||
module.exports.benchmarkReadView = benchmarkReadView; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
sampleRate: 0.05 # 5% | ||
|
||
# Rules map an attribute name to a matcher. | ||
# A matcher is either null (always strip the attribute) | ||
# or an object with a `regex` key (strip only when the attribute value matches). | ||
rules: | ||
data-parsoid: null | ||
data-mw: null | ||
typeof: null | ||
rel: | ||
regex: "^mw:.*$" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters