Skip to content

Commit

Permalink
Benchmark read view content stripping
Browse files Browse the repository at this point in the history
* Uses round trip testing infrastructure
* Add an extra configurable step to benchmark stripped attributes
* Adapt wrapper to conditionally run this benchmark
* Add example config for the benchmark
* Remove unused html diffing codebase

Bug: T272331
Change-Id: I7b6ac435e72c1257b8637015e6fea45e53076ce5
  • Loading branch information
johngian authored and jenkins-bot committed Feb 18, 2022
1 parent e94f523 commit ea7ed64
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 25 deletions.
61 changes: 61 additions & 0 deletions bin/benchmark.readViewStrip.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
const zlib = require("zlib");
const XMLSerializer = require("../lib/wt2html/XMLSerializer.js");
const { DOMTraverser } = require("../lib/utils/DOMTraverser.js");
const { DOMUtils } = require("../lib/utils/DOMUtils.js");
const { fetchHTML } = require("./diff.html.js");

/**
 * Strip attributes from every element of a DOM subtree, in place.
 *
 * @param {Node} root Root of the subtree to traverse; it is modified in place.
 * @param {Object} rules Map of attribute name to matcher. A matcher that is
 *   null (or has no `regex` property) strips the attribute unconditionally;
 *   a matcher of the form `{ regex: "..." }` strips the attribute only when
 *   its value matches that pattern. A missing `rules` map strips nothing.
 * @return {Node} The same `root` that was passed in.
 */
function stripReadView(root, rules) {
	// Compile every rule's pattern once up front instead of once per
	// visited node and attribute.
	const compiledRules = Object.entries(rules || {}).map(([attribute, rule]) => ({
		attribute,
		regex: rule && rule.regex ? new RegExp(rule.regex) : null,
	}));

	const traverser = new DOMTraverser();

	// NOTE(review): a null node name presumably registers the handler for
	// every node type -- confirm against DOMTraverser.
	traverser.addHandler(null, (node) => {
		if (DOMUtils.isElt(node)) {
			compiledRules.forEach(({ attribute, regex }) => {
				// Test for presence, not truthiness of the value, so that
				// attributes with an empty value are stripped as well.
				if (!node.hasAttribute(attribute)) {
					return;
				}
				if (regex === null || regex.test(node.getAttribute(attribute))) {
					node.removeAttribute(attribute);
				}
			});
		}

		// NOTE(review): `true` presumably tells DOMTraverser to carry on
		// with the traversal -- confirm against DOMTraverser.
		return true;
	});

	traverser.traverse(root);
	return root;
}

/**
 * Measure the deflated size of an HTML document's body before and after
 * stripping attributes from it.
 *
 * @param {string} res HTML to parse.
 * @param {Object} rules Stripping rules, passed through to stripReadView.
 * @return {Object} `{ originalSize, strippedSize }`, each the byte length of
 *   the deflated serialization of the body.
 */
function diffSize(res, rules) {
	// Byte length of the deflate-compressed serialization of `node`.
	const deflatedLength = (node) => {
		const serialized = XMLSerializer.serialize(node);
		return zlib.deflateSync(serialized.html).byteLength;
	};

	const { body } = DOMUtils.parseHTML(res);

	// The original size has to be taken first: stripReadView works on the
	// very same body node rather than on a copy.
	const originalSize = deflatedLength(body);
	const strippedSize = deflatedLength(stripReadView(body, rules));

	return { originalSize, strippedSize };
}

/**
 * Fetch the HTML for a page and measure how much stripping the given
 * attributes changes its deflated size.
 *
 * @param {string} endpoint Passed through to fetchHTML.
 * @param {string} proxy Passed through to fetchHTML.
 * @param {string} domain Passed through to fetchHTML.
 * @param {string} title Passed through to fetchHTML.
 * @param {Object} rules Stripping rules, passed through to diffSize.
 * @return {Promise} Resolves with the result of diffSize.
 */
function benchmarkReadView(endpoint, proxy, domain, title, rules) {
	const measure = (html) => diffSize(html, rules);
	return fetchHTML(endpoint, proxy, domain, title).then(measure);
}

module.exports.benchmarkReadView = benchmarkReadView;
1 change: 1 addition & 0 deletions bin/diff.html.js
Original file line number Diff line number Diff line change
Expand Up @@ -184,4 +184,5 @@ if (require.main === module) {
module.exports.htmlDiff = htmlDiff;
module.exports.fileDiff = fileDiff;
module.exports.displayResult = displayResult;
module.exports.fetchHTML = fetchHTML;
}
22 changes: 10 additions & 12 deletions bin/roundtrip-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

require('../core-upgrade.js');
require('colors');
const { htmlDiff } = require('./diff.html.js');
const { benchmarkReadView } = require('./benchmark.readViewStrip.js');

var entities = require('entities');
var fs = require('fs');
Expand Down Expand Up @@ -820,18 +820,16 @@ var runTests = Promise.async(function *(title, options, formatter) {
error = e;
exitCode = 1;
}
var output = formatter(error, prefix, title, data.diffs, profile);
// write diffs to $outDir/DOMAIN/TITLE
if (options.htmlDiffConfig && Math.random() < (options.htmlDiffConfig.sampleRate || 0)) {
const outDir = options.htmlDiffConfig.outDir || "/tmp/htmldiffs";
const dir = `${outDir}/${domain}`;
if (!fs.existsSync(dir)) {
fs.mkdirSync(dir);
}
const diffs = yield htmlDiff(options.htmlDiffConfig, domain, title);
// parsoidOptions.title is uri-encoded
fs.writeFileSync(`${dir}/${parsoidOptions.title}`, diffs.join('\n'));

if (options.readViewStripBenchmark && Math.random() < (options.readViewStripBenchmark.sampleRate || 0)) {
const rules = options.readViewStripBenchmark.rules;
const diffSizes = yield benchmarkReadView(options.parsoidURL, options.proxyURL, domain, title, rules);
profile.benchmarkReadViewOriginalSize = diffSizes.originalSize;
profile.benchmarkReadViewStrippedSize = diffSizes.strippedSize;
}

var output = formatter(error, prefix, title, data.diffs, profile);

return {
output: output,
exitCode: exitCode
Expand Down
9 changes: 0 additions & 9 deletions htmldiffs.config.example.yaml

This file was deleted.

11 changes: 11 additions & 0 deletions readviewstrip.config.example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
sampleRate: 0.05 # 5%

# Rules are defined as a map of an attribute name to a matcher.
# A matcher is either null (always strip the attribute)
# or an object with a `regex` key (strip only when the value matches).
rules:
data-parsoid: null
data-mw: null
typeof: null
rel:
regex: "^mw:.*$"
9 changes: 5 additions & 4 deletions tests/testreduce/rtTestWrapper.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ const yaml = require('js-yaml');
// don't need to start their own.
let parsoidURLOpts = null;

let htmlDiffConfig = null;
let readViewStripBenchmark = null;

// Read ids from a file and return the first line of the file
function getTestRunId(opts) {
Expand All @@ -23,7 +23,7 @@ function _run(test) {
return rtTest.runTests(test.title, {
prefix: test.prefix,
parsoidURLOpts: parsoidURLOpts,
htmlDiffConfig: htmlDiffConfig
readViewStripBenchmark: readViewStripBenchmark
}, rtTest.xmlFormat).then(function(result) {
return result.output;
});
Expand All @@ -32,9 +32,10 @@ function _run(test) {
function runRoundTripTest(config, test) {
if (!parsoidURLOpts) {
parsoidURLOpts = config.parsoidPHP;
const configFile = path.resolve(__dirname, './htmldiffs.config.yaml');

const configFile = path.resolve(__dirname, './readviewstrip.config.yaml');
if (fs.existsSync(configFile)) {
htmlDiffConfig = yaml.load(fs.readFileSync(configFile, 'utf8'));
readViewStripBenchmark = yaml.load(fs.readFileSync(configFile, 'utf8'));
}
}
return _run(test);
Expand Down

0 comments on commit ea7ed64

Please sign in to comment.