Skip to content

Commit

Permalink
Misc Rewriting Fixes (2.12.5) (#78)
Browse files Browse the repository at this point in the history
* rewriting fixes:
- headers: catch exception if Link header rewriting fails, just use original Link header
- twitter-specific replay: add rules for video stream rewriting for embedded twitter videos
- twitter-specific: add null check that occasionally broke video replay
- js rewriting: rewrite <script type='module'> scripts with module-specific rewriting
- tests: additional tests for new twitter rewriting
- tests: add a test for rewriting a Link header whose target is not a URL

- bump version to 2.12.5
  • Loading branch information
ikreymer committed Sep 2, 2022
1 parent 95c88e9 commit 6a70382
Show file tree
Hide file tree
Showing 10 changed files with 172 additions and 71 deletions.
2 changes: 1 addition & 1 deletion dist/sw.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "@webrecorder/wabac",
"version": "2.12.4",
"version": "2.12.5",
"main": "index.js",
"license": "AGPL-3.0-or-later",
"dependencies": {
Expand Down
4 changes: 2 additions & 2 deletions src/collection.js
Expand Up @@ -211,16 +211,16 @@ class Collection {
if (!self.__WB_pmw) { self.__WB_pmw = function(obj) { this.__WB_source = obj; return this; } }
const window = wrapObj("window");
const globalThis = wrapObj("globalThis");
const document = wrapObj("document");
const location = wrapObj("location");
const top = wrapObj("top");
const parent = wrapObj("parent");
const frames = wrapObj("frames");
const opener = wrapObj("opener");
const __self = wrapObj("self");
const __globalThis = wrapObj("globalThis");
export { window, document, location, top, parent, frames, opener, __self as self };
export { window, document, location, top, parent, frames, opener, __self as self, __globalThis as globalThis };
`;

const payload = new TextEncoder().encode(string);
Expand Down
91 changes: 53 additions & 38 deletions src/rewrite/dsruleset.js
Expand Up @@ -43,7 +43,14 @@ const DEFAULT_RULES = [
{
contains: ["api.twitter.com/2/", "twitter.com/i/api/2/", "twitter.com/i/api/graphql/"],
rxRules: [
[/"video_info".*?}]}/, ruleRewriteTwitterVideo]
[/"video_info".*?}]}/, ruleRewriteTwitterVideo("\"video_info\":")]
]
},

{
contains: ["cdn.syndication.twimg.com/tweet-result"],
rxRules: [
[/"video".*?viewCount":\d+}/, ruleRewriteTwitterVideo("\"video\":")]
]
},

Expand Down Expand Up @@ -97,59 +104,67 @@ function ruleReplace(string) {
// }

// ===========================================================================
function ruleRewriteTwitterVideo(string, opts) {
if (!opts) {
return string;
}
function ruleRewriteTwitterVideo(prefix) {

// if (!opts.live && !(opts.response && opts.response.extraOpts && opts.response.extraOpts.rewritten)) {
// return string;
// }
return (string, opts) => {
if (!opts) {
return string;
}

const origString = string;
// if (!opts.live && !(opts.response && opts.response.extraOpts && opts.response.extraOpts.rewritten)) {
// return string;
// }

try {
const prefix = "\"video_info\":";
const origString = string;

const MAX_BITRATE = 5000000;
try {
const MAX_BITRATE = 5000000;

const extraOpts = opts.response && opts.response.extraOpts;
const extraOpts = opts.response && opts.response.extraOpts;

let maxBitrate = MAX_BITRATE;
let maxBitrate = MAX_BITRATE;

if (opts.save) {
opts.save.maxBitrate = maxBitrate;
} else if (extraOpts.maxBitrate) {
maxBitrate = extraOpts.maxBitrate;
}
if (opts.save) {
opts.save.maxBitrate = maxBitrate;
} else if (extraOpts && extraOpts.maxBitrate) {
maxBitrate = extraOpts.maxBitrate;
}

string = string.slice(prefix.length);
string = string.slice(prefix.length);

const data = JSON.parse(string);
const data = JSON.parse(string);

let bestVariant = null;
let bestBitrate = 0;
let bestVariant = null;
let bestBitrate = 0;
//sort by src
let bestSrc = "";

for (const variant of data.variants) {
if (variant.content_type !== "video/mp4") {
continue;
}
for (const variant of data.variants) {
if ((variant.content_type && variant.content_type !== "video/mp4") ||
(variant.type && variant.type !== "video/mp4")) {
continue;
}

if (variant.bitrate && variant.bitrate > bestBitrate && variant.bitrate <= maxBitrate) {
bestVariant = variant;
bestBitrate = variant.bitrate;
if (variant.bitrate && variant.bitrate > bestBitrate && variant.bitrate <= maxBitrate) {
bestVariant = variant;
bestBitrate = variant.bitrate;
} else if (variant.src && variant.src > bestSrc) {
bestVariant = variant;
bestSrc = variant.src;
}
}
}

if (bestVariant) {
data.variants = [bestVariant];
}
if (bestVariant) {
data.variants = [bestVariant];
}

return prefix + JSON.stringify(data);
return prefix + JSON.stringify(data);

} catch (e) {
return origString;
}
} catch (e) {
console.warn("rewriter error: ", e);
return origString;
}
};
}

// ===========================================================================
Expand Down
41 changes: 25 additions & 16 deletions src/rewrite/html.js
Expand Up @@ -253,7 +253,7 @@ class HTMLRewriter
let insertAdded = false;

let context = "";
let scriptRw = false;
let scriptRw = "";
let replaceTag = null;

const addInsert = () => {
Expand Down Expand Up @@ -290,7 +290,11 @@ class HTMLRewriter

const scriptType = this.getAttr(startTag.attrs, "type");

scriptRw = !scriptType || (scriptType.indexOf("javascript") >= 0 || scriptType.indexOf("ecmascript") >= 0);
if (scriptType === "module") {
scriptRw = "module";
} else if (!scriptType || (scriptType.indexOf("javascript") >= 0 || scriptType.indexOf("ecmascript") >= 0)) {
scriptRw = "script";
}
break;
}

Expand Down Expand Up @@ -323,25 +327,30 @@ class HTMLRewriter
});

rwStream.on("text", (textToken, raw) => {
if (context === "script") {
doEmit(scriptRw ? rewriter.rewriteJS(textToken.text) : textToken.text);
} else if (context === "style") {
doEmit(rewriter.rewriteCSS(textToken.text));
} else {
// if raw data is different and raw data potentially cut off, just use the parsedText
if (raw !== textToken.text && (textToken.sourceCodeLocation.startOffset - rwStream.posTracker.droppedBufferSize) < 0) {
raw = textToken.text;
const text = (() => {
if (context === "script") {
if (scriptRw) {
const isModule = scriptRw === "module";
const prefix = rewriter.prefix;
return rewriter.rewriteJS(textToken.text, {isModule, prefix});
} else {
return textToken.text;
}
} else if (context === "style") {
return rewriter.rewriteCSS(textToken.text);
} else {
// if raw data is different and raw data potentially cut off, just use the parsedText
if (raw !== textToken.text && (textToken.sourceCodeLocation.startOffset - rwStream.posTracker.droppedBufferSize) < 0) {
raw = textToken.text;
}
return this.rewriteHTMLText(raw);
}
raw = this.rewriteHTMLText(raw);
doEmit(raw);
}
});
})();

function doEmit(text) {
for (let i = 0; i < text.length; i += MAX_STREAM_CHUNK_SIZE) {
rwStream.emitRaw(text.slice(i, i + MAX_STREAM_CHUNK_SIZE));
}
}
});

const sourceGen = response.createIter();
let hasData = false;
Expand Down
29 changes: 19 additions & 10 deletions src/rewrite/index.js
Expand Up @@ -294,7 +294,7 @@ class Rewriter {

// JS
rewriteJS(text, opts) {
const noUrlProxyRewrite = opts && !opts.rewriteUrl;
const noUrlProxyRewrite = opts && !opts.rewriteUrl && opts.isModule === undefined;
const dsRules = noUrlProxyRewrite ? baseRules : this.dsRules;
const dsRewriter = dsRules.getRewriter(this.baseUrl);

Expand Down Expand Up @@ -509,15 +509,7 @@ class Rewriter {

case "link":
if (urlRewrite && !isAjax) {
const parsed = parseLinkHeader(header[1]);

for (const entry of Object.values(parsed)) {
if (entry.url) {
entry.url = this.rewriteUrl(entry.url);
}
}

new_headers.append(header[0], formatLinkHeader(parsed));
new_headers.append(header[0], this.rewriteLinkHeader(header[1]));
} else {
new_headers.append(header[0], header[1]);
}
Expand All @@ -530,6 +522,23 @@ class Rewriter {

return new_headers;
}

rewriteLinkHeader(value) {
try {
const parsed = parseLinkHeader(value);

for (const entry of Object.values(parsed)) {
if (entry.url) {
entry.url = this.rewriteUrl(entry.url);
}
}

return formatLinkHeader(parsed);
} catch (e) {
console.warn("Error parsing link header: " + value);
return value;
}
}
}

export { Rewriter, ArchiveResponse, baseRules, jsRules };
Expand Down
8 changes: 6 additions & 2 deletions src/rewrite/jsrewriter.js
Expand Up @@ -140,7 +140,11 @@ if (!self.__WB_pmw) { self.__WB_pmw = function(obj) { this.__WB_source = obj; re
return `import { ${localDecls.join(", ")} } from "${prefix}__wb_module_decl.js";\n`;
}

isModule(text) {
isModule(text, opts) {
if (opts && opts.isModule) {
return true;
}

if (text.indexOf("import") >= 0 && text.match(IMPORT_RX)) {
return true;
}
Expand All @@ -155,7 +159,7 @@ if (!self.__WB_pmw) { self.__WB_pmw = function(obj) { this.__WB_source = obj; re
rewrite(text, opts) {
let newText;

if (this.isModule(text)) {
if (this.isModule(text, opts)) {
return this.getModuleDecl(GLOBAL_OVERRIDES, opts.prefix) + super.rewrite(text, opts);
}

Expand Down
19 changes: 18 additions & 1 deletion test/rewriteHTML.js
Expand Up @@ -44,8 +44,18 @@ let parent = _____WB$wombat$assign$function_____("parent");
let frames = _____WB$wombat$assign$function_____("frames");
let opener = _____WB$wombat$assign$function_____("opener");
let arguments;
\n` + text + "\n\n}";
${text}
}`;

}


/**
 * Builds the expected output for a rewritten `<script type="module">` element:
 * the opening tag, the module-declaration import line, a newline, the given
 * script text, and the closing tag.
 */
function wrapScriptModule(text) {
  const openTag = "<script type=\"module\">";
  const moduleDeclImport = "import { window, globalThis, self, document, location, top, parent, frames, opener } from \"http://localhost:8080/prefix/20201226101010/__wb_module_decl.js\";";
  const closeTag = "</script>";

  return openTag.concat(moduleDeclImport, "\n", text, closeTag);
}


Expand Down Expand Up @@ -306,6 +316,13 @@ test("script", rewriteHtml,
{useBaseRules: false}
);

// module script
test("script", rewriteHtml,
"<script type=\"module\">console.log(window.parent.location.href);</script>",
wrapScriptModule("console.log(window.parent.location.href);"),
{useBaseRules: false}
);

test("object pdf", rewriteHtml,
"<object type=\"application/pdf\" data=\"https://example.com/some/file.pdf\">",
"<iframe type=\"application/pdf\" src=\"https://example.com/some/file.pdf\">");
Expand Down
9 changes: 9 additions & 0 deletions test/rewriteHeaders.js
Expand Up @@ -37,4 +37,13 @@ test(rewriteHeaders,
true
);

// Not rewritten, not a url
test(rewriteHeaders,
"Link",
"<sometext>; rel=\"test\"; as=\"script\"",
"<sometext>; rel=\"test\"; as=\"script\"",
);




38 changes: 38 additions & 0 deletions test/rewriteVideo.js
Expand Up @@ -234,8 +234,46 @@ test("Twitter rewrite json", async t => {
const result = await doRewrite({content: JSON.stringify(content), contentType: "application/json", url: api + "some/endpoint", extraOpts});
t.deepEqual(JSON.parse(result), expected);
}
});


// Checks rewriting of the embedded-tweet JSON ("video" object with "type"/"src"
// variants and no bitrate): only a single mp4 variant is expected to remain.
test("Twitter rewrite embedded json", async t => {

// Input: one non-mp4 variant plus three mp4 variants, none with a bitrate.
const content = {
"video": {
"some_data": "other",
"variants": [{
"type": "application/x-mpegURL",
"src": "https://example.com/100x100/A"
}, {
"type": "video/mp4",
"src": "https://example.com/100x100/B"
}, {
"type": "video/mp4",
"src": "https://example.com/200x200/B"
}, {
"type": "video/mp4",
"src": "https://example.com/300x300/B"
}],
"viewCount": 1234
}
};

// Expected: other fields untouched, variants reduced to the mp4 entry whose
// "src" string compares greatest (300x300/B).
const expected = {
"video": {
"some_data": "other",
"variants": [{
"type": "video/mp4",
"src": "https://example.com/300x300/B"
}],
"viewCount": 1234
}
};

const extraOpts = {rewritten: true};

// NOTE(review): "some/endpoint" is appended directly after the query string,
// so the resulting url ends in "?some=valuesome/endpoint" — confirm intended.
for (const api of ["https://cdn.syndication.twimg.com/tweet-result?some=value"]) {
const result = await doRewrite({content: JSON.stringify(content), contentType: "application/json", url: api + "some/endpoint", extraOpts});
t.deepEqual(JSON.parse(result), expected);
}
});

0 comments on commit 6a70382

Please sign in to comment.