Skip to content

Commit

Permalink
mdn: process the new hydration script
Browse files Browse the repository at this point in the history
Close #359
  • Loading branch information
myfreeer committed Jul 18, 2021
1 parent 957ca9e commit dd7025b
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 40 deletions.
12 changes: 11 additions & 1 deletion src/mdn/process-html/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ import {preProcessRemoveElements} from './process-remove-elements';
import {
preProcessYariData,
downloadAndRenderYariCompatibilityData,
ProcessYariDataResult
ProcessYariDataResult,
preProcessYariHydrationData
} from './process-yari-data';

const INJECT_JS_PATH = '/static/js/inject.js';
Expand Down Expand Up @@ -102,6 +103,15 @@ export const preProcessHtml = async (
dataScript = elem;
isYariDocs = true;
}
if (elem.attr('id') === 'hydration' &&
elem.attr('type') === 'application/json') {
if (yariCompatibilityData) {
error.warn('preProcessHtml: multiple yari data found', res.url);
}
yariCompatibilityData = preProcessYariHydrationData(text, elem);
dataScript = elem;
isYariDocs = true;
}
}
}

Expand Down
121 changes: 82 additions & 39 deletions src/mdn/process-html/process-yari-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -109,46 +109,11 @@ export type ProcessYariDataResult = MdnYariCompatibilityDataWithUrl[] | void;

/// endregion type def

const JSON_PARSE_STR = 'JSON.parse(';

/**
* Process yari window.__data__ to reduce its size
* and extract browser_compatibility info
* Note: a page can have multiple browser_compatibility sections
* @param text elem.text()
* @param elem the script element
* @return the browser_compatibility data
*/
export const preProcessYariData = (
text: string, elem: Cheerio
): ProcessYariDataResult => {
let jsonStrBeginIndex: number = text.indexOf(JSON_PARSE_STR),
jsonStrEndIndex: number,
escapedJsonText: string,
jsonText: string,
data: MdnYariDoc | void;
const browserCompatibilityData: MdnYariCompatibilityData[] = [];
if (jsonStrBeginIndex < 1 ||
jsonStrBeginIndex + JSON_PARSE_STR.length > text.length) {
return;
}
jsonStrBeginIndex += JSON_PARSE_STR.length;
if (!((jsonStrEndIndex = text.lastIndexOf('")')) > 0 &&
++jsonStrEndIndex < text.length &&
(escapedJsonText = text.slice(jsonStrBeginIndex, jsonStrEndIndex)))) {
return;
}
export function preProcessYariDocData(
data: MdnYariDoc
): MdnYariCompatibilityDataWithUrl[] | void {

try {
// unescape string for json
jsonText = JSON.parse(escapedJsonText);
data = JSON.parse(jsonText);
} catch (e) {
errorLogger.warn('postProcessYariData: json parse fail', e);
}
if (!data) {
return;
}
if (data.sidebarHTML) {
data.sidebarHTML = '';
}
Expand All @@ -163,6 +128,7 @@ export const preProcessYariData = (
data.other_translations = [];
}

const browserCompatibilityData: MdnYariCompatibilityData[] = [];
if (data.body?.length) {
for (let i = 0, item: MdnYariDocBody; i < data.body.length; i++) {
item = data.body[i];
Expand Down Expand Up @@ -193,6 +159,83 @@ export const preProcessYariData = (
}
}
}
return resultVal;
}

/**
 * Process the yari hydration script to reduce its size
 * and extract browser_compatibility info.
 * Note: a page can have multiple browser_compatibility sections.
 * https://github.com/mdn/yari/commit/107cf0ec5555405fe723d3b914ffd8246cac004c
 *
 * The script text is parsed as JSON, its `doc` member is handed to
 * preProcessYariDocData (which trims the document in place and collects
 * the compatibility data), and the trimmed object is serialized back into
 * the script element.
 *
 * @param text elem.text(), expected to be a JSON document with a `doc` member
 * @param elem the script element; its content is replaced on success
 * @return the browser_compatibility data, or nothing when the text is not
 * valid JSON or has no `doc` member (the element is left untouched then)
 */
export const preProcessYariHydrationData = (
  text: string, elem: Cheerio
): ProcessYariDataResult => {
  let data: { doc?: MdnYariDoc } | null | undefined;
  try {
    data = JSON.parse(text);
  } catch (e) {
    errorLogger.warn('preProcessYariHydrationData: json parse fail', e);
    return;
  }

  if (!data || !data.doc) {
    return;
  }
  const resultVal = preProcessYariDocData(data.doc);

  // Escape angle brackets so the payload can never terminate the enclosing
  // <script> element. The content has to stay valid JSON: `\x3c` is a
  // JavaScript-only escape that JSON parsers reject, whereas `\u003c` is
  // accepted by both JSON and JavaScript.
  const serialized = JSON.stringify(data)
    .replace(/</g, '\\u003c')
    .replace(/>/g, '\\u003e');
  elem.html(serialized);

  return resultVal;
};

// Marker that precedes the escaped JSON string literal inside the
// `window.__data__` script text; preProcessYariData locates the payload
// by searching for it with indexOf.
const JSON_PARSE_STR = 'JSON.parse(';

/**
* Process yari window.__data__ to reduce its size
* and extract browser_compatibility info
* Note: a page can have multiple browser_compatibility sections
* @param text elem.text()
* @param elem the script element
* @return the browser_compatibility data
*/
export const preProcessYariData = (
text: string, elem: Cheerio
): ProcessYariDataResult => {
let jsonStrBeginIndex: number = text.indexOf(JSON_PARSE_STR),
jsonStrEndIndex: number,
escapedJsonText: string,
jsonText: string,
data: MdnYariDoc | void;
if (jsonStrBeginIndex < 1 ||
jsonStrBeginIndex + JSON_PARSE_STR.length > text.length) {
return;
}
jsonStrBeginIndex += JSON_PARSE_STR.length;
if (!((jsonStrEndIndex = text.lastIndexOf('")')) > 0 &&
++jsonStrEndIndex < text.length &&
(escapedJsonText = text.slice(jsonStrBeginIndex, jsonStrEndIndex)))) {
return;
}
try {
// unescape string for json
jsonText = JSON.parse(escapedJsonText);
data = JSON.parse(jsonText);
} catch (e) {
errorLogger.warn('postProcessYariData: json parse fail', e);
}

if (!data) {
return;
}
const resultVal = preProcessYariDocData(data);

// language=JavaScript
text = `window.__data__ = ${JSON.stringify(data)
Expand Down Expand Up @@ -250,7 +293,7 @@ export async function downloadAndRenderYariCompatibilityData(
const downloadResources =
await Promise.all(contexts.map(c => pipeline.download(c.res)));
const placeholders: Cheerio[] = [];
const elements = $('#content>.article>p');
const elements = $('#content>.article>p,#content>.main-page-content>p');
for (let i = 0; i < elements.length; i++) {
const el = $(elements[i]);
const text = el.text();
Expand Down

0 comments on commit dd7025b

Please sign in to comment.