Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
9023531
Add a basic timing harness for paged.js.
KubaO May 18, 2026
7602636
Profile pagedjs rendering system.
KubaO May 18, 2026
ad431b9
Make the render time linear.
KubaO May 18, 2026
e9fe585
Investigate the number of flushes before and after the fix.
KubaO May 18, 2026
d4c16f4
Test further flush-related optimization opportunities.
KubaO May 18, 2026
b4acd77
Track perf/package-lock.json.
KubaO May 18, 2026
be0a6d4
Add incremental-update PDF writer to skip the pdf-lib roundtrip.
KubaO May 18, 2026
2c31b37
Profile pdf-lib roundtrip; survey Chromium PDF knobs.
KubaO May 18, 2026
92a7db8
Drop pagedjs-cli, render via a thin in-tree driver.
KubaO May 18, 2026
834b3a6
Restore live render progress dropped with pagedjs-cli.
KubaO May 18, 2026
3998055
Add wall-clock progress to the generate phase.
KubaO May 19, 2026
a50c1a9
Consolidate AtPage.finalizePage querySelector calls.
KubaO May 19, 2026
a1a1548
Batch finalizePage's max-width / max-height reads.
KubaO May 19, 2026
7bddb82
Memoize finalizePage's grid-template values by className.
KubaO May 19, 2026
370ec86
Hoist AtPage.finalizePage's grid-template emission to parse time.
KubaO May 19, 2026
61508ed
Document residual render O(n); identify aggressive-detach win.
KubaO May 19, 2026
348eb20
Ship aggressive detach: -22s render via removeChild + JS page counter.
KubaO May 19, 2026
ddc6d66
Fix colspan/rowspan typo in findOverflow's rowspan handling.
KubaO May 19, 2026
468f380
Fix counter-reset -> counter-set in Counters handler.
KubaO May 19, 2026
296593a
Cache lastChild.lastChild in findEndToken loop.
KubaO May 19, 2026
558de92
Document findEndToken win + createBreakToken dedup post-mortem.
KubaO May 19, 2026
0600aee
Skip findEndToken when no real onUnderflow handler is set.
KubaO May 19, 2026
b48be7e
Document Footnotes-handler skip attempt as gBCR migration trap.
KubaO May 20, 2026
9511fbb
Define "gBCR" terminology in perf README.
KubaO May 20, 2026
b1c4c2a
Promote find-callers.mjs into perf/ and gitignore measure outputs.
KubaO May 20, 2026
5956723
Additive backoff for renderTo overflow checks (-4.25s render).
KubaO May 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,8 @@
CLAUDE.md
/.htmltest.yml
node_modules/

# perf/measure.mjs --out targets at repo root (book.pdf + render.cpuprofile + timing.*)
/before/
/after-*/
/findoverflow-baseline/
9 changes: 8 additions & 1 deletion docs/assets/css/print.css
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,14 @@
margin: 22mm 20mm 22mm 20mm;

@bottom-right {
content: counter(page);
/* Reads a JS-tracked page number set on each .pagedjs_page wrapper
by the Counters handler in docs/lib/paged.browser.js. Switched off
`counter(page)` because the aggressive-detach render optimization
(perf/detach-pages.js) physically removes finalized pages from the
DOM, which breaks CSS counter accumulation. The Counters handler
honours the same part-divider counter-reset rules as the original
counter(page) did, so part-restarts continue to work. */
content: var(--page-num);
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif;
font-size: 9pt;
color: #555;
Expand Down
20 changes: 19 additions & 1 deletion docs/book.bat
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,27 @@
rem PDF render only. Run build.bat (or `bundle exec jekyll build`) first
rem so _site-pdf\book.html and its dependencies exist; this script
rem assumes the Pdfify plugin has already populated _site-pdf\.
rem
rem render-book.mjs drives puppeteer + paged.js + pdf-lib directly so
rem we control pdf-lib's parseSpeed (the default yields the event loop
rem between every 100 objects on load, adding ~32 s to a 100 s build
rem for no reason in Node -- see perf\README.md "Profiling pdf-lib's
rem load" for the full diagnosis). pagedjs-cli passed no options to
rem load/save and inherited that cost; we don't.
rem
rem --additional-script ..\perf\detach-pages.js injects a Paged.Handler
rem that hides each finalised page from Chromium's layout tree and
rem restores them all before page.pdf() runs. Drops total render from
rem ~104s to ~51s on the 1638-page book by eliminating the O(n^2)
rem getBoundingClientRect cost in paged.js's overflow walker.
if not exist _site-pdf\book.html (
echo _site-pdf\book.html not found. Run build.bat first.
exit /b 1
)
if not exist node_modules\puppeteer\package.json (
echo Installing docs\ dependencies...
call npm install
if errorlevel 1 exit /b 1
)
if not exist _pdf mkdir _pdf
npx pagedjs-cli _site-pdf\book.html -o _pdf\book.pdf --outline-tags h1,h2,h3,h4 -t 600000
node render-book.mjs _site-pdf\book.html -o _pdf\book.pdf --outline-tags h1,h2,h3,h4 --additional-script ..\perf\detach-pages.js
181 changes: 181 additions & 0 deletions docs/lib/outline.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
// Adapted verbatim from pagedjs-cli 0.4.3 src/outline.js
// (https://github.com/pagedjs/pagedjs-cli) -- MIT, Copyright (c) 2018
// Adam Hyde. Pulled in directly so we no longer need the pagedjs-cli
// dependency.
//
// Two exports:
// parseOutline(page, tags, enableWarnings) -- runs in the browser
// via page.evaluate. Walks document.querySelectorAll(tags.join(','))
// to produce a nested outline tree of {title, destination, children}.
// Also creates a hidden <a href="#id"> link-holder so Chrome
// registers a named destination for each heading -- without that,
// the named-destination Dest entries we write in setOutline would
// point nowhere.
//
// setOutline(pdfDoc, outline, enableWarnings) -- runs in Node on the
// parsed pdf-lib document. Walks the outline tree and writes a
// /Outlines tree of PDF dicts using pdf-lib's low-level API
// (PDFDict.fromMapWithContext, etc.). Each entry's Dest is a name
// that Chrome's /Dests catalog entry resolves to a page+coords.

import { PDFDict, PDFName, PDFNumber, PDFHexString } from "pdf-lib";
import { decode as htmlEntitiesDecode } from "html-entities";

// Matches a tag-like span: "<", one or more characters that are not ">", then ">".
const SanitizeXMLRx = /<[^>]+>/g;

/**
 * Remove tag-like spans from a string, then decode HTML entities in the rest.
 *
 * The regex replacement is skipped when the input contains no "<" at all.
 * In this file the input is an outline title that parseOutline read from
 * `innerText`.
 * NOTE(review): literal angle-bracket text in a heading (e.g. `Vec<T>`)
 * matches the tag pattern and is removed as well -- confirm that is acceptable.
 *
 * @param {string} string - text to clean.
 * @returns {string} the text without tag-like spans, entities decoded.
 */
function sanitize (string) {
  const withoutTags = string.includes("<")
    ? string.replace(SanitizeXMLRx, "")
    : string;
  return htmlEntitiesDecode(withoutTags);
}

/**
 * Build the outline tree from the headings of the rendered document.
 *
 * The callback is executed through `page.evaluate`, so it must stay
 * self-contained: it may only use its `tags` argument and browser globals,
 * never names from this module.
 *
 * For every matched heading, in document order, it:
 *   - appends one hidden `<a href="#dest">` to a `display: none` holder that
 *     is inserted as the first child of `<body>`, so a named destination is
 *     registered for the heading;
 *   - attaches a `{title, destination, children, depth}` node to the tree,
 *     where `depth` is the index of the heading's lower-cased tag name in
 *     `tags`.
 *
 * @param {object} page - object exposing `evaluate(fn, arg)`.
 * @param {string[]} tags - tag names ordered from shallowest to deepest
 *   outline level; joined with "," to form the selector.
 * @param {boolean} [enableWarnings] - unused here; kept so the signature
 *   stays compatible with existing callers.
 * @returns {Promise<Array<object>>} the top-level outline nodes. Each node's
 *   `parent` back-link is reset to `undefined` before returning so the tree
 *   contains no cycles.
 */
export async function parseOutline(page, tags, enableWarnings) {
  return page.evaluate((tags) => {
    const headings = [...document.querySelectorAll(tags.join(","))];

    const root = {children: [], depth: -1};
    let currentOutlineNode = root;

    const linkHolder = document.createElement("div");
    const body = document.querySelector("body");
    linkHolder.style.display = "none";
    body.insertBefore(linkHolder, body.firstChild);

    for (const tag of headings) {
      const orderDepth = tags.indexOf(tag.tagName.toLowerCase());
      // encode section ID until https://bugs.chromium.org/p/chromium/issues/detail?id=985254 is fixed
      const dest = encodeURIComponent(tag.id).replace(/%/g, "#25");

      // Register exactly one destination link per heading.
      const hiddenLink = document.createElement("a");
      hiddenLink.href = "#" + dest;
      linkHolder.appendChild(hiddenLink);

      // Climb until the current node is no deeper than this heading.
      while (orderDepth < currentOutlineNode.depth) {
        currentOutlineNode = currentOutlineNode.parent;
      }

      const newNode = {
        title: tag.innerText.trim(),
        destination: dest,
        children: [],
        depth: orderDepth,
      };

      // Same depth: become a sibling (share the parent). Deeper: become a
      // child. A node without a parent (the root) always adopts the new
      // node as its own child.
      const isSibling = orderDepth === currentOutlineNode.depth;
      const owner = (isSibling && currentOutlineNode.parent)
        ? currentOutlineNode.parent
        : currentOutlineNode;
      newNode.parent = owner;
      owner.children.push(newNode);
      currentOutlineNode = newNode;
    }

    // Drop the back-links so the returned structure is acyclic.
    const stripParentProperty = (node) => {
      node.parent = undefined;
      for (const child of node.children) {
        stripParentProperty(child);
      }
    };
    stripParentProperty(root);
    return root.children;
  }, tags);
}

/**
 * Give every outline item its own object reference and record its parent's.
 *
 * Items are visited depth-first, parent before children, so references are
 * requested from `context.nextRef()` in that order. Each item gains two
 * properties: `ref` (its own reference) and `parentRef`.
 *
 * @param {Array<object>} layer - sibling outline items, each with `children`.
 * @param {object} context - object exposing `nextRef()`.
 * @param {*} parentRef - reference stored as `parentRef` on every item of `layer`.
 */
function setRefsForOutlineItems (layer, context, parentRef) {
  layer.forEach((entry) => {
    const ownRef = context.nextRef();
    entry.ref = ownRef;
    entry.parentRef = parentRef;
    setRefsForOutlineItems(entry.children, context, ownRef);
  });
}

/**
 * Count every item in an outline layer together with all of its descendants.
 *
 * @param {Array<object>} layer - sibling outline items, each with `children`.
 * @returns {number} number of items in `layer` plus all nested items.
 */
function countChildrenOfOutline (layer) {
  return layer.reduce(
    (total, entry) => total + 1 + countChildrenOfOutline(entry.children),
    0
  );
}

/**
 * Create one PDF dictionary per outline item and assign it to the item's ref.
 *
 * Every dictionary gets Title, Dest and Parent. Prev / Next are added when a
 * neighbouring sibling exists in `layer`. First, Last and Count are added when
 * the item has children; Count is the total number of descendants. Items are
 * processed parent first, then their children recursively.
 *
 * Expects `ref` and `parentRef` to be present on every item already (they are
 * written by setRefsForOutlineItems).
 *
 * @param {Array<object>} layer - sibling outline items.
 * @param {object} context - PDF context used for `assign` and for building
 *   the dictionaries.
 */
function buildPdfObjectsForOutline (layer, context) {
  layer.forEach((entry, index) => {
    const previous = layer[index - 1];
    const following = layer[index + 1];
    const kids = entry.children;

    const fields = new Map();
    fields.set(PDFName.of("Title"), PDFHexString.fromText(sanitize(entry.title)));
    fields.set(PDFName.of("Dest"), PDFName.of(entry.destination));
    fields.set(PDFName.of("Parent"), entry.parentRef);

    if (previous) {
      fields.set(PDFName.of("Prev"), previous.ref);
    }
    if (following) {
      fields.set(PDFName.of("Next"), following.ref);
    }
    if (kids.length > 0) {
      const firstKid = kids[0];
      const lastKid = kids[kids.length - 1];
      fields.set(PDFName.of("First"), firstKid.ref);
      fields.set(PDFName.of("Last"), lastKid.ref);
      fields.set(PDFName.of("Count"), PDFNumber.of(countChildrenOfOutline(kids)));
    }

    context.assign(entry.ref, PDFDict.fromMapWithContext(fields, context));

    buildPdfObjectsForOutline(kids, context);
  });
}

/**
 * Warn about outline items whose destination is not listed in the document's
 * /Dests catalog entry.
 *
 * The /Dests dictionary is looked up once and its keys are collected into a
 * Set, so the check per outline item is a constant-time membership test
 * instead of a repeated lookup and array scan at every nesting level.
 * Warnings are emitted via `console.warn`, parent before children, in outline
 * order. Items with an empty destination are not reported.
 *
 * @param {Array<object>} layer - outline items, each with `destination` and `children`.
 * @param {object} pdfDoc - parsed PDF document exposing `context.lookup` and `catalog.get`.
 */
function generateWarningsAboutMissingDestinations (layer, pdfDoc) {
  const dests = pdfDoc.context.lookup(pdfDoc.catalog.get(PDFName.of("Dests")));
  // Dests can be undefined if the PDF wasn't successfully generated (for instance if Paged.js threw an exception)
  if (!dests) {
    return;
  }

  const validDestinationTargets = new Set(
    Array.from(dests.entries(), ([key]) => key.value())
  );

  const warnForLayer = (items) => {
    for (const item of items) {
      if (item.destination && !validDestinationTargets.has("/" + item.destination)) {
        console.warn(`Unable to find destination "${item.destination}" while generating PDF outline.`);
      }
      warnForLayer(item.children);
    }
  };
  warnForLayer(layer);
}

/**
 * Write an /Outlines tree for `outline` into the PDF document's catalog.
 *
 * An empty outline leaves the document untouched: no object reference is
 * allocated and the catalog is not modified. Otherwise one reference is
 * allocated for the outline root, then one per item (via
 * setRefsForOutlineItems), the item dictionaries are built, and the root
 * dictionary (First / Last / Count) is assigned and registered under the
 * catalog's "Outlines" key.
 *
 * @param {object} pdfDoc - parsed PDF document exposing `context` and `catalog`.
 * @param {Array<object>} outline - top-level outline items as produced by parseOutline.
 * @param {boolean} [enableWarnings=false] - when true, warn about outline
 *   destinations missing from the document before writing.
 * @returns {Promise<object>} the same `pdfDoc`.
 */
export async function setOutline (pdfDoc, outline, enableWarnings=false) {
  if (outline.length === 0) {
    return pdfDoc;
  }

  const context = pdfDoc.context;
  // Allocated only once we know there is something to write, so an empty
  // outline does not leave an unassigned object reference behind.
  const outlineRef = context.nextRef();

  if (enableWarnings) {
    generateWarningsAboutMissingDestinations(outline, pdfDoc);
  }

  setRefsForOutlineItems(outline, context, outlineRef);
  buildPdfObjectsForOutline(outline, context);

  const outlineObject = PDFDict.fromMapWithContext(new Map([
    [PDFName.of("First"), outline[0].ref],
    [PDFName.of("Last"), outline[outline.length - 1].ref],
    [PDFName.of("Count"), PDFNumber.of(countChildrenOfOutline(outline))]
  ]), context);
  context.assign(outlineRef, outlineObject);

  pdfDoc.catalog.set(PDFName.of("Outlines"), outlineRef);
  return pdfDoc;
}
Loading
Loading