Skip to content

Commit

Permalink
tree walk based replacement for .textContent. more efficient cleanStyles
Browse files Browse the repository at this point in the history
  • Loading branch information
arrix committed Nov 19, 2010
1 parent b7fa45d commit b367442
Showing 1 changed file with 68 additions and 0 deletions.
68 changes: 68 additions & 0 deletions lib/readability.js
Original file line number Diff line number Diff line change
Expand Up @@ -1916,6 +1916,74 @@ var jsdom = require('jsdom'),
getArticleFooter: function () {
return document.createElement("DIV");
},

// hundredfold faster
// use native string.trim
// jsdom's implementation of textContent is innerHTML + strip tags + HTMLDecode
// here we replace it with an optimized tree walker
getInnerText: function (e, normalizeSpaces) {
if (normalizeSpaces === undefined) normalizeSpaces = true;

function TextWalker(node, func) {
function walk(cur) {
var children, len, i;
if (cur.nodeType == 3) {
func(cur);
return;
} else if (cur.nodeType != 1) {
return;
}

children = cur.childNodes;
for (i = 0, len = children.length; i < len; i++) {
walk(children[i]);
}
}
walk(node);
}

var textContent = '';
TextWalker(e, function(cur) {
textContent += cur.nodeValue;
});
textContent = textContent.trim();
//var textContent = e.textContent.trim();

if(normalizeSpaces) {
return textContent.replace( readability.regexps.normalize, " "); }
else {
return textContent;
}
},

cleanStyles: function (e) {
e = e || document;
// var all = e.getElementsByTagName('*'), i, len, node;
// for (i = 0, len = all.length; i < len; i++) {
// node = all[i];
// if (node.className != 'readability-styled') {
// node.removeAttribute("style");
// }
// }
// return;

function walk(cur) {
var children, i, l;

if (cur.nodeType == 1) {
if (cur.className != 'readability-styled') {
cur.removeAttribute("style");
}

children = cur.childNodes;
for (i = 0, l = children.length; i < l; i++) {
walk(children[i]);
}
}
}
walk(e);
},

//// new methods ///
reset: function() {
var z = this;
Expand Down

0 comments on commit b367442

Please sign in to comment.