Skip to content

Commit

Permalink
reuse io buffer between documents (#164)
Browse files Browse the repository at this point in the history
  • Loading branch information
untitaker committed Jul 7, 2023
1 parent 50178ba commit 93b96a3
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 5 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ rayon = "1.5.1"
pulldown-cmark = "0.8.0"
blake3 = "1.0.0"

html5gum = "0.5.3"
html5gum = "0.5.4"
jwalk = "0.7.0"
patricia_tree = "0.3.1"
bumpalo = { version = "3.11.1", features = ["collections"] }
Expand Down
17 changes: 15 additions & 2 deletions src/html/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,15 +170,27 @@ impl<'a, P> Link<'a, P> {
}
}

/// Length in bytes (1024 * 1024) of the fixed-size array stored in
/// `DocumentBuffers::html_read_buffer`.
const BUF_SIZE: usize = 1024 * 1024;

/// This struct is initialized once per "batch of documents" that will be processed on a single
/// worker thread (as determined by rayon). It pays off to do as much heap allocation as possible
/// here once instead of in Document::links.
// NOTE(review): this text is a rendered diff without +/- markers. The derive below looks like
// the line the commit removed; it cannot be kept together with the hand-written
// `impl Default for DocumentBuffers` that follows the struct -- confirm against the repository.
#[derive(Default)]
pub struct DocumentBuffers {
// Bump allocator; cleared by `DocumentBuffers::reset`.
arena: bumpalo::Bump,
// Boxed array of `BUF_SIZE` bytes. It is passed to `IoReader::new_with_buffer` when a
// document is tokenized, so the same buffer is reused between documents.
html_read_buffer: Box<[u8; BUF_SIZE]>,
// Buffers owned by the `parser` module; their contents are not visible in this excerpt.
parser_buffers: parser::ParserBuffers,
}

/// Hand-written `Default` for `DocumentBuffers`.
///
/// `arena` and `parser_buffers` take their own `Default` values, while `html_read_buffer` is
/// created explicitly as a boxed, zero-filled array of `BUF_SIZE` bytes. The standard library
/// only implements `Default` for arrays of up to 32 elements, so a derived impl is not
/// available for a field of this type.
impl Default for DocumentBuffers {
fn default() -> Self {
DocumentBuffers {
arena: Default::default(),
// NOTE(review): the array literal is an ordinary value handed to `Box::new`; whether it
// is materialised on the stack before being moved to the heap depends on the optimizer
// -- confirm the 1 MiB temporary is acceptable for worker-thread stack sizes.
html_read_buffer: Box::new([0; BUF_SIZE]),
parser_buffers: Default::default(),
}
}
}

impl DocumentBuffers {
pub fn reset(&mut self) {
self.arena.reset();
Expand Down Expand Up @@ -301,7 +313,8 @@ impl Document {
current_tag_is_closing: false,
check_anchors,
};
let reader = Tokenizer::new_with_emitter(IoReader::new(read), emitter);
let ioreader = IoReader::new_with_buffer(read, doc_buf.html_read_buffer.as_mut());
let reader = Tokenizer::new_with_emitter(ioreader, emitter);

for error in reader {
error?;
Expand Down

0 comments on commit 93b96a3

Please sign in to comment.