diff --git a/src/html/mod.rs b/src/html/mod.rs
index e63d9b5..9fde679 100644
--- a/src/html/mod.rs
+++ b/src/html/mod.rs
@@ -275,17 +275,11 @@ impl Document {
         &self,
         doc_buf: &'b mut DocumentBuffers,
         check_anchors: bool,
-        get_paragraphs: bool,
     ) -> Result<impl Iterator<Item = Link<'l, P::Paragraph>>, Error>
     where
         'b: 'l,
     {
-        self.links_from_read::<_, P>(
-            doc_buf,
-            fs::File::open(&*self.path)?,
-            check_anchors,
-            get_paragraphs,
-        )
+        self.links_from_read::<_, P>(doc_buf, fs::File::open(&*self.path)?, check_anchors)
     }
 
     fn links_from_read<'b, 'l, R: Read, P: ParagraphWalker>(
@@ -293,7 +287,6 @@ impl Document {
         doc_buf: &'b mut DocumentBuffers,
         read: R,
         check_anchors: bool,
-        get_paragraphs: bool,
     ) -> Result<impl Iterator<Item = Link<'l, P::Paragraph>>, Error>
     where
         'b: 'l,
     {
@@ -308,7 +301,6 @@ impl Document {
             link_buf: &mut link_buf,
             in_paragraph: false,
             last_paragraph_i: 0,
-            get_paragraphs,
             buffers: &mut doc_buf.parser_buffers,
             current_tag_is_closing: false,
             check_anchors,
diff --git a/src/html/parser.rs b/src/html/parser.rs
index 8a3cbb0..2794f2e 100644
--- a/src/html/parser.rs
+++ b/src/html/parser.rs
@@ -76,7 +76,6 @@ pub struct HyperlinkEmitter<'a, 'l, 'd, P: ParagraphWalker> {
     pub link_buf: &'d mut BumpVec<'a, Link<'l, P::Paragraph>>,
     pub in_paragraph: bool,
     pub last_paragraph_i: usize,
-    pub get_paragraphs: bool,
     pub buffers: &'d mut ParserBuffers,
     pub current_tag_is_closing: bool,
     pub check_anchors: bool,
@@ -179,7 +178,7 @@ where
     }
 
     fn emit_string(&mut self, c: &[u8]) {
-        if self.get_paragraphs && self.in_paragraph {
+        if !P::is_noop() && self.in_paragraph {
             self.paragraph_walker.update(c);
         }
     }
@@ -198,17 +197,20 @@ where
         self.flush_old_attribute();
 
         self.buffers.last_start_tag.clear();
+
+        let is_paragraph_tag = !P::is_noop() && is_paragraph_tag(&self.buffers.current_tag_name);
+
         if !self.current_tag_is_closing {
             self.buffers
                 .last_start_tag
                 .extend(&self.buffers.current_tag_name);
 
-            if is_paragraph_tag(&self.buffers.current_tag_name) {
+            if is_paragraph_tag {
                 self.in_paragraph = true;
                 self.last_paragraph_i = self.link_buf.len();
                 self.paragraph_walker.finish_paragraph();
             }
-        } else if is_paragraph_tag(&self.buffers.current_tag_name) {
+        } else if is_paragraph_tag {
             let paragraph = self.paragraph_walker.finish_paragraph();
             if self.in_paragraph {
                 for link in &mut self.link_buf[self.last_paragraph_i..] {
@@ -229,7 +231,7 @@ where
     }
 
     fn set_self_closing(&mut self) {
-        if is_paragraph_tag(&self.buffers.current_tag_name) {
+        if !P::is_noop() && is_paragraph_tag(&self.buffers.current_tag_name) {
             self.in_paragraph = false;
         }
     }
diff --git a/src/main.rs b/src/main.rs
index 0ecf51c..1a9ce8a 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -150,11 +150,7 @@ where
 {
     println!("Reading files");
 
-    let html_result = extract_html_links::, P>(
-        &base_path,
-        check_anchors,
-        sources_path.is_some(),
-    )?;
+    let html_result = extract_html_links::, P>(&base_path, check_anchors)?;
 
     let used_links_len = html_result.collector.used_links_count();
     println!(
@@ -329,7 +325,7 @@ fn dump_paragraphs(path: PathBuf) -> Result<(), Error> {
         Some(x) if HTML_FILES.contains(&x) => {
             let document = Document::new(Path::new(""), &path);
             document
-                .links::>(&mut doc_buf, false, true)?
+                .links::>(&mut doc_buf, false)?
                 .filter_map(|link| Some((link.into_paragraph()?, None)))
                 .collect()
         }
@@ -388,7 +384,6 @@ fn walk_files(
 fn extract_html_links, P: ParagraphWalker>(
     base_path: &Path,
     check_anchors: bool,
-    get_paragraphs: bool,
 ) -> Result, Error> {
     let result: Result<_, Error> = walk_files(base_path)
         .try_fold(
@@ -413,7 +408,7 @@ fn extract_html_links, P: ParagraphWalker>(
                 }
 
                 for link in document
-                    .links::<P>(&mut doc_buf, check_anchors, get_paragraphs)
+                    .links::<P>(&mut doc_buf, check_anchors)
                     .with_context(|| format!("Failed to read file {}", document.path.display()))?
                 {
                     collector.ingest(link);
@@ -497,7 +492,7 @@ fn extract_markdown_paragraphs(
 fn match_all_paragraphs(base_path: PathBuf, sources_path: PathBuf) -> Result<(), Error> {
     println!("Reading files");
     let html_result =
-        extract_html_links::, ParagraphHasher>(&base_path, true, true)?;
+        extract_html_links::, ParagraphHasher>(&base_path, true)?;
 
     println!("Reading source files");
     let paragraps_to_sourcefile = extract_markdown_paragraphs::(&sources_path)?;
diff --git a/src/paragraph.rs b/src/paragraph.rs
index 81a8aa4..fef7f04 100644
--- a/src/paragraph.rs
+++ b/src/paragraph.rs
@@ -15,6 +15,12 @@ pub trait ParagraphWalker: Send {
     type Paragraph: Clone + Eq + PartialEq + Hash + Ord + PartialOrd + Send + 'static;
 
     fn new() -> Self;
+
+    #[inline]
+    fn is_noop() -> bool {
+        false
+    }
+
     fn update_raw(&mut self, text: &[u8]);
 
     fn finish_paragraph(&mut self) -> Option<Self::Paragraph>;
@@ -101,12 +107,20 @@ pub enum VoidParagraph {}
 impl ParagraphWalker for NoopParagraphWalker {
     type Paragraph = VoidParagraph;
 
+    #[inline]
     fn new() -> Self {
         NoopParagraphWalker
     }
 
+    #[inline]
+    fn is_noop() -> bool {
+        true
+    }
+
+    #[inline]
     fn update_raw(&mut self, _text: &[u8]) {}
 
+    #[inline]
     fn finish_paragraph(&mut self) -> Option<Self::Paragraph> {
         None
    }
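
Note: the diff above replaces the runtime `get_paragraphs: bool` flag, which was threaded through `Document::links`, `links_from_read`, and `HyperlinkEmitter`, with a `ParagraphWalker::is_noop()` associated function that defaults to `false` and is overridden to return `true` by `NoopParagraphWalker`. Below is a minimal, self-contained sketch of that pattern; the names (`Walker`, `HashingWalker`, `NoopWalker`, the free function `emit_string`) are illustrative stand-ins, not hyperlink's actual types.

```rust
/// Illustrative stand-in for `ParagraphWalker`: the default `is_noop()`
/// returns false, so only walkers that track nothing override it.
trait Walker {
    fn new() -> Self;

    #[inline]
    fn is_noop() -> bool {
        false
    }

    fn update(&mut self, text: &[u8]);
}

/// A walker that does real work per text chunk.
struct HashingWalker {
    bytes_seen: usize,
}

impl Walker for HashingWalker {
    fn new() -> Self {
        HashingWalker { bytes_seen: 0 }
    }

    fn update(&mut self, text: &[u8]) {
        self.bytes_seen += text.len();
    }
}

/// A walker that tracks nothing; `is_noop()` advertises that statically.
struct NoopWalker;

impl Walker for NoopWalker {
    fn new() -> Self {
        NoopWalker
    }

    #[inline]
    fn is_noop() -> bool {
        true
    }

    fn update(&mut self, _text: &[u8]) {}
}

/// Analogous to `HyperlinkEmitter::emit_string`: the condition is decided by
/// the type parameter, not by a `get_paragraphs` field carried at runtime.
fn emit_string<P: Walker>(walker: &mut P, in_paragraph: bool, text: &[u8]) {
    if !P::is_noop() && in_paragraph {
        walker.update(text);
    }
}

fn main() {
    let mut real = HashingWalker::new();
    emit_string(&mut real, true, b"some paragraph text");
    assert_eq!(real.bytes_seen, 19);

    // For NoopWalker, `P::is_noop()` is a constant `true`, so the whole
    // branch compiles away in this monomorphized copy of `emit_string`.
    let mut noop = NoopWalker::new();
    emit_string(&mut noop, true, b"some paragraph text");
}
```

Since `is_noop()` takes no receiver and each implementation returns a constant, the check is resolved per monomorphized instance, so the compiler can drop the paragraph-tracking branches entirely when the no-op walker is used; the diff suggests this is what lets the `get_paragraphs` parameter disappear from every call site.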