simplify paragraph walking, get rid of get_paragraphs
We had two ways to tell the HTML parser whether paragraphs should be
tracked:

1. by passing a NoopParagraphWalker
2. by passing get_paragraphs=false

we can actually put a static method on NoopParagraphWalker to compute
the value of get_paragraphs, so that it gets inlined nicely.

this does not bring any perf improvement, but it makes the code
simpler (for me and the optimizer).
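
In miniature, the trick looks like this (a standalone sketch, not the
repo's actual code; apart from is_noop, the names are simplified
stand-ins for ParagraphWalker, NoopParagraphWalker, and ParagraphHasher):

    // A static trait method with a default body acts as a per-impl
    // constant: after monomorphization, `W::is_noop()` is known at
    // compile time and the branch that guards paragraph work folds away.
    trait Walker {
        fn is_noop() -> bool {
            false
        }
        fn update(&mut self, text: &[u8]);
    }

    struct Hasher;

    impl Walker for Hasher {
        fn update(&mut self, text: &[u8]) {
            println!("hashing {} bytes", text.len());
        }
    }

    struct Noop;

    impl Walker for Noop {
        // Overrides the default with a constant the optimizer can fold.
        fn is_noop() -> bool {
            true
        }
        fn update(&mut self, _text: &[u8]) {}
    }

    // Generic over W: for `Noop`, `!W::is_noop()` is statically false,
    // so the entire `if` body is dead code and gets eliminated.
    fn emit_string<W: Walker>(walker: &mut W, text: &[u8]) {
        if !W::is_noop() {
            walker.update(text);
        }
    }

    fn main() {
        emit_string(&mut Hasher, b"some text"); // runs the hasher
        emit_string(&mut Noop, b"some text"); // compiles to a no-op
    }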
untitaker committed Aug 2, 2023
1 parent 2a1172d commit fa88781
Showing 4 changed files with 26 additions and 23 deletions.
10 changes: 1 addition & 9 deletions src/html/mod.rs
@@ -275,25 +275,18 @@ impl Document {
         &self,
         doc_buf: &'b mut DocumentBuffers,
         check_anchors: bool,
-        get_paragraphs: bool,
     ) -> Result<impl Iterator<Item = Link<'l, P::Paragraph>>, Error>
     where
         'b: 'l,
     {
-        self.links_from_read::<_, P>(
-            doc_buf,
-            fs::File::open(&*self.path)?,
-            check_anchors,
-            get_paragraphs,
-        )
+        self.links_from_read::<_, P>(doc_buf, fs::File::open(&*self.path)?, check_anchors)
     }

     fn links_from_read<'b, 'l, R: Read, P: ParagraphWalker>(
         &self,
         doc_buf: &'b mut DocumentBuffers,
         read: R,
         check_anchors: bool,
-        get_paragraphs: bool,
     ) -> Result<impl Iterator<Item = Link<'l, P::Paragraph>>, Error>
     where
         'b: 'l,
@@ -308,7 +301,6 @@ impl Document {
             link_buf: &mut link_buf,
             in_paragraph: false,
             last_paragraph_i: 0,
-            get_paragraphs,
             buffers: &mut doc_buf.parser_buffers,
             current_tag_is_closing: false,
             check_anchors,
12 changes: 7 additions & 5 deletions src/html/parser.rs
@@ -76,7 +76,6 @@ pub struct HyperlinkEmitter<'a, 'l, 'd, P: ParagraphWalker> {
     pub link_buf: &'d mut BumpVec<'a, Link<'l, P::Paragraph>>,
     pub in_paragraph: bool,
     pub last_paragraph_i: usize,
-    pub get_paragraphs: bool,
     pub buffers: &'d mut ParserBuffers,
     pub current_tag_is_closing: bool,
     pub check_anchors: bool,
@@ -179,7 +178,7 @@
     }

     fn emit_string(&mut self, c: &[u8]) {
-        if self.get_paragraphs && self.in_paragraph {
+        if !P::is_noop() && self.in_paragraph {
             self.paragraph_walker.update(c);
         }
     }
@@ -198,17 +197,20 @@
         self.flush_old_attribute();

         self.buffers.last_start_tag.clear();
+
+        let is_paragraph_tag = !P::is_noop() && is_paragraph_tag(&self.buffers.current_tag_name);
+
         if !self.current_tag_is_closing {
             self.buffers
                 .last_start_tag
                 .extend(&self.buffers.current_tag_name);

-            if is_paragraph_tag(&self.buffers.current_tag_name) {
+            if is_paragraph_tag {
                 self.in_paragraph = true;
                 self.last_paragraph_i = self.link_buf.len();
                 self.paragraph_walker.finish_paragraph();
             }
-        } else if is_paragraph_tag(&self.buffers.current_tag_name) {
+        } else if is_paragraph_tag {
             let paragraph = self.paragraph_walker.finish_paragraph();
             if self.in_paragraph {
                 for link in &mut self.link_buf[self.last_paragraph_i..] {
@@ -229,7 +231,7 @@
     }

     fn set_self_closing(&mut self) {
-        if is_paragraph_tag(&self.buffers.current_tag_name) {
+        if !P::is_noop() && is_paragraph_tag(&self.buffers.current_tag_name) {
             self.in_paragraph = false;
         }
     }
13 changes: 4 additions & 9 deletions src/main.rs
@@ -150,11 +150,7 @@
 {
     println!("Reading files");

-    let html_result = extract_html_links::<BrokenLinkCollector<_>, P>(
-        &base_path,
-        check_anchors,
-        sources_path.is_some(),
-    )?;
+    let html_result = extract_html_links::<BrokenLinkCollector<_>, P>(&base_path, check_anchors)?;

     let used_links_len = html_result.collector.used_links_count();
     println!(
@@ -329,7 +325,7 @@ fn dump_paragraphs(path: PathBuf) -> Result<(), Error> {
         Some(x) if HTML_FILES.contains(&x) => {
             let document = Document::new(Path::new(""), &path);
             document
-                .links::<DebugParagraphWalker<ParagraphHasher>>(&mut doc_buf, false, true)?
+                .links::<DebugParagraphWalker<ParagraphHasher>>(&mut doc_buf, false)?
                 .filter_map(|link| Some((link.into_paragraph()?, None)))
                 .collect()
         }
@@ -388,7 +384,6 @@ fn walk_files(
 fn extract_html_links<C: LinkCollector<P::Paragraph>, P: ParagraphWalker>(
     base_path: &Path,
     check_anchors: bool,
-    get_paragraphs: bool,
 ) -> Result<HtmlResult<C>, Error> {
     let result: Result<_, Error> = walk_files(base_path)
         .try_fold(
@@ -413,7 +408,7 @@
             }

             for link in document
-                .links::<P>(&mut doc_buf, check_anchors, get_paragraphs)
+                .links::<P>(&mut doc_buf, check_anchors)
                 .with_context(|| format!("Failed to read file {}", document.path.display()))?
             {
                 collector.ingest(link);
@@ -497,7 +492,7 @@ fn extract_markdown_paragraphs<P: ParagraphWalker>(
 fn match_all_paragraphs(base_path: PathBuf, sources_path: PathBuf) -> Result<(), Error> {
     println!("Reading files");
     let html_result =
-        extract_html_links::<UsedLinkCollector<_>, ParagraphHasher>(&base_path, true, true)?;
+        extract_html_links::<UsedLinkCollector<_>, ParagraphHasher>(&base_path, true)?;

     println!("Reading source files");
     let paragraps_to_sourcefile = extract_markdown_paragraphs::<ParagraphHasher>(&sources_path)?;
14 changes: 14 additions & 0 deletions src/paragraph.rs
@@ -15,6 +15,12 @@ pub trait ParagraphWalker: Send {
     type Paragraph: Clone + Eq + PartialEq + Hash + Ord + PartialOrd + Send + 'static;

     fn new() -> Self;
+
+    #[inline]
+    fn is_noop() -> bool {
+        false
+    }
+
     fn update_raw(&mut self, text: &[u8]);
     fn finish_paragraph(&mut self) -> Option<Self::Paragraph>;

@@ -101,12 +107,20 @@ pub enum VoidParagraph {}
 impl ParagraphWalker for NoopParagraphWalker {
     type Paragraph = VoidParagraph;

+    #[inline]
     fn new() -> Self {
         NoopParagraphWalker
     }

+    #[inline]
+    fn is_noop() -> bool {
+        true
+    }
+
+    #[inline]
     fn update_raw(&mut self, _text: &[u8]) {}

+    #[inline]
     fn finish_paragraph(&mut self) -> Option<Self::Paragraph> {
         None
     }
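With the flag gone, paragraph tracking is selected purely through the
type parameter. A hypothetical call under the new signature (passing
NoopParagraphWalker explicitly here is an assumption; the call sites
above thread P through generically):

    // Assumed call: opting out of paragraph tracking via the type alone.
    let links = document.links::<NoopParagraphWalker>(&mut doc_buf, check_anchors)?;

After monomorphization, P::is_noop() is a constant, so the paragraph
branches in HyperlinkEmitter fold away for NoopParagraphWalker: the same
effect get_paragraphs = false used to have, minus the runtime flag.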
