Skip to content

Commit

Permalink
Fix GFM task list checkboxes followed by eol
Browse files Browse the repository at this point in the history
Closes GH-24.
  • Loading branch information
wooorm committed Nov 25, 2022
1 parent d5ae7bb commit 4f77634
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 46 deletions.
92 changes: 47 additions & 45 deletions src/to_mdast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ struct CompileContext<'a> {
// compile markdown.
character_reference_marker: u8,
gfm_table_inside: bool,
gfm_task_list_item_check_after: bool,
hard_break_after: bool,
heading_setext_text_after: bool,
jsx_tag_stack: Vec<JsxTag>,
Expand Down Expand Up @@ -128,7 +127,6 @@ impl<'a> CompileContext<'a> {
bytes,
character_reference_marker: 0,
gfm_table_inside: false,
gfm_task_list_item_check_after: false,
hard_break_after: false,
heading_setext_text_after: false,
jsx_tag_stack: vec![],
Expand Down Expand Up @@ -347,7 +345,6 @@ fn exit(context: &mut CompileContext) -> Result<(), String> {
| Name::GfmTableRow
| Name::GfmTableCell
| Name::HeadingAtx
| Name::ListItem
| Name::ListOrdered
| Name::ListUnordered
| Name::Paragraph
Expand All @@ -358,6 +355,7 @@ fn exit(context: &mut CompileContext) -> Result<(), String> {
Name::CharacterEscapeValue
| Name::CodeFlowChunk
| Name::CodeTextData
| Name::Data
| Name::FrontmatterChunk
| Name::HtmlFlowData
| Name::HtmlTextData
Expand Down Expand Up @@ -385,7 +383,6 @@ fn exit(context: &mut CompileContext) -> Result<(), String> {
Name::CodeFenced | Name::MathFlow => on_exit_raw_flow(context)?,
Name::CodeIndented => on_exit_code_indented(context)?,
Name::CodeText | Name::MathText => on_exit_raw_text(context)?,
Name::Data => on_exit_data_actual(context)?,
Name::DefinitionDestinationString => on_exit_definition_destination_string(context),
Name::DefinitionLabelString | Name::GfmFootnoteDefinitionLabelString => {
on_exit_definition_id(context);
Expand All @@ -399,7 +396,6 @@ fn exit(context: &mut CompileContext) -> Result<(), String> {
| Name::GfmAutolinkLiteralXmpp => on_exit_gfm_autolink_literal(context)?,
Name::GfmFootnoteCall | Name::Image | Name::Link => on_exit_media(context)?,
Name::GfmTable => on_exit_gfm_table(context)?,
Name::GfmTaskListItemCheck => on_exit_gfm_task_list_item_check(context),
Name::GfmTaskListItemValueUnchecked | Name::GfmTaskListItemValueChecked => {
on_exit_gfm_task_list_item_value(context);
}
Expand All @@ -411,6 +407,7 @@ fn exit(context: &mut CompileContext) -> Result<(), String> {
Name::HtmlFlow | Name::HtmlText => on_exit_html(context)?,
Name::LabelText => on_exit_label_text(context),
Name::LineEnding => on_exit_line_ending(context)?,
Name::ListItem => on_exit_list_item(context)?,
Name::ListItemValue => on_exit_list_item_value(context),
Name::MdxEsm | Name::MdxFlowExpression | Name::MdxTextExpression => {
on_exit_mdx_esm_or_expression(context)?;
Expand Down Expand Up @@ -1089,29 +1086,6 @@ fn on_exit_data(context: &mut CompileContext) -> Result<(), String> {
Ok(())
}

/// Handle [`Exit`][Kind::Exit]:[`Data`][Name::Data] itself.
///
/// Runs the shared `on_exit_data` handling first.
/// When a GFM task list item check exited just before this data, the node
/// most recently added to the current parent gets the parent's start point
/// and, if it is text, loses its first character.
fn on_exit_data_actual(context: &mut CompileContext) -> Result<(), String> {
    on_exit_data(context)?;

    // Nothing more to do unless a check exited right before this data.
    if !context.gfm_task_list_item_check_after {
        return Ok(());
    }

    let tail = context.tail_mut();
    // The check handler already moved the parent's start past the check,
    // so the child simply adopts that point.
    let parent_start = tail.position().unwrap().start.clone();
    let child = tail.children_mut().unwrap().last_mut().unwrap();
    child.position_mut().unwrap().start = parent_start;
    if let Node::Text(text) = child {
        // Drop the leading character, which the check handler expects to
        // be a space or tab.
        text.value.remove(0);
    }

    // The flag only applies to the one data event following the check.
    context.gfm_task_list_item_check_after = false;

    Ok(())
}

/// Handle [`Exit`][Kind::Exit]:[`DefinitionDestinationString`][Name::DefinitionDestinationString].
fn on_exit_definition_destination_string(context: &mut CompileContext) {
let value = context.resume().to_string();
Expand Down Expand Up @@ -1210,23 +1184,6 @@ fn on_exit_gfm_table(context: &mut CompileContext) -> Result<(), String> {
Ok(())
}

/// Handle [`Exit`][Kind::Exit]:[`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck].
///
/// Moves the start of the current node to one byte (and one column) past
/// the point of the check's exit event, and raises
/// `gfm_task_list_item_check_after` so that the following data exit can
/// adjust its text to match.
///
/// NOTE(review): this assumes a tab or space directly follows the check;
/// a check followed by a line ending would not satisfy the debug assertion
/// below — confirm how that input is handled.
fn on_exit_gfm_task_list_item_check(context: &mut CompileContext) {
    let exit_event = &context.events[context.index];
    let mut after_check = point_from_event(exit_event);

    // Only checked in debug builds: the byte at the exit point is expected
    // to be the whitespace separating the check from the content.
    debug_assert!(
        matches!(context.bytes[after_check.offset], b'\t' | b' '),
        "expected tab or space after check"
    );

    // Step over that single whitespace byte.
    after_check.offset += 1;
    after_check.column += 1;

    context.tail_mut().position_mut().unwrap().start = after_check;
    context.gfm_task_list_item_check_after = true;
}

/// Handle [`Exit`][Kind::Exit]:{[`GfmTaskListItemValueChecked`][Name::GfmTaskListItemValueChecked],[`GfmTaskListItemValueUnchecked`][Name::GfmTaskListItemValueUnchecked]}.
fn on_exit_gfm_task_list_item_value(context: &mut CompileContext) {
let checked = context.events[context.index].name == Name::GfmTaskListItemValueChecked;
Expand Down Expand Up @@ -1416,6 +1373,51 @@ fn on_exit_media(context: &mut CompileContext) -> Result<(), String> {
Ok(())
}

/// Handle [`Exit`][Kind::Exit]:[`ListItem`][Name::ListItem].
///
/// For task list items (items whose `checked` field is set) whose first
/// child is a paragraph that starts with text, one leading tab or space,
/// or one leading line ending (CR, LF, or CRLF), is stripped from that
/// text.
/// The start points of the text and of the paragraph are moved past the
/// stripped bytes.
/// If nothing is left of the text, the text node is removed from the
/// paragraph instead.
/// Afterwards the generic `on_exit` handling runs, for every list item.
///
/// # Errors
///
/// Propagates any error returned by `on_exit`.
fn on_exit_list_item(context: &mut CompileContext) -> Result<(), String> {
    if let Node::ListItem(item) = context.tail_mut() {
        if item.checked.is_some() {
            if let Some(Node::Paragraph(paragraph)) = item.children.first_mut() {
                if let Some(Node::Text(text)) = paragraph.children.first_mut() {
                    let mut point = text.position.as_ref().unwrap().start.clone();

                    // Number of leading bytes to drop.
                    // Matching on the slice (rather than indexing the first
                    // byte) means an empty value cannot panic; it is simply
                    // left untouched.
                    let skip = match text.value.as_bytes() {
                        // Move past a single tab or space.
                        [b'\t' | b' ', ..] => {
                            point.column += 1;
                            1
                        }
                        // Move past CRLF: one line ending, two bytes.
                        [b'\r', b'\n', ..] => {
                            point.line += 1;
                            point.column = 1;
                            2
                        }
                        // Move past a lone CR or LF.
                        [b'\r' | b'\n', ..] => {
                            point.line += 1;
                            point.column = 1;
                            1
                        }
                        _ => 0,
                    };
                    point.offset += skip;

                    if skip > 0 && skip == text.value.len() {
                        // The whole text was whitespace: remove the text.
                        paragraph.children.remove(0);
                    } else {
                        // Only ASCII bytes are skipped, so `skip` is always
                        // on a character boundary.
                        text.value.replace_range(..skip, "");
                        text.position.as_mut().unwrap().start = point.clone();
                    }
                    paragraph.position.as_mut().unwrap().start = point;
                }
            }
        }
    }

    on_exit(context)?;

    Ok(())
}

/// Handle [`Exit`][Kind::Exit]:[`ListItemValue`][Name::ListItemValue].
fn on_exit_list_item_value(context: &mut CompileContext) {
let start = Slice::from_position(
Expand Down
8 changes: 8 additions & 0 deletions tests/fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,5 +109,13 @@ fn fuzz() -> Result<(), String> {
"10: attention in different links (GH-21)"
);

assert!(
matches!(
to_mdast("* [ ]\na", &Default::default()),
Ok(mdast::Node::Root(_))
),
"11: gfm task list items followed by eols (GH-24)"
);

Ok(())
}
60 changes: 59 additions & 1 deletion tests/gfm_task_list_item.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use markdown::{
mdast::{List, ListItem, Node, Paragraph, Root, Text},
mdast::{Emphasis, List, ListItem, Node, Paragraph, Root, Text},
to_html, to_html_with_options, to_mdast,
unist::Position,
Options, ParseOptions,
Expand Down Expand Up @@ -291,5 +291,63 @@ Text.</li>
"should support task list items as `checked` fields on `ListItem`s in mdast"
);

assert_eq!(
to_mdast(
"* [x]\r\n a\n* [ ] b\n* [x]\t \r*c*",
&ParseOptions::gfm()
)?,
Node::Root(Root {
children: vec![Node::List(List {
ordered: false,
spread: false,
start: None,
children: vec![
Node::ListItem(ListItem {
checked: Some(true),
spread: false,
children: vec![Node::Paragraph(Paragraph {
children: vec![Node::Text(Text {
value: "a".into(),
position: Some(Position::new(2, 1, 7, 2, 4, 10))
}),],
position: Some(Position::new(2, 1, 7, 2, 4, 10))
})],
position: Some(Position::new(1, 1, 0, 2, 4, 10))
}),
Node::ListItem(ListItem {
checked: Some(false),
spread: false,
children: vec![Node::Paragraph(Paragraph {
children: vec![Node::Text(Text {
value: " b".into(),
position: Some(Position::new(3, 7, 17, 3, 10, 20))
}),],
position: Some(Position::new(3, 7, 17, 3, 10, 20))
})],
position: Some(Position::new(3, 1, 11, 3, 10, 20))
}),
Node::ListItem(ListItem {
checked: Some(true),
spread: false,
children: vec![Node::Paragraph(Paragraph {
children: vec![Node::Emphasis(Emphasis {
children: vec![Node::Text(Text {
value: "c".into(),
position: Some(Position::new(5, 2, 30, 5, 3, 31))
}),],
position: Some(Position::new(5, 1, 29, 5, 4, 32))
})],
position: Some(Position::new(5, 1, 29, 5, 4, 32))
})],
position: Some(Position::new(4, 1, 21, 5, 4, 32))
}),
],
position: Some(Position::new(1, 1, 0, 5, 4, 32))
})],
position: Some(Position::new(1, 1, 0, 5, 4, 32))
}),
"should handle lots of whitespace after checkbox, and non-text"
);

Ok(())
}

0 comments on commit 4f77634

Please sign in to comment.