Skip to content

Commit 8cdc554

Browse files
author
walking devel
authored
net.html: fix panic in html.parse() called with empty string, remove replacement of \n in the original content (#17206)
1 parent a8102f1 commit 8cdc554

File tree

4 files changed

+62
-4
lines changed

4 files changed

+62
-4
lines changed

vlib/net/html/dom.v

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,12 @@ fn (mut dom DocumentObjectModel) add_tag_by_attribute(tag &Tag) {
9696

9797
fn (mut dom DocumentObjectModel) construct(tag_list []&Tag) {
9898
dom.constructed = true
99+
100+
// If there are no tags, accessing `tag_list[0]` below would panic.
101+
if tag_list.len == 0 {
102+
return
103+
}
104+
99105
mut temp_map := map[string]int{}
100106
mut temp_int := null_element
101107
mut temp_string := ''
@@ -106,6 +112,7 @@ fn (mut dom DocumentObjectModel) construct(tag_list []&Tag) {
106112
temp_map['0'] = dom.btree.add_children(tag_list[0])
107113
stack.push(0)
108114
root_index := 0
115+
109116
for index := 1; index < tag_list.len; index++ {
110117
mut tag := tag_list[index]
111118
dom.print_debug(tag.str())

vlib/net/html/parser.v

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ mut:
1313
is_attribute bool
1414
opened_code_type string
1515
line_count int
16+
outside_tag bool
1617
lexeme_builder strings.Builder = strings.new_builder(100)
1718
code_tags map[string]bool = {
1819
'script': true
@@ -90,6 +91,7 @@ fn (mut parser Parser) init() {
9091
parser.tags = []&Tag{}
9192
parser.dom.close_tags['/!document'] = true
9293
parser.lexical_attributes.current_tag = &Tag{}
94+
parser.lexical_attributes.outside_tag = true
9395
parser.initialized = true
9496
}
9597

@@ -231,19 +233,40 @@ pub fn (mut parser Parser) split_parse(data string) {
231233
parser.lexical_attributes.lexeme_builder.go_back_to(0)
232234
parser.generate_tag()
233235
parser.lexical_attributes.open_tag = true
236+
parser.lexical_attributes.outside_tag = false
234237
} else {
235238
parser.lexical_attributes.lexeme_builder.write_u8(chr)
236239
}
237240
}
241+
242+
// If `data` has no tags and contains only text.
243+
if parser.lexical_attributes.outside_tag {
244+
temp_string := parser.lexical_attributes.lexeme_builder.str()
245+
246+
if parser.tags.len == 0 {
247+
parser.tags << &Tag{
248+
name: 'text'
249+
content: temp_string
250+
}
251+
} else if parser.tags.len == 1 {
252+
mut tag := parser.tags.first()
253+
254+
if tag.name == 'text' {
255+
tag.content += temp_string
256+
}
257+
}
258+
}
238259
}
239260

240261
// parse_html parses the given HTML string
241262
pub fn (mut parser Parser) parse_html(data string) {
242263
parser.init()
243264
mut lines := data.split_into_lines()
244-
for line in lines {
265+
for index, line in lines {
245266
parser.lexical_attributes.line_count++
246-
parser.split_parse(line)
267+
// The parser shouldn't strip `\n`, because doing so may break JS code or make separate pieces of text stick together.
268+
// After `split_into_lines()` we need to add `\n` again.
269+
parser.split_parse(if index < lines.len - 1 { '${line}\n' } else { line })
247270
}
248271
parser.generate_tag()
249272
parser.dom.debug_file = parser.debug_file

vlib/net/html/parser_test.v

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,34 @@ module html
22

33
import strings
44

5+
fn test_parse_empty_string() {
6+
mut parser := Parser{}
7+
8+
parser.parse_html('')
9+
10+
assert parser.tags.len == 0
11+
}
12+
13+
fn test_parse_text() {
14+
mut parser := Parser{}
15+
text_content := 'test\nparse\ntext'
16+
17+
parser.parse_html(text_content)
18+
19+
assert parser.tags.len == 1
20+
assert parser.tags.first().text() == text_content
21+
}
22+
23+
fn test_parse_one_tag_with_text() {
24+
mut parser := Parser{}
25+
text_content := 'tag\nwith\ntext'
26+
p_tag := '<p>${text_content}</p>'
27+
28+
parser.parse_html(p_tag)
29+
30+
assert parser.tags.first().text() == text_content
31+
}
32+
533
fn test_split_parse() {
634
mut parser := Parser{}
735
parser.init()
@@ -37,5 +65,5 @@ fn test_script_tag() {
3765
script_content := "\nvar googletag = googletag || {};\ngoogletag.cmd = googletag.cmd || [];if(3 > 5) {console.log('Birl');}\n"
3866
temp_html := '<html><body><script>${script_content}</script></body></html>'
3967
parser.parse_html(temp_html)
40-
assert parser.tags[2].content.len == script_content.replace('\n', '').len
68+
assert parser.tags[2].content.len == script_content.len
4169
}

vlib/net/html/tag.v

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ pub fn (tag Tag) text() string {
4040
return '\n'
4141
}
4242
mut text_str := strings.new_builder(200)
43-
text_str.write_string(tag.content.replace('\n', ''))
43+
text_str.write_string(tag.content)
4444
for child in tag.children {
4545
text_str.write_string(child.text())
4646
}

0 commit comments

Comments
 (0)