Skip to content

Commit

Permalink
encoding.html: improve hex unescaping, add test (#19279)
Browse files Browse the repository at this point in the history
  • Loading branch information
ttytm committed Sep 6, 2023
1 parent be6f20a commit 3329919
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 11 deletions.
23 changes: 12 additions & 11 deletions vlib/encoding/html/escape.v
Expand Up @@ -51,29 +51,30 @@ fn unescape_all(input string) string {
mut result := []rune{}
runes := input.runes()
mut i := 0
outer: for i < runes.len {
for i < runes.len {
if runes[i] == `&` {
mut j := i + 1
for j < runes.len && runes[j] != `;` {
j++
}
if j < runes.len && runes[i + 1] == `#` {
// Numeric escape sequences (e.g., &#39; or &#x27;)
code := runes[i + 2..j].string()
if code[0] == `x` {
if runes[i + 2] == `x` || runes[i + 2] == `X` {
// Hexadecimal escape sequence
for c in code[1..] {
if !c.is_hex_digit() {
// Leave invalid sequences unchanged
result << runes[i..j + 1]
i = j + 1
continue outer
if v := hex.decode(runes[i + 3..j].string()) {
mut n := u16(0)
for x in v {
n = n * 256 + x
}
result << n
} else {
// Leave invalid sequences unchanged
result << runes[i..j + 1]
i = j + 1
}
result << hex.decode(code[1..]) or { []u8{} }.bytestr().runes()
} else {
// Decimal escape sequence
if v := strconv.atoi(code) {
if v := strconv.atoi(runes[i + 2..j].string()) {
result << v
} else {
// Leave invalid sequences unchanged
Expand Down
1 change: 1 addition & 0 deletions vlib/encoding/html/escape_test.v
Expand Up @@ -48,6 +48,7 @@ fn test_unescape_html() {
fn test_unescape_all_html() {
// Test different formats
assert html.unescape('&#39;&#x27;&apos;', all: true) == "'''"
assert html.unescape('&#10836; = &#x02a54; = &#X02A54; = &Or;', all: true) == '⩔ = ⩔ = ⩔ = ⩔'
// Converse escape tests
assert html.unescape('&lt;&gt;&amp;', all: true) == '<>&'
assert html.unescape('No change', all: true) == 'No change'
Expand Down

0 comments on commit 3329919

Please sign in to comment.