Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

strconv.atoi: bug fix #18875 #18925

Merged
merged 6 commits into from
Jul 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 13 additions & 9 deletions vlib/strconv/atoi.v
Original file line number Diff line number Diff line change
Expand Up @@ -89,14 +89,14 @@ pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) {
// Use compile-time constants for common cases.
cutoff := strconv.max_u64 / u64(base) + u64(1)
max_val := if bit_size == 64 { strconv.max_u64 } else { (u64(1) << u64(bit_size)) - u64(1) }
basem1 := base - 1

mut n := u64(0)
for i in start_index .. s.len {
mut c := s[i]

// manage underscore inside the number
if c == `_` {
// println("Here: ${s#[i..]}")
if i == start_index || i >= (s.len - 1) {
// println("_ limit")
return u64(0), 1
Expand All @@ -109,21 +109,25 @@ pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) {
continue
}

mut sub_count := 0

// get the 0-9 digit
c -= 48 // subtract the rune `0`

// check if we are in the superior base rune interval [A..Z]
if c >= base {
c -= 7
}

// check if we are in the superior base rune interval [a..z]
if c >= base {
c -= 32 // subtract the `A` - `0` rune to obtain the value of the digit
if c >= 17 { // (65 - 48)
sub_count++
c -= 7 // subtract the `A` - `0` rune to obtain the value of the digit

// check if we are in the superior base rune interval [a..z]
if c >= 42 { // (97 - 7 - 48)
sub_count++
c -= 32 // subtract the `a` - `0` rune to obtain the value of the digit
}
}

// check for digit over base
if c >= base {
if c > basem1 || (sub_count == 0 && c > 9) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is 9 here? Should it not be related to _base somehow?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, basem1 stands for base - 1 that is calculated outside the loop, it is a simple optimization.
make operation inside an if is always expensive as cpu cycles.
basem1 will be stored in a register that will speed up the check in the loop.
The 9 is due the fact that if the flow arrive at that line with sub_count equal to 0 and the value is greater then 9, it is an illegal char in the sequence.
Are all optimizations in order to try to squeeze cpu cycle in the conversions.

return n, i + 1
}

Expand Down
29 changes: 29 additions & 0 deletions vlib/strconv/atoi_test.v
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ fn test_atoi() {
}

fn test_parse_int() {
// symbols coverage
assert strconv.parse_int('1234567890', 10, 32)! == 1234567890
assert strconv.parse_int('19aAbBcCdDeEfF', 16, 64)! == 0x19aAbBcCdDeEfF
// Different bases
assert strconv.parse_int('16', 16, 0)! == 0x16
assert strconv.parse_int('16', 8, 0)! == 0o16
Expand Down Expand Up @@ -83,6 +86,32 @@ fn test_common_parse_uint2() {
assert error == 4
}

fn test_common_parse_uint2_fail() {
mut ascii_characters := [' ', '!', '"', '#', '\$', '%', '&', "'", '(', ')', '*', '+', ',',
'-', '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|',
'}', '~']
mut special_characters := [':', ';', '<', '=', '>', '?', '@', 'X', 'Y', 'Z', '[', '\\', ']',
'^', '_', '`']

num0, err0 := strconv.common_parse_uint2('1Ab', 16, 32)
assert num0 == 427
assert err0 == 0

for ch in ascii_characters {
// println("ch: [${ch}]")
txt_str := '${ch[0]:c}12Ab'
num, err := strconv.common_parse_uint2(txt_str, 16, 32)
assert err != 0
}

for ch in special_characters {
// println("ch: [${ch}]")
txt_str := '${ch[0]:c}12Ab'
num, err := strconv.common_parse_uint2(txt_str, 16, 32)
assert err != 0
}
}

fn test_common_parse_uint2_compatibility() {
test_list := [
'1234,1234',
Expand Down